summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c1546
1 files changed, 1349 insertions, 197 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 1b35c56..7be74ca 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -38,15 +38,7 @@
#include "tommath.h"
#include "tclStringRep.h"
-/*
- * Set COMPAT to 1 to restore the shimmering patterns to those of Tcl 8.5.
- * This is an escape hatch in case the changes have some unexpected unwelcome
- * impact on performance. If things go well, this mechanism can go away when
- * post-8.6 development begins.
- */
-
-#define COMPAT 0
-
+#include "assert.h"
/*
* Prototypes for functions defined later in this file:
*/
@@ -145,12 +137,12 @@ GrowStringBuffer(
char *ptr = NULL;
int attempt;
- if (objPtr->bytes == tclEmptyStringRep) {
+ if (objPtr->bytes == &tclEmptyString) {
objPtr->bytes = NULL;
}
if (flag == 0 || stringPtr->allocated > 0) {
- attempt = 2 * needed;
- if (attempt >= 0) {
+ if (needed <= INT_MAX / 2) {
+ attempt = 2 * needed;
ptr = attemptckrealloc(objPtr->bytes, attempt + 1);
}
if (ptr == NULL) {
@@ -199,8 +191,8 @@ GrowUnicodeBuffer(
* Subsequent appends - apply the growth algorithm.
*/
- attempt = 2 * needed;
- if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
+ if (needed <= STRING_MAXCHARS / 2) {
+ attempt = 2 * needed;
ptr = stringAttemptRealloc(stringPtr, attempt);
}
if (ptr == NULL) {
@@ -418,16 +410,26 @@ Tcl_GetCharLength(
int numChars;
/*
+ * Quick, no-shimmer return for short string reps.
+ */
+
+ if ((objPtr->bytes) && (objPtr->length < 2)) {
+ /* 0 bytes -> 0 chars; 1 byte -> 1 char */
+ return objPtr->length;
+ }
+
+ /*
* Optimize the case where we're really dealing with a bytearray object;
* we don't need to convert to a string to perform the get-length operation.
*
- * NOTE that we do not need the bytearray to be "pure". A ByteArray value
- * with a string rep cannot be trusted to represent the same value as the
- * string rep, but it *can* be trusted to have the same character length
- * as the string rep, which is all this routine cares about.
+ * Starting in Tcl 8.7, we check for a "pure" bytearray, because the
+ * machinery behind that test is using a proper bytearray ObjType. We
+ * could also compute length of an improper bytearray without shimmering
+ * but there's no value in that. We *want* to shimmer an improper bytearray
+ * because improper bytearrays have worthless internal reps.
*/
- if (objPtr->typePtr == &tclByteArrayType) {
+ if (TclIsPureByteArray(objPtr)) {
int length;
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
@@ -449,20 +451,50 @@ Tcl_GetCharLength(
if (numChars == -1) {
TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
stringPtr->numChars = numChars;
+ }
+ return numChars;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclCheckEmptyString --
+ *
+ * Determine whether the string value of an object is or would be the
+ * empty string, without generating a string representation.
+ *
+ * Results:
+ * Returns 1 if empty, 0 if not, and -1 if unknown.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+int
+TclCheckEmptyString(
+ Tcl_Obj *objPtr)
+{
+ int length = -1;
-#if COMPAT
- if (numChars < objPtr->length) {
- /*
- * Since we've just computed the number of chars, and not all UTF
- * chars are 1-byte long, go ahead and populate the unicode
- * string.
- */
+ if (objPtr->bytes == &tclEmptyString) {
+ return TCL_EMPTYSTRING_YES;
+ }
- FillUnicodeRep(objPtr);
- }
-#endif
+ if (TclListObjIsCanonical(objPtr)) {
+ Tcl_ListObjLength(NULL, objPtr, &length);
+ return length == 0;
}
- return numChars;
+
+ if (TclIsPureDict(objPtr)) {
+ Tcl_DictObjSize(NULL, objPtr, &length);
+ return length == 0;
+ }
+
+ if (objPtr->bytes == NULL) {
+ return TCL_EMPTYSTRING_UNKNOWN;
+ }
+ return objPtr->length == 0;
}
/*
@@ -470,8 +502,9 @@ Tcl_GetCharLength(
*
* Tcl_GetUniChar --
*
- * Get the index'th Unicode character from the String object. The index
- * is assumed to be in the appropriate range.
+ * Get the index'th Unicode character from the String object. If index
+ * is out of range or it references a low surrogate preceded by a high
+ * surrogate, the result = -1;
*
* Results:
* Returns the index'th Unicode character in the Object.
@@ -482,24 +515,31 @@ Tcl_GetCharLength(
*----------------------------------------------------------------------
*/
-Tcl_UniChar
+int
Tcl_GetUniChar(
Tcl_Obj *objPtr, /* The object to get the Unicode charater
* from. */
int index) /* Get the index'th Unicode character. */
{
String *stringPtr;
+ int ch, length;
+
+ if (index < 0) {
+ return -1;
+ }
/*
* Optimize the case where we're really dealing with a bytearray object
- * without string representation; we don't need to convert to a string to
- * perform the indexing operation.
+ * we don't need to convert to a string to perform the indexing operation.
*/
if (TclIsPureByteArray(objPtr)) {
- unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
+ if (index >= length) {
+ return -1;
+ }
- return (Tcl_UniChar) bytes[index];
+ return (int) bytes[index];
}
/*
@@ -523,7 +563,28 @@ Tcl_GetUniChar(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
- return stringPtr->unicode[index];
+
+ if (index >= stringPtr->numChars) {
+ return -1;
+ }
+ ch = stringPtr->unicode[index];
+#if TCL_UTF_MAX <= 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((ch & 0xF800) == 0xD800) {
+ if (ch & 0x400) {
+ if ((index > 0)
+ && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
+ ch = -1; /* low surrogate preceded by high surrogate */
+ }
+ } else if ((++index < stringPtr->numChars)
+ && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) {
+ /* high surrogate followed by low surrogate */
+ ch = (((ch & 0x3FF) << 10) |
+ (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
+ }
+ }
+#endif
+ return ch;
}
/*
@@ -545,6 +606,8 @@ Tcl_GetUniChar(
*----------------------------------------------------------------------
*/
+#ifndef TCL_NO_DEPRECATED
+#undef Tcl_GetUnicode
Tcl_UniChar *
Tcl_GetUnicode(
Tcl_Obj *objPtr) /* The object to find the unicode string
@@ -552,6 +615,7 @@ Tcl_GetUnicode(
{
return Tcl_GetUnicodeFromObj(objPtr, NULL);
}
+#endif /* TCL_NO_DEPRECATED */
/*
*----------------------------------------------------------------------
@@ -623,17 +687,27 @@ Tcl_GetRange(
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
String *stringPtr;
+ int length;
+
+ if (first < 0) {
+ first = 0;
+ }
/*
* Optimize the case where we're really dealing with a bytearray object
- * without string representation; we don't need to convert to a string to
- * perform the substring operation.
+ * we don't need to convert to a string to perform the substring operation.
*/
if (TclIsPureByteArray(objPtr)) {
- unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
- return Tcl_NewByteArrayObj(bytes+first, last-first+1);
+ if (last >= length) {
+ last = length - 1;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
+ return Tcl_NewByteArrayObj(bytes + first, last - first + 1);
}
/*
@@ -652,6 +726,12 @@ Tcl_GetRange(
TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
}
if (stringPtr->numChars == objPtr->length) {
+ if (last >= stringPtr->numChars) {
+ last = stringPtr->numChars - 1;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
newObjPtr = Tcl_NewStringObj(objPtr->bytes + first, last-first+1);
/*
@@ -666,19 +746,25 @@ Tcl_GetRange(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
-
-#if TCL_UTF_MAX == 4
- /* See: bug [11ae2be95dac9417] */
- if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
- && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
- ++first;
- }
- if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
- && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
- ++last;
- }
+ if (last > stringPtr->numChars) {
+ last = stringPtr->numChars;
+ }
+ if (last < first) {
+ return Tcl_NewObj();
+ }
+#if TCL_UTF_MAX <= 4
+ /* See: bug [11ae2be95dac9417] */
+ if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
+ ++first;
+ }
+ if ((last + 1 < stringPtr->numChars)
+ && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
+ && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) {
+ ++last;
+ }
#endif
- return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
+ return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1);
}
/*
@@ -794,7 +880,7 @@ Tcl_SetObjLength(
/*
* Need to enlarge the buffer.
*/
- if (objPtr->bytes == tclEmptyStringRep) {
+ if (objPtr->bytes == &tclEmptyString) {
objPtr->bytes = ckalloc(length + 1);
} else {
objPtr->bytes = ckrealloc(objPtr->bytes, length + 1);
@@ -900,7 +986,7 @@ Tcl_AttemptSetObjLength(
char *newBytes;
- if (objPtr->bytes == tclEmptyStringRep) {
+ if (objPtr->bytes == &tclEmptyString) {
newBytes = attemptckalloc(length + 1);
} else {
newBytes = attemptckrealloc(objPtr->bytes, length + 1);
@@ -1188,11 +1274,7 @@ Tcl_AppendUnicodeToObj(
* objPtr's string rep.
*/
- if (stringPtr->hasUnicode
-#if COMPAT
- && stringPtr->numChars > 0
-#endif
- ) {
+ if (stringPtr->hasUnicode) {
AppendUnicodeToUnicodeRep(objPtr, unicode, length);
} else {
AppendUnicodeToUtfRep(objPtr, unicode, length);
@@ -1233,52 +1315,58 @@ Tcl_AppendObjToObj(
* that appending nothing to anything leaves that starting anything...
*/
- if (appendObjPtr->bytes == tclEmptyStringRep) {
+ if (appendObjPtr->bytes == &tclEmptyString) {
return;
}
/*
* Handle append of one bytearray object to another as a special case.
- * Note that we only do this when the objects don't have string reps; if
- * it did, then appending the byte arrays together could well lose
- * information; this is a special-case optimization only.
+ * Note that we only do this when the objects are pure so that the
+ * bytearray faithfully represent the true value; Otherwise appending the
+ * byte arrays together could lose information;
*/
- if ((TclIsPureByteArray(objPtr) || objPtr->bytes == tclEmptyStringRep)
+ if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
&& TclIsPureByteArray(appendObjPtr)) {
-
/*
* You might expect the code here to be
*
* bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
* TclAppendBytesToByteArray(objPtr, bytes, length);
*
- * and essentially all of the time that would be fine. However,
- * it would run into trouble in the case where objPtr and
- * appendObjPtr point to the same thing. That may never be a
- * good idea. It seems to violate Copy On Write, and we don't
- * have any tests for the situation, since making any Tcl commands
- * that call Tcl_AppendObjToObj() do that appears impossible
- * (They honor Copy On Write!). For the sake of extensions that
- * go off into that realm, though, here's a more complex approach
- * that can handle all the cases.
+ * and essentially all of the time that would be fine. However, it
+ * would run into trouble in the case where objPtr and appendObjPtr
+ * point to the same thing. That may never be a good idea. It seems to
+ * violate Copy On Write, and we don't have any tests for the
+ * situation, since making any Tcl commands that call
+ * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On
+ * Write!). For the sake of extensions that go off into that realm,
+ * though, here's a more complex approach that can handle all the
+ * cases.
+ *
+ * First, get the lengths.
*/
- /* Get lengths */
int lengthSrc;
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
(void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc);
- /* Grow buffer enough for the append */
+ /*
+ * Grow buffer enough for the append.
+ */
+
TclAppendBytesToByteArray(objPtr, NULL, lengthSrc);
- /* Reset objPtr back to the original value */
+ /*
+ * Reset objPtr back to the original value.
+ */
+
Tcl_SetByteArrayLength(objPtr, length);
/*
- * Now do the append knowing that buffer growth cannot cause
- * any trouble.
+ * Now do the append knowing that buffer growth cannot cause any
+ * trouble.
*/
TclAppendBytesToByteArray(objPtr,
@@ -1298,11 +1386,7 @@ Tcl_AppendObjToObj(
* appendObjPtr and append it.
*/
- if (stringPtr->hasUnicode
-#if COMPAT
- && stringPtr->numChars > 0
-#endif
- ) {
+ if (stringPtr->hasUnicode) {
/*
* If appendObjPtr is not of the "String" type, don't convert it.
*/
@@ -1330,16 +1414,13 @@ Tcl_AppendObjToObj(
numChars = stringPtr->numChars;
if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
String *appendStringPtr = GET_STRING(appendObjPtr);
+
appendNumChars = appendStringPtr->numChars;
}
AppendUtfToUtfRep(objPtr, bytes, length);
- if (numChars >= 0 && appendNumChars >= 0
-#if COMPAT
- && appendNumChars == length
-#endif
- ) {
+ if (numChars >= 0 && appendNumChars >= 0) {
stringPtr->numChars = numChars + appendNumChars;
}
}
@@ -1463,14 +1544,6 @@ AppendUnicodeToUtfRep(
if (stringPtr->numChars != -1) {
stringPtr->numChars += numChars;
}
-
-#if COMPAT
- /*
- * Invalidate the unicode rep.
- */
-
- stringPtr->hasUnicode = 0;
-#endif
}
/*
@@ -1831,6 +1904,11 @@ Tcl_AppendFormatToObj(
width = 0;
if (isdigit(UCHAR(ch))) {
width = strtoul(format, &end, 10);
+ if (width < 0) {
+ msg = overflow;
+ errCode = "OVERFLOW";
+ goto errorMsg;
+ }
format = end;
step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
@@ -1913,6 +1991,25 @@ Tcl_AppendFormatToObj(
useWide = 1;
#endif
}
+ } else if (ch == 'I') {
+ if ((format[1] == '6') && (format[2] == '4')) {
+ format += (step + 2);
+ step = TclUtfToUniChar(format, &ch);
+#ifndef TCL_WIDE_INT_IS_LONG
+ useWide = 1;
+#endif
+ } else if ((format[1] == '3') && (format[2] == '2')) {
+ format += (step + 2);
+ step = TclUtfToUniChar(format, &ch);
+ } else {
+ format += step;
+ step = TclUtfToUniChar(format, &ch);
+ }
+ } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j')
+ || (ch == 'L')) {
+ format += step;
+ step = TclUtfToUniChar(format, &ch);
+ useBig = 1;
}
format += step;
@@ -1944,13 +2041,17 @@ Tcl_AppendFormatToObj(
}
break;
case 'c': {
- char buf[TCL_UTF_MAX];
+ char buf[4];
int code, length;
if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
goto error;
}
length = Tcl_UniCharToUtf(code, buf);
+ if (!length) {
+ /* Special case for handling upper surrogates. */
+ length = Tcl_UniCharToUtf(-1, buf);
+ }
segment = Tcl_NewStringObj(buf, length);
Tcl_IncrRefCount(segment);
allocSegment = 1;
@@ -1958,13 +2059,9 @@ Tcl_AppendFormatToObj(
}
case 'u':
- if (useBig) {
- msg = "unsigned bignum format is invalid";
- errCode = "BADUNSIGNED";
- goto errorMsg;
- }
case 'd':
case 'o':
+ case 'p':
case 'x':
case 'X':
case 'b': {
@@ -1975,53 +2072,58 @@ Tcl_AppendFormatToObj(
mp_int big;
int toAppend, isNegative = 0;
+#ifndef TCL_WIDE_INT_IS_LONG
+ if (ch == 'p') {
+ useWide = 1;
+ }
+#endif
if (useBig) {
+ int cmpResult;
if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
goto error;
}
- isNegative = (mp_cmp_d(&big, 0) == MP_LT);
+ cmpResult = mp_cmp_d(&big, 0);
+ isNegative = (cmpResult == MP_LT);
+ if (cmpResult == MP_EQ) gotHash = 0;
+ if (ch == 'u') {
+ if (isNegative) {
+ mp_clear(&big);
+ msg = "unsigned bignum format is invalid";
+ errCode = "BADUNSIGNED";
+ goto errorMsg;
+ } else {
+ ch = 'd';
+ }
+ }
#ifndef TCL_WIDE_INT_IS_LONG
} else if (useWide) {
- if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
- Tcl_Obj *objPtr;
-
- if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
- goto error;
- }
- mp_mod_2d(&big, (int) CHAR_BIT*sizeof(Tcl_WideInt), &big);
- objPtr = Tcl_NewBignumObj(&big);
- Tcl_IncrRefCount(objPtr);
- Tcl_GetWideIntFromObj(NULL, objPtr, &w);
- Tcl_DecrRefCount(objPtr);
+ if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) {
+ goto error;
}
isNegative = (w < (Tcl_WideInt) 0);
+ if (w == (Tcl_WideInt) 0) gotHash = 0;
#endif
} else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
- if (Tcl_GetWideIntFromObj(NULL, segment, &w) != TCL_OK) {
- Tcl_Obj *objPtr;
-
- if (Tcl_GetBignumFromObj(interp,segment,&big) != TCL_OK) {
- goto error;
- }
- mp_mod_2d(&big, (int) CHAR_BIT * sizeof(long), &big);
- objPtr = Tcl_NewBignumObj(&big);
- Tcl_IncrRefCount(objPtr);
- TclGetLongFromObj(NULL, objPtr, &l);
- Tcl_DecrRefCount(objPtr);
+ if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) {
+ goto error;
} else {
- l = Tcl_WideAsLong(w);
+ l = (long) w;
}
if (useShort) {
s = (short) l;
isNegative = (s < (short) 0);
+ if (s == (short) 0) gotHash = 0;
} else {
isNegative = (l < (long) 0);
+ if (l == (long) 0) gotHash = 0;
}
} else if (useShort) {
s = (short) l;
isNegative = (s < (short) 0);
+ if (s == (short) 0) gotHash = 0;
} else {
isNegative = (l < (long) 0);
+ if (l == (long) 0) gotHash = 0;
}
segment = Tcl_NewObj();
@@ -2035,18 +2137,15 @@ Tcl_AppendFormatToObj(
segmentLimit -= 1;
}
- if (gotHash) {
+ if (gotHash || (ch == 'p')) {
switch (ch) {
case 'o':
- Tcl_AppendToObj(segment, "0", 1);
- segmentLimit -= 1;
- precision--;
- break;
- case 'X':
- Tcl_AppendToObj(segment, "0X", 2);
+ Tcl_AppendToObj(segment, "0o", 2);
segmentLimit -= 2;
break;
+ case 'p':
case 'x':
+ case 'X':
Tcl_AppendToObj(segment, "0x", 2);
segmentLimit -= 2;
break;
@@ -2054,6 +2153,14 @@ Tcl_AppendFormatToObj(
Tcl_AppendToObj(segment, "0b", 2);
segmentLimit -= 2;
break;
+#if TCL_MAJOR_VERSION < 9
+ case 'd':
+ if (gotZero) {
+ Tcl_AppendToObj(segment, "0d", 2);
+ segmentLimit -= 2;
+ }
+ break;
+#endif
}
}
@@ -2125,6 +2232,7 @@ Tcl_AppendFormatToObj(
case 'u':
case 'o':
+ case 'p':
case 'x':
case 'X':
case 'b': {
@@ -2190,7 +2298,7 @@ Tcl_AppendFormatToObj(
* Need to be sure zero becomes "0", not "".
*/
- if ((numDigits == 0) && !((ch == 'o') && gotHash)) {
+ if (numDigits == 0) {
numDigits = 1;
}
pure = Tcl_NewObj();
@@ -2257,6 +2365,8 @@ Tcl_AppendFormatToObj(
break;
}
+ case 'a':
+ case 'A':
case 'e':
case 'E':
case 'f':
@@ -2325,6 +2435,12 @@ Tcl_AppendFormatToObj(
errCode = "OVERFLOW";
goto errorMsg;
}
+ if (ch == 'A') {
+ char *p = TclGetString(segment) + 1;
+ *p = 'x';
+ p = strchr(p, 'P');
+ if (p) *p = 'p';
+ }
break;
}
default:
@@ -2401,7 +2517,7 @@ Tcl_AppendFormatToObj(
/*
*---------------------------------------------------------------------------
*
- * Tcl_Format--
+ * Tcl_Format --
*
* Results:
* A refcount zero Tcl_Obj.
@@ -2512,6 +2628,7 @@ AppendPrintfToObjVA(
case 'u':
case 'd':
case 'o':
+ case 'p':
case 'x':
case 'X':
seekingConversion = 0;
@@ -2525,15 +2642,30 @@ AppendPrintfToObjVA(
Tcl_ListObjAppendElement(NULL, list, Tcl_NewLongObj(
va_arg(argList, long)));
break;
+ case 2:
+ Tcl_ListObjAppendElement(NULL, list, Tcl_NewWideIntObj(
+ va_arg(argList, Tcl_WideInt)));
+ break;
+ case 3:
+ Tcl_ListObjAppendElement(NULL, list, Tcl_NewBignumObj(
+ va_arg(argList, mp_int *)));
+ break;
}
break;
+ case 'a':
+ case 'A':
case 'e':
case 'E':
case 'f':
case 'g':
case 'G':
+ if (size > 0) {
Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
- va_arg(argList, double)));
+ (double)va_arg(argList, long double)));
+ } else {
+ Tcl_ListObjAppendElement(NULL, list, Tcl_NewDoubleObj(
+ va_arg(argList, double)));
+ }
seekingConversion = 0;
break;
case '*':
@@ -2553,9 +2685,35 @@ AppendPrintfToObjVA(
gotPrecision = 1;
p++;
break;
- /* TODO: support for wide (and bignum?) arguments */
case 'l':
- size = 1;
+ ++size;
+ p++;
+ break;
+ case 't':
+ case 'z':
+ if (sizeof(size_t) == sizeof(Tcl_WideInt)) {
+ size = 2;
+ }
+ p++;
+ break;
+ case 'j':
+ case 'q':
+ size = 2;
+ p++;
+ break;
+ case 'I':
+ if (p[1]=='6' && p[2]=='4') {
+ p += 2;
+ size = 2;
+ } else if (p[1]=='3' && p[2]=='2') {
+ p += 2;
+ } else if (sizeof(size_t) == sizeof(Tcl_WideInt)) {
+ size = 2;
+ }
+ p++;
+ break;
+ case 'L':
+ size = 3;
p++;
break;
case 'h':
@@ -2662,17 +2820,894 @@ TclGetStringStorage(
*sizePtr = stringPtr->allocated;
return objPtr->bytes;
}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringRepeat --
+ *
+ * Performs the [string repeat] function.
+ *
+ * Results:
+ * A (Tcl_Obj *) pointing to the result value, or NULL in case of an
+ * error.
+ *
+ * Side effects:
+ * On error, when interp is not NULL, error information is left in it.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclStringRepeat(
+ Tcl_Interp *interp,
+ Tcl_Obj *objPtr,
+ int count,
+ int flags)
+{
+ Tcl_Obj *objResultPtr;
+ int inPlace = flags & TCL_STRING_IN_PLACE;
+ int length = 0, unichar = 0, done = 1;
+ int binary = TclIsPureByteArray(objPtr);
+
+ /* assert (count >= 2) */
+
+ /*
+ * Analyze to determine what representation result should be.
+ * GOALS: Avoid shimmering & string rep generation.
+ * Produce pure bytearray when possible.
+ * Error on overflow.
+ */
+
+ if (!binary) {
+ if (objPtr->typePtr == &tclStringType) {
+ String *stringPtr = GET_STRING(objPtr);
+ if (stringPtr->hasUnicode) {
+ unichar = 1;
+ }
+ }
+ }
+
+ if (binary) {
+ /* Result will be pure byte array. Pre-size it */
+ Tcl_GetByteArrayFromObj(objPtr, &length);
+ } else if (unichar) {
+ /* Result will be pure Tcl_UniChar array. Pre-size it. */
+ Tcl_GetUnicodeFromObj(objPtr, &length);
+ } else {
+ /* Result will be concat of string reps. Pre-size it. */
+ Tcl_GetStringFromObj(objPtr, &length);
+ }
+
+ if (length == 0) {
+ /* Any repeats of empty is empty. */
+ return objPtr;
+ }
+
+ if (count > INT_MAX/length) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+
+ if (binary) {
+ /* Efficiently produce a pure byte array result */
+ objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ?
+ Tcl_DuplicateObj(objPtr) : objPtr;
+
+ Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */
+ Tcl_SetByteArrayLength(objResultPtr, length);
+ while (count - done > done) {
+ Tcl_AppendObjToObj(objResultPtr, objResultPtr);
+ done *= 2;
+ }
+ TclAppendBytesToByteArray(objResultPtr,
+ Tcl_GetByteArrayFromObj(objResultPtr, NULL),
+ (count - done) * length);
+ } else if (unichar) {
+ /*
+ * Efficiently produce a pure Tcl_UniChar array result.
+ */
+
+ if (!inPlace || Tcl_IsShared(objPtr)) {
+ objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length);
+ } else {
+ TclInvalidateStringRep(objPtr);
+ objResultPtr = objPtr;
+ }
+
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "string size overflow: unable to alloc %"
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(count*length)));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ Tcl_SetObjLength(objResultPtr, length);
+ while (count - done > done) {
+ Tcl_AppendObjToObj(objResultPtr, objResultPtr);
+ done *= 2;
+ }
+ Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr),
+ (count - done) * length);
+ } else {
+ /*
+ * Efficiently concatenate string reps.
+ */
+
+ if (!inPlace || Tcl_IsShared(objPtr)) {
+ objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length);
+ } else {
+ TclFreeIntRep(objPtr);
+ objResultPtr = objPtr;
+ }
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "string size overflow: unable to alloc %u bytes",
+ count*length));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ Tcl_SetObjLength(objResultPtr, length);
+ while (count - done > done) {
+ Tcl_AppendObjToObj(objResultPtr, objResultPtr);
+ done *= 2;
+ }
+ Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr),
+ (count - done) * length);
+ }
+ return objResultPtr;
+}
+
/*
*---------------------------------------------------------------------------
*
- * TclStringObjReverse --
+ * TclStringCat --
+ *
+ * Performs the [string cat] function.
+ *
+ * Results:
+ * A (Tcl_Obj *) pointing to the result value, or NULL in case of an
+ * error.
+ *
+ * Side effects:
+ * On error, when interp is not NULL, error information is left in it.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclStringCat(
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj * const objv[],
+ int flags)
+{
+ Tcl_Obj *objResultPtr, * const *ov;
+ int oc, length = 0, binary = 1;
+ int allowUniChar = 1, requestUniChar = 0;
+ int first = objc - 1; /* Index of first value possibly not empty */
+ int last = 0; /* Index of last value possibly not empty */
+ int inPlace = flags & TCL_STRING_IN_PLACE;
+
+ /* assert ( objc >= 0 ) */
+
+ if (objc <= 1) {
+ /* Only one or no objects; return first or empty */
+ return objc ? objv[0] : Tcl_NewObj();
+ }
+
+ /* assert ( objc >= 2 ) */
+
+ /*
+ * Analyze to determine what representation result should be.
+ * GOALS: Avoid shimmering & string rep generation.
+ * Produce pure bytearray when possible.
+ * Error on overflow.
+ */
+
+ ov = objv, oc = objc;
+ do {
+ Tcl_Obj *objPtr = *ov++;
+
+ if (TclIsPureByteArray(objPtr)) {
+ allowUniChar = 0;
+ } else if (objPtr->bytes) {
+ /* Value has a string rep. */
+ if (objPtr->length) {
+ /*
+ * Non-empty string rep. Not a pure bytearray, so we won't
+ * create a pure bytearray.
+ */
+
+ binary = 0;
+ if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
+ /* Prevent shimmer of non-string types. */
+ allowUniChar = 0;
+ }
+ }
+ } else {
+ /* assert (objPtr->typePtr != NULL) -- stork! */
+ binary = 0;
+ if (objPtr->typePtr == &tclStringType) {
+ /* Have a pure Unicode value; ask to preserve it */
+ requestUniChar = 1;
+ } else {
+ /* Have another type; prevent shimmer */
+ allowUniChar = 0;
+ }
+ }
+ } while (--oc && (binary || allowUniChar));
+
+ if (binary) {
+ /*
+ * Result will be pure byte array. Pre-size it
+ */
+
+ int numBytes;
+ ov = objv;
+ oc = objc;
+ do {
+ Tcl_Obj *objPtr = *ov++;
+
+ /*
+ * Every argument is either a bytearray with a ("pure")
+ * value we know we can safely use, or it is an empty string.
+ * We don't need to count bytes for the empty strings.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ Tcl_GetByteArrayFromObj(objPtr, &numBytes); /* PANIC? */
+
+ if (numBytes) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
+ }
+ }
+ } while (--oc);
+ } else if (allowUniChar && requestUniChar) {
+ /*
+ * Result will be pure Tcl_UniChar array. Pre-size it.
+ */
+
+ ov = objv;
+ oc = objc;
+ do {
+ Tcl_Obj *objPtr = *ov++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int numChars;
+
+ Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */
+ if (numChars) {
+ last = objc - oc;
+ if (length == 0) {
+ first = last;
+ } else if (numChars > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numChars;
+ }
+ }
+ } while (--oc);
+ } else {
+ /* Result will be concat of string reps. Pre-size it. */
+ ov = objv; oc = objc;
+ do {
+ Tcl_Obj *pendingPtr = NULL;
+
+ /*
+ * Loop until a possibly non-empty value is reached.
+ * Keep string rep generation pending when possible.
+ */
+
+ do {
+ /* assert ( pendingPtr == NULL ) */
+ /* assert ( length == 0 ) */
+
+ Tcl_Obj *objPtr = *ov++;
+
+ if (objPtr->bytes == NULL) {
+ /* No string rep; Take the chance we can avoid making it */
+ pendingPtr = objPtr;
+ } else {
+ Tcl_GetStringFromObj(objPtr, &length); /* PANIC? */
+ }
+ } while (--oc && (length == 0) && (pendingPtr == NULL));
+
+ /*
+ * Either we found a possibly non-empty value, and we remember
+ * this index as the first and last such value so far seen,
+ * or (oc == 0) and all values are known empty,
+ * so first = last = objc - 1 signals the right quick return.
+ */
+
+ first = last = objc - oc - 1;
+
+ if (oc && (length == 0)) {
+ int numBytes;
+
+ /* assert ( pendingPtr != NULL ) */
+
+ /*
+ * There's a pending value followed by more values. Loop over
+ * remaining values generating strings until a non-empty value
+ * is found, or the pending value gets its string generated.
+ */
+
+ do {
+ Tcl_Obj *objPtr = *ov++;
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ } while (--oc && numBytes == 0 && pendingPtr->bytes == NULL);
+
+ if (numBytes) {
+ last = objc -oc -1;
+ }
+ if (oc || numBytes) {
+ Tcl_GetStringFromObj(pendingPtr, &length);
+ }
+ if (length == 0) {
+ if (numBytes) {
+ first = last;
+ }
+ } else if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
+ }
+ } while (oc && (length == 0));
+
+ while (oc) {
+ int numBytes;
+ Tcl_Obj *objPtr = *ov++;
+
+ /* assert ( length > 0 && pendingPtr == NULL ) */
+
+ Tcl_GetStringFromObj(objPtr, &numBytes); /* PANIC? */
+ if (numBytes) {
+ last = objc - oc;
+ if (numBytes > INT_MAX - length) {
+ goto overflow;
+ }
+ length += numBytes;
+ }
+ --oc;
+ }
+ }
+
+ if (last <= first /*|| length == 0 */) {
+ /* Only one non-empty value or zero length; return first */
+ /* NOTE: (length == 0) implies (last <= first) */
+ return objv[first];
+ }
+
+ objv += first; objc = (last - first + 1);
+
+ if (binary) {
+ /* Efficiently produce a pure byte array result */
+ unsigned char *dst;
+
+ /*
+ * Broken interface! Byte array value routines offer no way to handle
+ * failure to allocate enough space. Following stanza may panic.
+ */
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+ Tcl_GetByteArrayFromObj(objResultPtr, &start);
+ dst = Tcl_SetByteArrayLength(objResultPtr, length) + start;
+ } else {
+ objResultPtr = Tcl_NewByteArrayObj(NULL, length);
+ dst = Tcl_SetByteArrayLength(objResultPtr, length);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ /*
+ * Every argument is either a bytearray with a ("pure")
+ * value we know we can safely use, or it is an empty string.
+ * We don't need to copy bytes from the empty strings.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ int more;
+ unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more);
+ memcpy(dst, src, (size_t) more);
+ dst += more;
+ }
+ }
+ } else if (allowUniChar && requestUniChar) {
+ /* Efficiently produce a pure Tcl_UniChar array result */
+ Tcl_UniChar *dst;
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+
+ /* Ugly interface! Force resize of the unicode array. */
+ Tcl_GetUnicodeFromObj(objResultPtr, &start);
+ Tcl_InvalidateStringRep(objResultPtr);
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "concatenation failed: unable to alloc %"
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(length)));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ dst = Tcl_GetUnicode(objResultPtr) + start;
+ } else {
+ Tcl_UniChar ch = 0;
+
+ /* Ugly interface! No scheme to init array size. */
+ objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ Tcl_DecrRefCount(objResultPtr);
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "concatenation failed: unable to alloc %"
+ TCL_Z_MODIFIER "u bytes",
+ STRING_SIZE(length)));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ dst = Tcl_GetUnicode(objResultPtr);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int more;
+ Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more);
+ memcpy(dst, src, more * sizeof(Tcl_UniChar));
+ dst += more;
+ }
+ }
+ } else {
+ /* Efficiently concatenate string reps */
+ char *dst;
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+
+ Tcl_GetStringFromObj(objResultPtr, &start);
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "concatenation failed: unable to alloc %u bytes",
+ length));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ dst = Tcl_GetString(objResultPtr) + start;
+
+ /* assert ( length > start ) */
+ TclFreeIntRep(objResultPtr);
+ } else {
+ objResultPtr = Tcl_NewObj(); /* PANIC? */
+ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
+ Tcl_DecrRefCount(objResultPtr);
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "concatenation failed: unable to alloc %u bytes",
+ length));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ dst = Tcl_GetString(objResultPtr);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int more;
+ char *src = Tcl_GetStringFromObj(objPtr, &more);
+
+ memcpy(dst, src, (size_t) more);
+ dst += more;
+ }
+ }
+ /* Must NUL-terminate! */
+ *dst = '\0';
+ }
+ return objResultPtr;
+
+ overflow:
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringCmp --
+ * Compare two Tcl_Obj values as strings.
+ *
+ * Results:
+ * Like memcmp, return -1, 0, or 1.
+ *
+ * Side effects:
+ * String representations may be generated. Internal representation may
+ * be changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int TclStringCmp(
+ Tcl_Obj *value1Ptr,
+ Tcl_Obj *value2Ptr,
+ int checkEq, /* comparison is only for equality */
+ int nocase, /* comparison is not case sensitive */
+ int reqlength) /* requested length */
+{
+ char *s1, *s2;
+ int empty, length, match, s1len, s2len;
+ memCmpFn_t memCmpFn;
+
+ if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
+ /*
+ * Always match at 0 chars of if it is the same obj.
+ */
+ match = 0;
+ } else {
+
+ if (!nocase && TclIsPureByteArray(value1Ptr)
+ && TclIsPureByteArray(value2Ptr)) {
+ /*
+ * Use binary versions of comparisons since that won't cause undue
+ * type conversions and it is much faster. Only do this if we're
+ * case-sensitive (which is all that really makes sense with byte
+ * arrays anyway, and we have no memcasecmp() for some reason... :^)
+ */
+
+ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
+ s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+ memCmpFn = memcmp;
+ } else if ((value1Ptr->typePtr == &tclStringType)
+ && (value2Ptr->typePtr == &tclStringType)) {
+ /*
+ * Do a unicode-specific comparison if both of the args are of
+ * String type. If the char length == byte length, we can do a
+ * memcmp. In benchmark testing this proved the most efficient
+ * check between the unicode and string comparison operations.
+ */
+
+ if (nocase) {
+ s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
+ s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
+ memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
+ } else {
+ s1len = Tcl_GetCharLength(value1Ptr);
+ s2len = Tcl_GetCharLength(value2Ptr);
+ if ((s1len == value1Ptr->length)
+ && (value1Ptr->bytes != NULL)
+ && (s2len == value2Ptr->length)
+ && (value2Ptr->bytes != NULL)) {
+ s1 = value1Ptr->bytes;
+ s2 = value2Ptr->bytes;
+ memCmpFn = memcmp;
+ } else {
+ s1 = (char *) Tcl_GetUnicode(value1Ptr);
+ s2 = (char *) Tcl_GetUnicode(value2Ptr);
+ if (
+#ifdef WORDS_BIGENDIAN
+ 1
+#else
+ checkEq
+#endif
+ ) {
+ memCmpFn = memcmp;
+ s1len *= sizeof(Tcl_UniChar);
+ s2len *= sizeof(Tcl_UniChar);
+ } else {
+ memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
+ }
+ }
+ }
+ } else {
+ if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
+ switch (TclCheckEmptyString(value2Ptr)) {
+ case -1:
+ s1 = 0;
+ s1len = 0;
+ s2 = TclGetStringFromObj(value2Ptr, &s2len);
+ break;
+ case 0:
+ match = -1;
+ goto matchdone;
+ case 1:
+ default: /* avoid warn: `s2` may be used uninitialized */
+ match = 0;
+ goto matchdone;
+ }
+ } else if (TclCheckEmptyString(value2Ptr) > 0) {
+ switch (empty) {
+ case -1:
+ s2 = 0;
+ s2len = 0;
+ s1 = TclGetStringFromObj(value1Ptr, &s1len);
+ break;
+ case 0:
+ match = 1;
+ goto matchdone;
+ case 1:
+ default: /* avoid warn: `s1` may be used uninitialized */
+ match = 0;
+ goto matchdone;
+ }
+ } else {
+ s1 = TclGetStringFromObj(value1Ptr, &s1len);
+ s2 = TclGetStringFromObj(value2Ptr, &s2len);
+ }
+ if (!nocase && checkEq) {
+ /*
+ * When we have equal-length we can check only for
+ * (in)equality. We can use memcmp in all (n)eq cases because
+ * we don't need to worry about lexical LE/BE variance.
+ */
+
+ memCmpFn = memcmp;
+ } else {
+ /*
+ * As a catch-all we will work with UTF-8. We cannot use
+ * memcmp() as that is unsafe with any string containing NUL
+ * (\xC0\x80 in Tcl's utf rep). We can use the more efficient
+ * TclpUtfNcmp2 if we are case-sensitive and no specific
+ * length was requested.
+ */
+
+ if ((reqlength < 0) && !nocase) {
+ memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+ } else {
+ s1len = Tcl_NumUtfChars(s1, s1len);
+ s2len = Tcl_NumUtfChars(s2, s2len);
+ memCmpFn = (memCmpFn_t)
+ (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
+ }
+ }
+ }
+
+ length = (s1len < s2len) ? s1len : s2len;
+ if (reqlength > 0 && reqlength < length) {
+ length = reqlength;
+ } else if (reqlength < 0) {
+ /*
+ * The requested length is negative, so we ignore it by setting it
+ * to length + 1 so we correct the match var.
+ */
+
+ reqlength = length + 1;
+ }
+
+ if (checkEq && (s1len != s2len)) {
+ match = 1; /* This will be reversed below. */
+ } else {
+ /*
+ * The comparison function should compare up to the minimum byte
+ * length only.
+ */
+
+ match = memCmpFn(s1, s2, (size_t) length);
+ }
+ if ((match == 0) && (reqlength > length)) {
+ match = s1len - s2len;
+ }
+ match = (match > 0) ? 1 : (match < 0) ? -1 : 0;
+ }
+ matchdone:
+ return match;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringFirst --
+ *
+ * Implements the [string first] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * first instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringFirst(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int start)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (start < 0) {
+ start = 0;
+ }
+ if (ln == 0) {
+ /* We don't find empty substrings. Bizarre!
+ * Whenever this routine is turned into a proper substring
+ * finder, change to `return start` after limits imposed. */
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *end, *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ /* Find bytes in bytes */
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ end = bh + lh;
+
+ try = bh + start;
+ while (try + ln <= end) {
+ /*
+ * Look for the leading byte of the needle in the haystack
+ * starting at try and stopping when there's not enough room
+ * for the needle left.
+ */
+ try = memchr(try, bn[0], (end + 1 - ln) - try);
+ if (try == NULL) {
+ /* Leading byte not found -> needle cannot be found. */
+ return -1;
+ }
+ /* Leading byte found, check rest of needle. */
+ if (0 == memcmp(try+1, bn+1, ln-1)) {
+ /* Checks! Return the successful index. */
+ return (try - bh);
+ }
+ /* Rest of needle match failed; Iterate to continue search. */
+ try++;
+ }
+ return -1;
+ }
+
+ /*
+ * TODO: It might be nice to support some cases where it is not
+ * necessary to shimmer to &tclStringType to compute the result,
+ * and instead operate just on the objPtr->bytes values directly.
+ * However, we also do not want the answer to change based on the
+ * code pathway, or if it does we want that to be for some values
+ * we explicitly decline to support. Getting there will involve
+ * locking down in practice more firmly just what encodings produce
+ * what supported results for the objPtr->bytes values. For now,
+ * do only the well-defined Tcl_UniChar array search.
+ */
+
+ {
+ Tcl_UniChar *try, *end, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ end = uh + lh;
+
+ for (try = uh + start; try + ln <= end; try++) {
+ if ((*try == *un) && (0 ==
+ memcmp(try + 1, un + 1, (ln-1) * sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringLast --
+ *
+ * Implements the [string last] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * last instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringLast(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int last)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return last", after limitation.
+ */
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (last >= lh) {
+ last = lh - 1;
+ }
+
+ if (last < ln - 1) {
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *try, *bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ try = bh + last + 1 - ln;
+ while (try >= bh) {
+ if ((*try == bn[0])
+ && (0 == memcmp(try+1, bn+1, ln-1))) {
+ return (try - bh);
+ }
+ try--;
+ }
+ return -1;
+ }
+
+ {
+ Tcl_UniChar *try, *uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ try = uh + last + 1 - ln;
+ while (try >= uh) {
+ if ((*try == un[0])
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try--;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringReverse --
*
* Implements the [string reverse] operation.
*
* Results:
- * An unshared Tcl value which is the [string reverse] of the argument
- * supplied. When sharing rules permit, the returned value might be the
- * argument with modifications done in place.
+ * A Tcl value which is the [string reverse] of the argument supplied.
+ * When sharing rules permit and the caller requests, the returned value
+ * might be the argument with modifications done in place.
*
* Side effects:
* May allocate a new Tcl_Obj.
@@ -2684,18 +3719,20 @@ static void
ReverseBytes(
unsigned char *to, /* Copy bytes into here... */
unsigned char *from, /* ...from here... */
- int count) /* Until this many are copied, */
+ int count) /* Until this many are copied, */
/* reversing as you go. */
{
unsigned char *src = from + count;
+
if (to == from) {
/* Reversing in place */
while (--src > to) {
unsigned char c = *src;
+
*src = *to;
*to++ = c;
}
- } else {
+ } else {
while (--src >= from) {
*to++ = *src;
}
@@ -2703,17 +3740,19 @@ ReverseBytes(
}
Tcl_Obj *
-TclStringObjReverse(
- Tcl_Obj *objPtr)
+TclStringReverse(
+ Tcl_Obj *objPtr,
+ int flags)
{
String *stringPtr;
Tcl_UniChar ch = 0;
+ int inPlace = flags & TCL_STRING_IN_PLACE;
if (TclIsPureByteArray(objPtr)) {
int numBytes;
unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
}
ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
@@ -2727,7 +3766,7 @@ TclStringObjReverse(
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
Tcl_UniChar *to;
/*
@@ -2742,7 +3781,10 @@ TclStringObjReverse(
*to++ = *src;
}
} else {
- /* Reversing in place */
+ /*
+ * Reversing in place.
+ */
+
while (--src > from) {
ch = *src;
*src = *from;
@@ -2756,7 +3798,7 @@ TclStringObjReverse(
int numBytes = objPtr->length;
char *to, *from = objPtr->bytes;
- if (Tcl_IsShared(objPtr)) {
+ if (!inPlace || Tcl_IsShared(objPtr)) {
objPtr = Tcl_NewObj();
Tcl_SetObjLength(objPtr, numBytes);
}
@@ -2766,20 +3808,22 @@ TclStringObjReverse(
/*
* Either numChars == -1 and we don't know how many chars are
* represented by objPtr->bytes and we need Pass 1 just in case,
- * or numChars >= 0 and we know we have fewer chars than bytes,
- * so we know there's a multibyte character needing Pass 1.
+ * or numChars >= 0 and we know we have fewer chars than bytes, so
+ * we know there's a multibyte character needing Pass 1.
*
* Pass 1. Reverse the bytes of each multi-byte character.
*/
+
int charCount = 0;
int bytesLeft = numBytes;
while (bytesLeft) {
/*
- * NOTE: We know that the from buffer is NUL-terminated.
- * It's part of the contract for objPtr->bytes values.
- * Thus, we can skip calling Tcl_UtfCharComplete() here.
+ * NOTE: We know that the from buffer is NUL-terminated. It's
+ * part of the contract for objPtr->bytes values. Thus, we can
+ * skip calling Tcl_UtfCharComplete() here.
*/
+
int bytesInChar = TclUtfToUniChar(from, &ch);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
@@ -2803,6 +3847,150 @@ TclStringObjReverse(
/*
*---------------------------------------------------------------------------
*
+ * TclStringReplace --
+ *
+ * Implements the inner engine of the [string replace] command.
+ *
+ * The result is a concatenation of a prefix from objPtr, characters
+ * 0 through first-1, the insertPtr string value, and a suffix from
+ * objPtr, characters from first + count to the end. The effect is as if
+ * the inner substring of characters first through first+count-1 are
+ * removed and replaced with insertPtr. If insertPtr is NULL, it is
+ * treated as an empty string. When passed the flag TCL_STRING_IN_PLACE,
+ * this routine will try to do the work within objPtr, so long as no
+ * sharing forbids it. Without that request, or as needed, a new Tcl
+ * value will be allocated to be the result.
+ *
+ * Results:
+ * A Tcl value that is the result of the substring replacement. May
+ * return NULL in case of an error. When NULL is returned and interp is
+ * non-NULL, error information is left in interp
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclStringReplace(
+ Tcl_Interp *interp, /* For error reporting, may be NULL */
+ Tcl_Obj *objPtr, /* String to act upon */
+ int first, /* First index to replace */
+ int count, /* How many chars to replace */
+ Tcl_Obj *insertPtr, /* Replacement string, may be NULL */
+ int flags) /* TCL_STRING_IN_PLACE => attempt in-place */
+{
+ int inPlace = flags & TCL_STRING_IN_PLACE;
+ Tcl_Obj *result;
+
+ /* Caller is expected to pass sensible arguments */
+ assert ( count >= 0 ) ;
+ assert ( first >= 0 ) ;
+
+ /* Replace nothing with nothing */
+ if ((insertPtr == NULL) && (count == 0)) {
+ if (inPlace) {
+ return objPtr;
+ } else {
+ return Tcl_DuplicateObj(objPtr);
+ }
+ }
+
+ /*
+ * The caller very likely had to call Tcl_GetCharLength() or similar
+ * to be able to process index values. This means it is like that
+ * objPtr is either a proper "bytearray" or a "string" or else it has
+ * a known and short string rep.
+ */
+
+ if (TclIsPureByteArray(objPtr)) {
+ int numBytes;
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
+
+ if (insertPtr == NULL) {
+ /* Replace something with nothing. */
+
+ assert ( first <= numBytes ) ;
+ assert ( count <= numBytes ) ;
+ assert ( first + count <= numBytes ) ;
+
+ result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */
+ TclAppendBytesToByteArray(result, bytes, first);
+ TclAppendBytesToByteArray(result, bytes + first + count,
+ numBytes - count - first);
+ return result;
+ }
+
+ /* Replace everything */
+ if ((first == 0) && (count == numBytes)) {
+ return insertPtr;
+ }
+
+ if (TclIsPureByteArray(insertPtr)) {
+ int newBytes;
+ unsigned char *iBytes
+ = Tcl_GetByteArrayFromObj(insertPtr, &newBytes);
+
+ if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) {
+ /*
+ * Removal count and replacement count are equal.
+ * Other conditions permit. Do in-place splice.
+ */
+
+ memcpy(bytes + first, iBytes, count);
+ Tcl_InvalidateStringRep(objPtr);
+ return objPtr;
+ }
+
+ if (newBytes > INT_MAX - (numBytes - count)) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded",
+ INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return NULL;
+ }
+ result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes);
+ /* PANIC? */
+ Tcl_SetByteArrayLength(result, 0);
+ TclAppendBytesToByteArray(result, bytes, first);
+ TclAppendBytesToByteArray(result, iBytes, newBytes);
+ TclAppendBytesToByteArray(result, bytes + first + count,
+ numBytes - count - first);
+ return result;
+ }
+
+ /* Flow through to try other approaches below */
+ }
+
+ /*
+ * TODO: Figure out how not to generate a Tcl_UniChar array rep
+ * when it can be determined objPtr->bytes points to a string of
+ * all single-byte characters so we can index it directly.
+ */
+
+ /* The traditional implementation... */
+ {
+ int numChars;
+ Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars);
+
+ /* TODO: Is there an in-place option worth pursuing here? */
+
+ result = Tcl_NewUnicodeObj(ustring, first);
+ if (insertPtr) {
+ Tcl_AppendObjToObj(result, insertPtr);
+ }
+ if (first + count < numChars) {
+ Tcl_AppendUnicodeToObj(result, ustring + first + count,
+ numChars - first - count);
+ }
+
+ return result;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* FillUnicodeRep --
*
* Populate the Unicode internal rep with the Unicode form of its string
@@ -2894,7 +4082,6 @@ DupStringInternalRep(
String *srcStringPtr = GET_STRING(srcPtr);
String *copyStringPtr = NULL;
-#if COMPAT==0
if (srcStringPtr->numChars == -1) {
/*
* The String struct in the source value holds zero useful data. Don't
@@ -2937,41 +4124,6 @@ DupStringInternalRep(
*/
copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0;
-#else /* COMPAT!=0 */
- /*
- * If the src obj is a string of 1-byte Utf chars, then copy the string
- * rep of the source object and create an "empty" Unicode internal rep for
- * the new object. Otherwise, copy Unicode internal rep, and invalidate
- * the string rep of the new object.
- */
-
- if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) {
- /*
- * Copy the full allocation for the Unicode buffer.
- */
-
- copyStringPtr = stringAlloc(srcStringPtr->maxChars);
- copyStringPtr->maxChars = srcStringPtr->maxChars;
- memcpy(copyStringPtr->unicode, srcStringPtr->unicode,
- srcStringPtr->numChars * sizeof(Tcl_UniChar));
- copyStringPtr->unicode[srcStringPtr->numChars] = 0;
- copyStringPtr->allocated = 0;
- } else {
- copyStringPtr = stringAlloc(0);
- copyStringPtr->unicode[0] = 0;
- copyStringPtr->maxChars = 0;
-
- /*
- * Tricky point: the string value was copied by generic object
- * management code, so it doesn't contain any extra bytes that might
- * exist in the source object.
- */
-
- copyStringPtr->allocated = copyPtr->length;
- }
- copyStringPtr->numChars = srcStringPtr->numChars;
- copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
-#endif /* COMPAT==0 */
SET_STRING(copyPtr, copyStringPtr);
copyPtr->typePtr = &tclStringType;
@@ -3059,7 +4211,7 @@ UpdateStringOfString(
stringPtr->allocated = 0;
if (stringPtr->numChars == 0) {
- TclInitStringRep(objPtr, tclEmptyStringRep, 0);
+ TclInitStringRep(objPtr, NULL, 0);
} else {
(void) ExtendStringRepWithUnicode(objPtr, stringPtr->unicode,
stringPtr->numChars);
@@ -3077,7 +4229,7 @@ ExtendStringRepWithUnicode(
*/
int i, origLength, size = 0;
- char *dst, buf[TCL_UTF_MAX];
+ char *dst;
String *stringPtr = GET_STRING(objPtr);
if (numChars < 0) {
@@ -3103,7 +4255,7 @@ ExtendStringRepWithUnicode(
}
for (i = 0; i < numChars && size >= 0; i++) {
- size += Tcl_UniCharToUtf((int) unicode[i], buf);
+ size += TclUtfCount(unicode[i]);
}
if (size < 0) {
Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);