summaryrefslogtreecommitdiffstats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2016-11-18 12:10:26 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2016-11-18 12:10:26 (GMT)
commit450b6f46c96b96bec44fb0bc119e3d11f3434f4a (patch)
tree5f4ccb4d3df569fcd06e804ff1b5ebd8a72505bb /generic/tclStringObj.c
parent4d9fc11c2ffbcc95218111734c64397df0dfcf63 (diff)
parentedcac92c086fa48aff50db9763afecbed00c2a37 (diff)
downloadtcl-450b6f46c96b96bec44fb0bc119e3d11f3434f4a.zip
tcl-450b6f46c96b96bec44fb0bc119e3d11f3434f4a.tar.gz
tcl-450b6f46c96b96bec44fb0bc119e3d11f3434f4a.tar.bz2
merge novem
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r--generic/tclStringObj.c463
1 files changed, 457 insertions, 6 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 57f368a..0087c34 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -409,6 +409,15 @@ Tcl_GetCharLength(
size_t numChars;
/*
+ * Quick, no-shimmer return for short string reps.
+ */
+
+ if ((objPtr->bytes) && (objPtr->length < 2)) {
+ /* 0 bytes -> 0 chars; 1 byte -> 1 char */
+ return objPtr->length;
+ }
+
+ /*
* Optimize the case where we're really dealing with a bytearray object
* without string representation; we don't need to convert to a string to
* perform the get-length operation.
@@ -773,7 +782,7 @@ Tcl_SetObjLength(
* Invalidate the unicode data.
*/
- stringPtr->numChars = -1;
+ stringPtr->numChars = (size_t)-1;
stringPtr->hasUnicode = 0;
} else {
/*
@@ -883,7 +892,7 @@ Tcl_AttemptSetObjLength(
* Invalidate the unicode data.
*/
- stringPtr->numChars = -1;
+ stringPtr->numChars = (size_t)-1;
stringPtr->hasUnicode = 0;
} else {
/*
@@ -1173,6 +1182,8 @@ Tcl_AppendUnicodeToObj(
* Side effects:
* The string rep of appendObjPtr is appended to the string
* representation of objPtr.
+ * IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr.
+ * Callers are counting on that.
*
*----------------------------------------------------------------------
*/
@@ -1289,7 +1300,7 @@ Tcl_AppendObjToObj(
AppendUtfToUtfRep(objPtr, bytes, length);
- if (numChars >= 0 && appendNumChars >= 0) {
+ if (numChars >= 0 && appendNumChars != (size_t)-1) {
stringPtr->numChars = numChars + appendNumChars;
}
}
@@ -1531,7 +1542,7 @@ AppendUtfToUtfRep(
* Invalidate the unicode data.
*/
- stringPtr->numChars = -1;
+ stringPtr->numChars = (size_t)-1;
stringPtr->hasUnicode = 0;
if (bytes) {
@@ -2285,7 +2296,7 @@ Tcl_AppendFormatToObj(
}
}
- Tcl_GetStringFromObj(segment, &segmentNumBytes);
+ TclGetStringFromObj(segment, &segmentNumBytes);
if (segmentNumBytes > limit) {
if (allocSegment) {
Tcl_DecrRefCount(segment);
@@ -2598,6 +2609,436 @@ TclGetStringStorage(
*sizePtr = stringPtr->allocated;
return objPtr->bytes;
}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringCatObjv --
+ *
+ * Performs the [string cat] function.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation
+ * of all objc values in objv.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringCatObjv(
+ Tcl_Interp *interp,
+ int inPlace,
+ int objc,
+ Tcl_Obj * const objv[],
+ Tcl_Obj **objPtrPtr)
+{
+ Tcl_Obj *objPtr, *objResultPtr, * const *ov;
+ int oc, length = 0, binary = 1, first = 0;
+ int allowUniChar = 1, requestUniChar = 0;
+
+ /* assert (objc >= 2) */
+
+ /*
+ * Analyze to determine what representation result should be.
+ * GOALS: Avoid shimmering & string rep generation.
+ * Produce pure bytearray when possible.
+ * Error on overflow.
+ */
+
+ ov = objv, oc = objc;
+ while (oc-- && (binary || allowUniChar)) {
+ objPtr = *ov++;
+
+ if (objPtr->bytes) {
+ /* Value has a string rep. */
+ if (objPtr->length) {
+ /*
+ * Non-empty string rep. Not a pure bytearray, so we
+ * won't create a pure bytearray
+ */
+ binary = 0;
+ if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
+ /* Prevent shimmer of non-string types. */
+ allowUniChar = 0;
+ }
+ }
+ } else {
+ /* assert (objPtr->typePtr != NULL) -- stork! */
+ if (TclIsPureByteArray(objPtr)) {
+ allowUniChar = 0;
+ } else {
+ binary = 0;
+ if (objPtr->typePtr == &tclStringType) {
+ /* Have a pure Unicode value; ask to preserve it */
+ requestUniChar = 1;
+ } else {
+ /* Have another type; prevent shimmer */
+ allowUniChar = 0;
+ }
+ }
+ }
+ }
+
+ if (binary) {
+ /* Result will be pure byte array. Pre-size it */
+ ov = objv; oc = objc;
+ while (oc-- && (length >= 0)) {
+ objPtr = *ov++;
+
+ if (objPtr->bytes == NULL) {
+ int numBytes;
+
+ Tcl_GetByteArrayFromObj(objPtr, &numBytes);
+ if (length == 0) {
+ first = objc - oc - 1;
+ }
+ length += numBytes;
+ }
+ }
+ } else if (allowUniChar && requestUniChar) {
+ /* Result will be pure Tcl_UniChar array. Pre-size it. */
+ ov = objv; oc = objc;
+ while (oc-- && (length >= 0)) {
+ objPtr = *ov++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int numChars;
+
+ Tcl_GetUnicodeFromObj(objPtr, &numChars);
+ if (length == 0) {
+ first = objc - oc - 1;
+ }
+ length += numChars;
+ }
+ }
+ } else {
+ /* Result will be concat of string reps. Pre-size it. */
+ ov = objv; oc = objc;
+ while (oc-- && (length >= 0)) {
+ int numBytes;
+
+ objPtr = *ov++;
+
+ Tcl_GetStringFromObj(objPtr, &numBytes);
+ if ((length == 0) && numBytes) {
+ first = objc - oc - 1;
+ }
+ length += numBytes;
+ }
+ }
+
+ if (length < 0) {
+ if (interp) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
+ Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
+ }
+ return TCL_ERROR;
+ }
+
+ if (length == 0) {
+ /* Total length of zero means every value has length zero */
+ *objPtrPtr = objv[0];
+ return TCL_OK;
+ }
+
+ objv += first; objc -= first;
+
+ if (binary) {
+ /* Efficiently produce a pure byte array result */
+ unsigned char *dst;
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+ Tcl_GetByteArrayFromObj(objResultPtr, &start);
+ dst = Tcl_SetByteArrayLength(objResultPtr, length) + start;
+ } else {
+ objResultPtr = Tcl_NewByteArrayObj(NULL, length);
+ dst = Tcl_SetByteArrayLength(objResultPtr, length);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ if (objPtr->bytes == NULL) {
+ int more;
+ unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more);
+ memcpy(dst, src, (size_t) more);
+ dst += more;
+ }
+ }
+ } else if (allowUniChar && requestUniChar) {
+ /* Efficiently produce a pure Tcl_UniChar array result */
+ Tcl_UniChar *dst;
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+
+ /* Ugly interface! Force resize of the unicode array. */
+ Tcl_GetUnicodeFromObj(objResultPtr, &start);
+ Tcl_InvalidateStringRep(objResultPtr);
+ Tcl_SetObjLength(objResultPtr, length);
+ dst = Tcl_GetUnicode(objResultPtr) + start;
+ } else {
+ Tcl_UniChar ch = 0;
+
+ /* Ugly interface! No scheme to init array size. */
+ objResultPtr = Tcl_NewUnicodeObj(&ch, 0);
+ Tcl_SetObjLength(objResultPtr, length);
+ dst = Tcl_GetUnicode(objResultPtr);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int more;
+ Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more);
+ memcpy(dst, src, more * sizeof(Tcl_UniChar));
+ dst += more;
+ }
+ }
+ } else {
+ /* Efficiently concatenate string reps */
+ char *dst;
+
+ if (inPlace && !Tcl_IsShared(*objv)) {
+ int start;
+
+ objResultPtr = *objv++; objc--;
+
+ Tcl_GetStringFromObj(objResultPtr, &start);
+ Tcl_SetObjLength(objResultPtr, length);
+ dst = Tcl_GetString(objResultPtr) + start;
+ if (length > start) {
+ TclFreeIntRep(objResultPtr);
+ }
+ } else {
+ objResultPtr = Tcl_NewObj();
+ Tcl_SetObjLength(objResultPtr, length);
+ dst = Tcl_GetString(objResultPtr);
+ }
+ while (objc--) {
+ Tcl_Obj *objPtr = *objv++;
+
+ if ((objPtr->bytes == NULL) || (objPtr->length)) {
+ int more;
+ char *src = Tcl_GetStringFromObj(objPtr, &more);
+ memcpy(dst, src, (size_t) more);
+ dst += more;
+ }
+ }
+ }
+ *objPtrPtr = objResultPtr;
+ return TCL_OK;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringFind --
+ *
+ * Implements the [string first] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * first instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringFind(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int start)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *end, *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+ end = bh + lh;
+
+ try = bh + start;
+ while (try + ln <= end) {
+ try = memchr(try, bn[0], end - try);
+
+ if (try == NULL) {
+ return -1;
+ }
+ if (0 == memcmp(try+1, bn+1, ln-1)) {
+ return (try - bh);
+ }
+ try++;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+ char *found = strstr(haystack->bytes + start, needle->bytes);
+
+ if (found) {
+ return (found - haystack->bytes);
+ } else {
+ return -1;
+ }
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *end, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+ end = uh + lh;
+
+ try = uh + start;
+ while (try + ln <= end) {
+ if ((*try == *un)
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try++;
+ }
+ return -1;
+ }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclStringLast --
+ *
+ * Implements the [string last] operation.
+ *
+ * Results:
+ * If needle is found as a substring of haystack, the index of the
+ * last instance of such a find is returned. If needle is not present
+ * as a substring of haystack, -1 is returned.
+ *
+ * Side effects:
+ * needle and haystack may have their Tcl_ObjType changed.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+TclStringLast(
+ Tcl_Obj *needle,
+ Tcl_Obj *haystack,
+ int last)
+{
+ int lh, ln = Tcl_GetCharLength(needle);
+
+ if (ln == 0) {
+ /*
+ * We don't find empty substrings. Bizarre!
+ *
+ * TODO: When we one day make this a true substring
+ * finder, change this to "return 0"
+ */
+ return -1;
+ }
+
+ if (ln > last + 1) {
+ return -1;
+ }
+
+ if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
+ unsigned char *try, *bh;
+ unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);
+
+ bh = Tcl_GetByteArrayFromObj(haystack, &lh);
+
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ try = bh + last + 1 - ln;
+ while (try >= bh) {
+ if ((*try == bn[0])
+ && (0 == memcmp(try+1, bn+1, ln-1))) {
+ return (try - bh);
+ }
+ try--;
+ }
+ return -1;
+ }
+
+ lh = Tcl_GetCharLength(haystack);
+ if (last + 1 > lh) {
+ last = lh - 1;
+ }
+ if (haystack->bytes && (lh == haystack->length)) {
+ /* haystack is all single-byte chars */
+
+ if (needle->bytes && (ln == needle->length)) {
+ /* needle is also all single-byte chars */
+
+ char *try = haystack->bytes + last + 1 - ln;
+ while (try >= haystack->bytes) {
+ if ((*try == needle->bytes[0])
+ && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) {
+ return (try - haystack->bytes);
+ }
+ try--;
+ }
+ return -1;
+ } else {
+ /*
+ * Cannot find substring with a multi-byte char inside
+ * a string with no multi-byte chars.
+ */
+ return -1;
+ }
+ } else {
+ Tcl_UniChar *try, *uh;
+ Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);
+
+ uh = Tcl_GetUnicodeFromObj(haystack, &lh);
+
+ try = uh + last + 1 - ln;
+ while (try >= uh) {
+ if ((*try == un[0])
+ && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
+ return (try - uh);
+ }
+ try--;
+ }
+ return -1;
+ }
+}
+
/*
*---------------------------------------------------------------------------
*
@@ -2914,7 +3355,7 @@ SetStringFromAny(
* already in place at objPtr->bytes.
*/
- stringPtr->numChars = -1;
+ stringPtr->numChars = (size_t)-1;
stringPtr->allocated = objPtr->length;
stringPtr->maxChars = 0;
stringPtr->hasUnicode = 0;
@@ -2948,6 +3389,16 @@ UpdateStringOfString(
{
String *stringPtr = GET_STRING(objPtr);
+ /*
+ * This routine is only called when we need to generate the
+ * string rep objPtr->bytes because it does not exist -- it is NULL.
+ * In that circumstance, any lingering claim about the size of
+ * memory pointed to by that NULL pointer is clearly bogus, and
+ * needs a reset.
+ */
+
+ stringPtr->allocated = 0;
+
if (stringPtr->numChars == 0) {
TclInitStringRep(objPtr, tclEmptyStringRep, 0);
} else {