diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 463 |
1 files changed, 457 insertions, 6 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 57f368a..0087c34 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -409,6 +409,15 @@ Tcl_GetCharLength( size_t numChars; /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* * Optimize the case where we're really dealing with a bytearray object * without string representation; we don't need to convert to a string to * perform the get-length operation. @@ -773,7 +782,7 @@ Tcl_SetObjLength( * Invalidate the unicode data. */ - stringPtr->numChars = -1; + stringPtr->numChars = (size_t)-1; stringPtr->hasUnicode = 0; } else { /* @@ -883,7 +892,7 @@ Tcl_AttemptSetObjLength( * Invalidate the unicode data. */ - stringPtr->numChars = -1; + stringPtr->numChars = (size_t)-1; stringPtr->hasUnicode = 0; } else { /* @@ -1173,6 +1182,8 @@ Tcl_AppendUnicodeToObj( * Side effects: * The string rep of appendObjPtr is appended to the string * representation of objPtr. + * IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr. + * Callers are counting on that. * *---------------------------------------------------------------------- */ @@ -1289,7 +1300,7 @@ Tcl_AppendObjToObj( AppendUtfToUtfRep(objPtr, bytes, length); - if (numChars >= 0 && appendNumChars >= 0) { + if (numChars >= 0 && appendNumChars != (size_t)-1) { stringPtr->numChars = numChars + appendNumChars; } } @@ -1531,7 +1542,7 @@ AppendUtfToUtfRep( * Invalidate the unicode data. */ - stringPtr->numChars = -1; + stringPtr->numChars = (size_t)-1; stringPtr->hasUnicode = 0; if (bytes) { @@ -2285,7 +2296,7 @@ Tcl_AppendFormatToObj( } } - Tcl_GetStringFromObj(segment, &segmentNumBytes); + TclGetStringFromObj(segment, &segmentNumBytes); if (segmentNumBytes > limit) { if (allocSegment) { Tcl_DecrRefCount(segment); @@ -2598,6 +2609,436 @@ TclGetStringStorage( *sizePtr = stringPtr->allocated; return objPtr->bytes; } + +/* + *--------------------------------------------------------------------------- + * + * TclStringCatObjv -- + * + * Performs the [string cat] function. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation + * of all objc values in objv. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringCatObjv( + Tcl_Interp *interp, + int inPlace, + int objc, + Tcl_Obj * const objv[], + Tcl_Obj **objPtrPtr) +{ + Tcl_Obj *objPtr, *objResultPtr, * const *ov; + int oc, length = 0, binary = 1, first = 0; + int allowUniChar = 1, requestUniChar = 0; + + /* assert (objc >= 2) */ + + /* + * Analyze to determine what representation result should be. + * GOALS: Avoid shimmering & string rep generation. + * Produce pure bytearray when possible. + * Error on overflow. + */ + + ov = objv, oc = objc; + while (oc-- && (binary || allowUniChar)) { + objPtr = *ov++; + + if (objPtr->bytes) { + /* Value has a string rep. */ + if (objPtr->length) { + /* + * Non-empty string rep. Not a pure bytearray, so we + * won't create a pure bytearray + */ + binary = 0; + if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + /* Prevent shimmer of non-string types. */ + allowUniChar = 0; + } + } + } else { + /* assert (objPtr->typePtr != NULL) -- stork! */ + if (TclIsPureByteArray(objPtr)) { + allowUniChar = 0; + } else { + binary = 0; + if (objPtr->typePtr == &tclStringType) { + /* Have a pure Unicode value; ask to preserve it */ + requestUniChar = 1; + } else { + /* Have another type; prevent shimmer */ + allowUniChar = 0; + } + } + } + } + + if (binary) { + /* Result will be pure byte array. Pre-size it */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + objPtr = *ov++; + + if (objPtr->bytes == NULL) { + int numBytes; + + Tcl_GetByteArrayFromObj(objPtr, &numBytes); + if (length == 0) { + first = objc - oc - 1; + } + length += numBytes; + } + } + } else if (allowUniChar && requestUniChar) { + /* Result will be pure Tcl_UniChar array. Pre-size it. */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + objPtr = *ov++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int numChars; + + Tcl_GetUnicodeFromObj(objPtr, &numChars); + if (length == 0) { + first = objc - oc - 1; + } + length += numChars; + } + } + } else { + /* Result will be concat of string reps. Pre-size it. */ + ov = objv; oc = objc; + while (oc-- && (length >= 0)) { + int numBytes; + + objPtr = *ov++; + + Tcl_GetStringFromObj(objPtr, &numBytes); + if ((length == 0) && numBytes) { + first = objc - oc - 1; + } + length += numBytes; + } + } + + if (length < 0) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + + if (length == 0) { + /* Total length of zero means every value has length zero */ + *objPtrPtr = objv[0]; + return TCL_OK; + } + + objv += first; objc -= first; + + if (binary) { + /* Efficiently produce a pure byte array result */ + unsigned char *dst; + + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + Tcl_GetByteArrayFromObj(objResultPtr, &start); + dst = Tcl_SetByteArrayLength(objResultPtr, length) + start; + } else { + objResultPtr = Tcl_NewByteArrayObj(NULL, length); + dst = Tcl_SetByteArrayLength(objResultPtr, length); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if (objPtr->bytes == NULL) { + int more; + unsigned char *src = Tcl_GetByteArrayFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); + dst += more; + } + } + } else if (allowUniChar && requestUniChar) { + /* Efficiently produce a pure Tcl_UniChar array result */ + Tcl_UniChar *dst; + + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + + /* Ugly interface! Force resize of the unicode array. */ + Tcl_GetUnicodeFromObj(objResultPtr, &start); + Tcl_InvalidateStringRep(objResultPtr); + Tcl_SetObjLength(objResultPtr, length); + dst = Tcl_GetUnicode(objResultPtr) + start; + } else { + Tcl_UniChar ch = 0; + + /* Ugly interface! No scheme to init array size. */ + objResultPtr = Tcl_NewUnicodeObj(&ch, 0); + Tcl_SetObjLength(objResultPtr, length); + dst = Tcl_GetUnicode(objResultPtr); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int more; + Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + memcpy(dst, src, more * sizeof(Tcl_UniChar)); + dst += more; + } + } + } else { + /* Efficiently concatenate string reps */ + char *dst; + + if (inPlace && !Tcl_IsShared(*objv)) { + int start; + + objResultPtr = *objv++; objc--; + + Tcl_GetStringFromObj(objResultPtr, &start); + Tcl_SetObjLength(objResultPtr, length); + dst = Tcl_GetString(objResultPtr) + start; + if (length > start) { + TclFreeIntRep(objResultPtr); + } + } else { + objResultPtr = Tcl_NewObj(); + Tcl_SetObjLength(objResultPtr, length); + dst = Tcl_GetString(objResultPtr); + } + while (objc--) { + Tcl_Obj *objPtr = *objv++; + + if ((objPtr->bytes == NULL) || (objPtr->length)) { + int more; + char *src = Tcl_GetStringFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); + dst += more; + } + } + } + *objPtrPtr = objResultPtr; + return TCL_OK; +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringFind -- + * + * Implements the [string first] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * first instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringFind( + Tcl_Obj *needle, + Tcl_Obj *haystack, + int start) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *end, *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + end = bh + lh; + + try = bh + start; + while (try + ln <= end) { + try = memchr(try, bn[0], end - try); + + if (try == NULL) { + return -1; + } + if (0 == memcmp(try+1, bn+1, ln-1)) { + return (try - bh); + } + try++; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + char *found = strstr(haystack->bytes + start, needle->bytes); + + if (found) { + return (found - haystack->bytes); + } else { + return -1; + } + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *end, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + end = uh + lh; + + try = uh + start; + while (try + ln <= end) { + if ((*try == *un) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try++; + } + return -1; + } +} + +/* + *--------------------------------------------------------------------------- + * + * TclStringLast -- + * + * Implements the [string last] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * last instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringLast( + Tcl_Obj *needle, + Tcl_Obj *haystack, + int last) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (ln > last + 1) { + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + + if (last + 1 > lh) { + last = lh - 1; + } + try = bh + last + 1 - ln; + while (try >= bh) { + if ((*try == bn[0]) + && (0 == memcmp(try+1, bn+1, ln-1))) { + return (try - bh); + } + try--; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (last + 1 > lh) { + last = lh - 1; + } + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + + char *try = haystack->bytes + last + 1 - ln; + while (try >= haystack->bytes) { + if ((*try == needle->bytes[0]) + && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { + return (try - haystack->bytes); + } + try--; + } + return -1; + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + + try = uh + last + 1 - ln; + while (try >= uh) { + if ((*try == un[0]) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try--; + } + return -1; + } +} + /* *--------------------------------------------------------------------------- * @@ -2914,7 +3355,7 @@ SetStringFromAny( * already in place at objPtr->bytes. */ - stringPtr->numChars = -1; + stringPtr->numChars = (size_t)-1; stringPtr->allocated = objPtr->length; stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; @@ -2948,6 +3389,16 @@ UpdateStringOfString( { String *stringPtr = GET_STRING(objPtr); + /* + * This routine is only called when we need to generate the + * string rep objPtr->bytes because it does not exist -- it is NULL. + * In that circumstance, any lingering claim about the size of + * memory pointed to by that NULL pointer is clearly bogus, and + * needs a reset. + */ + + stringPtr->allocated = 0; + if (stringPtr->numChars == 0) { TclInitStringRep(objPtr, tclEmptyStringRep, 0); } else { |