diff options
Diffstat (limited to 'generic/tclStringObj.c')
| -rw-r--r-- | generic/tclStringObj.c | 205 |
1 files changed, 85 insertions, 120 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 1eb5037..52b6283 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -140,8 +140,8 @@ GrowStringBuffer( objPtr->bytes = NULL; } if (flag == 0 || stringPtr->allocated > 0) { - attempt = 2 * needed; - if (attempt <= STRING_MAXCHARS) { + if (needed <= STRING_MAXCHARS / 2) { + attempt = 2 * needed; ptr = attemptckrealloc(objPtr->bytes, attempt + 1); } if (ptr == NULL) { @@ -190,8 +190,8 @@ GrowUnicodeBuffer( * Subsequent appends - apply the growth algorithm. */ - attempt = 2 * needed; - if (attempt <= STRING_MAXCHARS) { + if (needed <= STRING_MAXCHARS / 2) { + attempt = 2 * needed; ptr = stringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { @@ -2663,23 +2663,24 @@ TclGetStringStorage( * Performs the [string repeat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of count copies of the value in objPtr. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int +Tcl_Obj * TclStringRepeat( Tcl_Interp *interp, Tcl_Obj *objPtr, int count, - Tcl_Obj **objPtrPtr) + int flags) { Tcl_Obj *objResultPtr; + int inPlace = flags & TCL_STRING_IN_PLACE; int length = 0, unichar = 0, done = 1; int binary = TclIsPureByteArray(objPtr); @@ -2714,8 +2715,7 @@ TclStringRepeat( if (length == 0) { /* Any repeats of empty is empty. */ - *objPtrPtr = objPtr; - return TCL_OK; + return objPtr; } if (count > INT_MAX/length) { @@ -2724,13 +2724,13 @@ TclStringRepeat( "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } if (binary) { /* Efficiently produce a pure byte array result */ - objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr) - : objPtr; + objResultPtr = (!inPlace || Tcl_IsShared(objPtr)) ? + Tcl_DuplicateObj(objPtr) : objPtr; Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? */ Tcl_SetByteArrayLength(objResultPtr, length); @@ -2743,7 +2743,7 @@ TclStringRepeat( (count - done) * length); } else if (unichar) { /* Efficiently produce a pure Tcl_UniChar array result */ - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); } else { TclInvalidateStringRep(objPtr); @@ -2758,7 +2758,7 @@ TclStringRepeat( (Tcl_WideUInt)STRING_SIZE(count*length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2769,7 +2769,7 @@ TclStringRepeat( (count - done) * length); } else { /* Efficiently concatenate string reps */ - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); } else { TclFreeIntRep(objPtr); @@ -2782,7 +2782,7 @@ TclStringRepeat( count*length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { @@ -2792,47 +2792,45 @@ TclStringRepeat( Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), (count - done) * length); } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; } /* *--------------------------------------------------------------------------- * - * TclStringCatObjv -- + * TclStringCat -- * * Performs the [string cat] function. * * Results: - * A standard Tcl result. + * A (Tcl_Obj *) pointing to the result value, or NULL in case of an + * error. * * Side effects: - * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation - * of all objc values in objv. + * On error, when interp is not NULL, error information is left in it. * *--------------------------------------------------------------------------- */ -int -TclStringCatObjv( +Tcl_Obj * +TclStringCat( Tcl_Interp *interp, - int inPlace, int objc, Tcl_Obj * const objv[], - Tcl_Obj **objPtrPtr) + int flags) { Tcl_Obj *objResultPtr, * const *ov; int oc, length = 0, binary = 1; int allowUniChar = 1, requestUniChar = 0; int first = objc - 1; /* Index of first value possibly not empty */ int last = 0; /* Index of last value possibly not empty */ + int inPlace = flags & TCL_STRING_IN_PLACE; /* assert ( objc >= 0 ) */ if (objc <= 1) { /* Only one or no objects; return first or empty */ - *objPtrPtr = objc ? objv[0] : Tcl_NewObj(); - return TCL_OK; + return objc ? objv[0] : Tcl_NewObj(); } /* assert ( objc >= 2 ) */ @@ -3009,8 +3007,7 @@ TclStringCatObjv( if (last <= first /*|| length == 0 */) { /* Only one non-empty value or zero length; return first */ /* NOTE: (length == 0) implies (last <= first) */ - *objPtrPtr = objv[first]; - return TCL_OK; + return objv[first]; } objv += first; objc = (last - first + 1); @@ -3064,7 +3061,7 @@ TclStringCatObjv( (Tcl_WideUInt)STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr) + start; } else { @@ -3081,7 +3078,7 @@ TclStringCatObjv( (Tcl_WideUInt)STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetUnicode(objResultPtr); } @@ -3112,7 +3109,7 @@ TclStringCatObjv( length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr) + start; @@ -3128,7 +3125,7 @@ TclStringCatObjv( length)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } dst = Tcl_GetString(objResultPtr); } @@ -3143,8 +3140,7 @@ TclStringCatObjv( } } } - *objPtrPtr = objResultPtr; - return TCL_OK; + return objResultPtr; overflow: if (interp) { @@ -3152,13 +3148,13 @@ TclStringCatObjv( "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } - return TCL_ERROR; + return NULL; } /* *--------------------------------------------------------------------------- * - * TclStringFind -- + * TclStringFirst -- * * Implements the [string first] operation. * @@ -3174,20 +3170,20 @@ TclStringCatObjv( */ int -TclStringFind( +TclStringFirst( Tcl_Obj *needle, Tcl_Obj *haystack, int start) { int lh, ln = Tcl_GetCharLength(needle); + if (start < 0) { + start = 0; + } if (ln == 0) { - /* - * We don't find empty substrings. Bizarre! - * - * TODO: When we one day make this a true substring - * finder, change this to "return 0" - */ + /* We don't find empty substrings. Bizarre! + * Whenever this routine is turned into a proper substring + * finder, change to `return start` after limits imposed. */ return -1; } @@ -3195,51 +3191,46 @@ TclStringFind( unsigned char *end, *try, *bh; unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + /* Find bytes in bytes */ bh = Tcl_GetByteArrayFromObj(haystack, &lh); end = bh + lh; try = bh + start; while (try + ln <= end) { - try = memchr(try, bn[0], end - try); - + /* + * Look for the leading byte of the needle in the haystack + * starting at try and stopping when there's not enough room + * for the needle left. + */ + try = memchr(try, bn[0], (end + 1 - ln) - try); if (try == NULL) { + /* Leading byte not found -> needle cannot be found. */ return -1; } + /* Leading byte found, check rest of needle. */ if (0 == memcmp(try+1, bn+1, ln-1)) { + /* Checks! Return the successful index. */ return (try - bh); } + /* Rest of needle match failed; Iterate to continue search. */ try++; } return -1; } /* - * Check if we have two strings of single-byte characters. If we have, we - * can use strstr() to do the search. Note that we can sometimes have - * multibyte characters when the string could be minimally represented - * using single byte characters; we can't assume that a mismatch here - * means no match. + * TODO: It might be nice to support some cases where it is not + * necessary to shimmer to &tclStringType to compute the result, + * and instead operate just on the objPtr->bytes values directly. + * However, we also do not want the answer to change based on the + * code pathway, or if it does we want that to be for some values + * we explicitly decline to support. Getting there will involve + * locking down in practice more firmly just what encodings produce + * what supported results for the objPtr->bytes values. For now, + * do only the well-defined Tcl_UniChar array search. */ - lh = Tcl_GetCharLength(haystack); - if (haystack->bytes && ((size_t)lh == haystack->length) && needle->bytes - && ((size_t)ln == needle->length)) { - /* - * Both haystack and needle are all single-byte chars. - */ - - char *found = strstr(haystack->bytes + start, needle->bytes); - - if (found) { - return (found - haystack->bytes); - } else { - return -1; - } - } else { - /* - * Do the search on the unicode representation for simplicity. - */ - + { Tcl_UniChar *try, *end, *uh; Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); @@ -3287,24 +3278,24 @@ TclStringLast( * We don't find empty substrings. Bizarre! * * TODO: When we one day make this a true substring - * finder, change this to "return 0" + * finder, change this to "return last", after limitation. */ return -1; } - if (ln > last + 1) { + lh = Tcl_GetCharLength(haystack); + if (last >= lh) { + last = lh - 1; + } + + if (last < ln - 1) { return -1; } if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { - unsigned char *try, *bh; + unsigned char *try, *bh = Tcl_GetByteArrayFromObj(haystack, &lh); unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); - bh = Tcl_GetByteArrayFromObj(haystack, &lh); - - if (last + 1 > lh) { - last = lh - 1; - } try = bh + last + 1 - ln; while (try >= bh) { if ((*try == bn[0]) @@ -3316,38 +3307,10 @@ TclStringLast( return -1; } - lh = Tcl_GetCharLength(haystack); - if (last + 1 > lh) { - last = lh - 1; - } - if (haystack->bytes && ((size_t)lh == haystack->length)) { - /* haystack is all single-byte chars */ - - if (needle->bytes && ((size_t)ln == needle->length)) { - /* needle is also all single-byte chars */ - - char *try = haystack->bytes + last + 1 - ln; - while (try >= haystack->bytes) { - if ((*try == needle->bytes[0]) - && (0 == memcmp(try+1, needle->bytes + 1, ln - 1))) { - return (try - haystack->bytes); - } - try--; - } - return -1; - } else { - /* - * Cannot find substring with a multi-byte char inside - * a string with no multi-byte chars. - */ - return -1; - } - } else { - Tcl_UniChar *try, *uh; + { + Tcl_UniChar *try, *uh = Tcl_GetUnicodeFromObj(haystack, &lh); Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - try = uh + last + 1 - ln; while (try >= uh) { if ((*try == un[0]) @@ -3363,14 +3326,14 @@ TclStringLast( /* *--------------------------------------------------------------------------- * - * TclStringObjReverse -- + * TclStringReverse -- * * Implements the [string reverse] operation. * * Results: - * An unshared Tcl value which is the [string reverse] of the argument - * supplied. When sharing rules permit, the returned value might be the - * argument with modifications done in place. + * A Tcl value which is the [string reverse] of the argument supplied. + * When sharing rules permit and the caller requests, the returned value + * might be the argument with modifications done in place. * * Side effects: * May allocate a new Tcl_Obj. @@ -3401,17 +3364,19 @@ ReverseBytes( } Tcl_Obj * -TclStringObjReverse( - Tcl_Obj *objPtr) +TclStringReverse( + Tcl_Obj *objPtr, + int flags) { String *stringPtr; Tcl_UniChar ch = 0; + int inPlace = flags & TCL_STRING_IN_PLACE; if (TclIsPureByteArray(objPtr)) { int numBytes; unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); @@ -3425,7 +3390,7 @@ TclStringObjReverse( Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { Tcl_UniChar *to; /* @@ -3454,7 +3419,7 @@ TclStringObjReverse( size_t numBytes = objPtr->length; char *to, *from = objPtr->bytes; - if (Tcl_IsShared(objPtr)) { + if (!inPlace || Tcl_IsShared(objPtr)) { objPtr = Tcl_NewObj(); Tcl_SetObjLength(objPtr, numBytes); } |
