From 0004d6152456219c0ef09046c169ad3ad7532ee1 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 30 Nov 2016 03:08:24 +0000 Subject: Route all [string repeat] operations through a common implementation. Code that implementation to preserve bytearrays, eliminate unnecessary string rep generation, increase efficiency, and encapsulate access to internal rep details. --- generic/tclCmdMZ.c | 65 ++--------------------- generic/tclInt.h | 2 + generic/tclStringObj.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 61 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 023c671..2572b2b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2118,9 +2118,7 @@ StringReptCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - const char *string1; - char *string2; - int count, index, length1, length2; + int count; Tcl_Obj *resultPtr; if (objc != 3) { @@ -2138,70 +2136,15 @@ StringReptCmd( if (count == 1) { Tcl_SetObjResult(interp, objv[1]); - goto done; + return TCL_OK; } else if (count < 1) { - goto done; - } - string1 = TclGetStringFromObj(objv[1], &length1); - if (length1 <= 0) { - goto done; - } - - /* - * Only build up a string that has data. Instead of building it up with - * repeated appends, we just allocate the necessary space once and copy - * the string value in. - * - * We have to worry about overflow [Bugs 714106, 2561746]. - * At this point we know 1 <= length1 <= INT_MAX and 2 <= count <= INT_MAX. - * We need to keep 2 <= length2 <= INT_MAX. - */ - - if (count > INT_MAX/length1) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "result exceeds max size for a Tcl value (%d bytes)", - INT_MAX)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); - return TCL_ERROR; + return TCL_OK; } - length2 = length1 * count; - - /* - * Include space for the NUL. - */ - - string2 = attemptckalloc((unsigned) length2 + 1); - if (string2 == NULL) { - /* - * Alloc failed.
Note that in this case we try to do an error message - * since this is a case that's most likely when the alloc is large and - * that's easy to do with this API. Note that if we fail allocating a - * short string, this will likely keel over too (and fatally). - */ - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "string size overflow, out of memory allocating %u bytes", - length2 + 1)); - Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + if (TCL_OK != TclStringRepeat(interp, objv[1], count, &resultPtr)) { return TCL_ERROR; } - for (index = 0; index < count; index++) { - memcpy(string2 + (length1 * index), string1, (size_t) length1); - } - string2[length2] = '\0'; - - /* - * We have to directly assign this instead of using Tcl_SetStringObj (and - * indirectly TclInitStringRep) because that makes another copy of the - * data. - */ - - TclNewObj(resultPtr); - resultPtr->bytes = string2; - resultPtr->length = length2; Tcl_SetObjResult(interp, resultPtr); - - done: return TCL_OK; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 4257ea1..8f85f19 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3147,6 +3147,8 @@ MODULE_SCOPE int TclStringMatch(const char *str, int strLen, MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, Tcl_Obj *patternObj, int flags); MODULE_SCOPE Tcl_Obj * TclStringObjReverse(Tcl_Obj *objPtr); +MODULE_SCOPE int TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr, + int count, Tcl_Obj **objPtrPtr); MODULE_SCOPE void TclSubstCompile(Tcl_Interp *interp, const char *bytes, int numBytes, int flags, int line, struct CompileEnv *envPtr); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 34931e0..6184461 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2613,6 +2613,146 @@ TclGetStringStorage( /* *--------------------------------------------------------------------------- * + * TclStringRepeat -- + * + * Performs the [string repeat] function. + * + * Results: + * A standard Tcl result. 
+ * + * Side effects: + * Writes to *objPtrPtr the address of Tcl_Obj that is concatenation + * of count copies of the value in objPtr. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringRepeat( + Tcl_Interp *interp, + Tcl_Obj *objPtr, + int count, + Tcl_Obj **objPtrPtr) +{ + Tcl_Obj *objResultPtr; + int length = 0, unichar = 0, done = 1; + int binary = TclIsPureByteArray(objPtr); + + /* assert (count >= 2) */ + + /* + * Analyze to determine what representation result should be. + * GOALS: Avoid shimmering & string rep generation. + * Produce pure bytearray when possible. + * Error on overflow. + */ + + if (!binary) { + if (objPtr->typePtr == &tclStringType) { + String *stringPtr = GET_STRING(objPtr); + if (stringPtr->hasUnicode) { + unichar = 1; + } + } + } + + if (binary) { + /* Result will be pure byte array. Pre-size it */ + Tcl_GetByteArrayFromObj(objPtr, &length); + } else if (unichar) { + /* Result will be pure Tcl_UniChar array. Pre-size it. */ + Tcl_GetUnicodeFromObj(objPtr, &length); + } else { + /* Result will be concat of string reps. Pre-size it. */ + Tcl_GetStringFromObj(objPtr, &length); + } + + if (length == 0) { + /* Any repeats of empty is empty. */ + *objPtrPtr = objPtr; + return TCL_OK; + } + + if (count > INT_MAX/length) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + + if (binary) { + /* Efficiently produce a pure byte array result */ + objResultPtr = Tcl_IsShared(objPtr) ? Tcl_DuplicateObj(objPtr) + : objPtr; + + Tcl_SetByteArrayLength(objResultPtr, count*length); /* PANIC? 
*/ + Tcl_SetByteArrayLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + TclAppendBytesToByteArray(objResultPtr, + Tcl_GetByteArrayFromObj(objResultPtr, NULL), + (count - done) * length); + } else if (unichar) { + /* Efficiently produce a pure Tcl_UniChar array result */ + if (Tcl_IsShared(objPtr)) { + objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); + } else { + TclInvalidateStringRep(objPtr); + objResultPtr = objPtr; + } + + if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow: unable to alloc %lu bytes", + STRING_SIZE(count*length))); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + Tcl_SetObjLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), + (count - done) * length); + } else { + /* Efficiently concatenate string reps */ + if (Tcl_IsShared(objPtr)) { + objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); + } else { + TclFreeIntRep(objPtr); + objResultPtr = objPtr; + } + if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow: unable to alloc %u bytes", + count*length)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return TCL_ERROR; + } + Tcl_SetObjLength(objResultPtr, length); + while (count - done > done) { + Tcl_AppendObjToObj(objResultPtr, objResultPtr); + done *= 2; + } + Tcl_AppendToObj(objResultPtr, Tcl_GetString(objResultPtr), + (count - done) * length); + } + *objPtrPtr = objResultPtr; + return TCL_OK; +} + +/* + *--------------------------------------------------------------------------- + * * TclStringCatObjv -- * * Performs the [string cat] function. -- cgit v0.12