From 2d87ed6d2935ef33ceddacd1e31a2695a85f5c44 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 14 Mar 2018 15:17:27 +0000 Subject: New routine TclStringReplace() serves as a common implementation for both compiled and direct eval paths of [string replace]. One routine to debug, optimize, convert to new data structures, etc. This routine will also make a good core engine for [string insert]. With this refactoring done, the value of the INST_STR_REPLACE instruction is no longer clear. Until now, the value of compiling to it was that it routed you through the "better" of the two implementations. An instruction that does nothing different from what invocation to the direct eval fallback would do does not have obvious value. --- generic/tclCmdMZ.c | 23 +++----- generic/tclExecute.c | 77 +------------------------- generic/tclInt.h | 3 + generic/tclStringObj.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 157 insertions(+), 91 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 3c5c5e4..eb1c16c 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2339,26 +2339,17 @@ StringRplcCmd( } else { Tcl_Obj *resultPtr; - /* - * We are re-fetching in case the string argument is same value as - * an index argument, and shimmering cost us our ustring. - */ - - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - end = length-1; - if (first < 0) { first = 0; } - - resultPtr = Tcl_NewUnicodeObj(ustring, first); - if (objc == 5) { - Tcl_AppendObjToObj(resultPtr, objv[4]); - } - if (last < end) { - Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, - end - last); + if (last > end) { + last = end; } + + resultPtr = TclStringReplace(interp, objv[1], first, + last + 1 - first, (objc == 5) ? objv[4] : NULL, + TCL_STRING_IN_PLACE); + Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 59f6826..ad09713 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5467,82 +5467,9 @@ TEBCresume( NEXT_INST_F(1, 0, 0); } - length3 = Tcl_GetCharLength(value3Ptr); + objResultPtr = TclStringReplace(interp, valuePtr, fromIdx, + toIdx - fromIdx + 1, value3Ptr, TCL_STRING_IN_PLACE); - /* - * See if we can splice in place. This happens when the number of - * characters being replaced is the same as the number of characters - * in the string to be inserted. - */ - - if (length3 - 1 == toIdx - fromIdx) { - unsigned char *bytes1, *bytes2; - - if (Tcl_IsShared(valuePtr)) { - objResultPtr = Tcl_DuplicateObj(valuePtr); - } else { - objResultPtr = valuePtr; - } - if (TclIsPureByteArray(objResultPtr) - && TclIsPureByteArray(value3Ptr)) { - bytes1 = Tcl_GetByteArrayFromObj(objResultPtr, NULL); - bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); - memcpy(bytes1 + fromIdx, bytes2, length3); - } else { - ustring1 = Tcl_GetUnicodeFromObj(objResultPtr, NULL); - ustring2 = Tcl_GetUnicodeFromObj(value3Ptr, NULL); - memcpy(ustring1 + fromIdx, ustring2, - length3 * sizeof(Tcl_UniChar)); - } - Tcl_InvalidateStringRep(objResultPtr); - TclDecrRefCount(value3Ptr); - TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); - if (objResultPtr == valuePtr) { - NEXT_INST_F(1, 0, 0); - } else { - NEXT_INST_F(1, 1, 1); - } - } - - /* - * Get the unicode representation; this is where we guarantee to lose - * bytearrays. - */ - - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - length--; - - /* - * Remove substring using copying. - */ - - objResultPtr = NULL; - if (fromIdx > 0) { - objResultPtr = Tcl_NewUnicodeObj(ustring1, fromIdx); - } - if (length3 > 0) { - if (objResultPtr) { - Tcl_AppendObjToObj(objResultPtr, value3Ptr); - } else if (Tcl_IsShared(value3Ptr)) { - objResultPtr = Tcl_DuplicateObj(value3Ptr); - } else { - objResultPtr = value3Ptr; - } - } - if (toIdx < length) { - if (objResultPtr) { - Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, - length - toIdx); - } else { - objResultPtr = Tcl_NewUnicodeObj(ustring1 + toIdx + 1, - length - toIdx); - } - } - if (objResultPtr == NULL) { - /* This has to be the case [string replace $s 0 end {}] */ - /* which has result {} which is same as value3Ptr. */ - objResultPtr = value3Ptr; - } if (objResultPtr == value3Ptr) { /* See [Bug 82e7f67325] */ TclDecrRefCount(OBJ_AT_TOS); diff --git a/generic/tclInt.h b/generic/tclInt.h index 57f648c..81b1c05 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4006,6 +4006,9 @@ MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, int last); MODULE_SCOPE Tcl_Obj * TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr, int count, int flags); +MODULE_SCOPE Tcl_Obj * TclStringReplace(Tcl_Interp *interp, Tcl_Obj *objPtr, + int first, int count, Tcl_Obj *insertPtr, + int flags); MODULE_SCOPE Tcl_Obj * TclStringReverse(Tcl_Obj *objPtr, int flags); /* Flag values for the [string] ensemble functions. */ diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 26a3a28..9913160 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -38,6 +38,7 @@ #include "tommath.h" #include "tclStringRep.h" +#include "assert.h" /* * Prototypes for functions defined later in this file: */ @@ -3531,6 +3532,150 @@ TclStringReverse( /* *--------------------------------------------------------------------------- * + * TclStringReplace -- + * + * Implements the inner engine of the [string replace] command. + * + * The result is a concatenation of a prefix from objPtr, characters + * 0 through first-1, the insertPtr string value, and a suffix from + * objPtr, characters from first + count to the end. The effect is + * as if the inner substring of characters first through first+count-1 + * are removed and replaced with insertPtr. + * If insertPtr is NULL, it is treated as an empty string. + * When passed the flag TCL_STRING_IN_PLACE, this routine will try + * to do the work within objPtr, so long as no sharing forbids it. + * Without that request, or as needed, a new Tcl value will be allocated + * to be the result. + * + * Results: + * A Tcl value that is the result of the substring replacement. + * May return NULL in case of an error. When NULL is returned and + * interp is non-NULL, error information is left in interp + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclStringReplace( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* String to act upon */ + int first, /* First index to replace */ + int count, /* How many chars to replace */ + Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ + int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ +{ + int inPlace = flags & TCL_STRING_IN_PLACE; + Tcl_Obj *result; + + /* Caller is expected to pass sensible arguments */ + assert ( count >= 0 ) ; + assert ( first >= 0 ) ; + + /* Replace nothing with nothing */ + if ((insertPtr == NULL) && (count == 0)) { + if (inPlace) { + return objPtr; + } else { + return Tcl_DuplicateObj(objPtr); + } + } + + /* + * The caller very likely had to call Tcl_GetCharLength() or similar + * to be able to process index values. This means it is like that + * objPtr is either a proper "bytearray" or a "string" or else it has + * a known and short string rep. + */ + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (insertPtr == NULL) { + /* Replace something with nothing. */ + + assert ( first <= numBytes ) ; + assert ( count <= numBytes ) ; + assert ( first + count <= numBytes ) ; + + result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Replace everything */ + if ((first == 0) && (count == numBytes)) { + return insertPtr; + } + + if (TclIsPureByteArray(insertPtr)) { + int newBytes; + unsigned char *iBytes + = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); + + if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { + /* + * Removal count and replacement count are equal. + * Other conditions permit. Do in-place splice. + */ + + memcpy(bytes + first, iBytes, count); + Tcl_InvalidateStringRep(objPtr); + return objPtr; + } + + if (newBytes > INT_MAX - (numBytes - count)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; + } + result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); + /* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, iBytes, newBytes); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Flow through to try other approaches below */ + } + + /* + * TODO: Figure out how not to generate a Tcl_UniChar array rep + * when it can be determined objPtr->bytes points to a string of + * all single-byte characters so we can index it directly. + */ + + /* The traditional implementation... */ + { + int numChars; + Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + + /* TODO: Is there an in-place option worth pursuing here? */ + + result = Tcl_NewUnicodeObj(ustring, first); + if (insertPtr) { + Tcl_AppendObjToObj(result, insertPtr); + } + if (first + count < numChars) { + Tcl_AppendUnicodeToObj(result, ustring + first + count, + numChars - first - count); + } + + return result; + } +} + +/* + *--------------------------------------------------------------------------- + * * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string -- cgit v0.12