diff options
| -rw-r--r-- | generic/tclCmdMZ.c | 23 | ||||
| -rw-r--r-- | generic/tclExecute.c | 77 | ||||
| -rw-r--r-- | generic/tclInt.h | 3 | ||||
| -rw-r--r-- | generic/tclStringObj.c | 145 |
4 files changed, 157 insertions, 91 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 3c5c5e4..eb1c16c 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2339,26 +2339,17 @@ StringRplcCmd( } else { Tcl_Obj *resultPtr; - /* - * We are re-fetching in case the string argument is same value as - * an index argument, and shimmering cost us our ustring. - */ - - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - end = length-1; - if (first < 0) { first = 0; } - - resultPtr = Tcl_NewUnicodeObj(ustring, first); - if (objc == 5) { - Tcl_AppendObjToObj(resultPtr, objv[4]); - } - if (last < end) { - Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, - end - last); + if (last > end) { + last = end; } + + resultPtr = TclStringReplace(interp, objv[1], first, + last + 1 - first, (objc == 5) ? objv[4] : NULL, + TCL_STRING_IN_PLACE); + Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 59f6826..ad09713 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5467,82 +5467,9 @@ TEBCresume( NEXT_INST_F(1, 0, 0); } - length3 = Tcl_GetCharLength(value3Ptr); + objResultPtr = TclStringReplace(interp, valuePtr, fromIdx, + toIdx - fromIdx + 1, value3Ptr, TCL_STRING_IN_PLACE); - /* - * See if we can splice in place. This happens when the number of - * characters being replaced is the same as the number of characters - * in the string to be inserted. 
- */ - - if (length3 - 1 == toIdx - fromIdx) { - unsigned char *bytes1, *bytes2; - - if (Tcl_IsShared(valuePtr)) { - objResultPtr = Tcl_DuplicateObj(valuePtr); - } else { - objResultPtr = valuePtr; - } - if (TclIsPureByteArray(objResultPtr) - && TclIsPureByteArray(value3Ptr)) { - bytes1 = Tcl_GetByteArrayFromObj(objResultPtr, NULL); - bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); - memcpy(bytes1 + fromIdx, bytes2, length3); - } else { - ustring1 = Tcl_GetUnicodeFromObj(objResultPtr, NULL); - ustring2 = Tcl_GetUnicodeFromObj(value3Ptr, NULL); - memcpy(ustring1 + fromIdx, ustring2, - length3 * sizeof(Tcl_UniChar)); - } - Tcl_InvalidateStringRep(objResultPtr); - TclDecrRefCount(value3Ptr); - TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); - if (objResultPtr == valuePtr) { - NEXT_INST_F(1, 0, 0); - } else { - NEXT_INST_F(1, 1, 1); - } - } - - /* - * Get the unicode representation; this is where we guarantee to lose - * bytearrays. - */ - - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - length--; - - /* - * Remove substring using copying. - */ - - objResultPtr = NULL; - if (fromIdx > 0) { - objResultPtr = Tcl_NewUnicodeObj(ustring1, fromIdx); - } - if (length3 > 0) { - if (objResultPtr) { - Tcl_AppendObjToObj(objResultPtr, value3Ptr); - } else if (Tcl_IsShared(value3Ptr)) { - objResultPtr = Tcl_DuplicateObj(value3Ptr); - } else { - objResultPtr = value3Ptr; - } - } - if (toIdx < length) { - if (objResultPtr) { - Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, - length - toIdx); - } else { - objResultPtr = Tcl_NewUnicodeObj(ustring1 + toIdx + 1, - length - toIdx); - } - } - if (objResultPtr == NULL) { - /* This has to be the case [string replace $s 0 end {}] */ - /* which has result {} which is same as value3Ptr. 
*/ - objResultPtr = value3Ptr; - } if (objResultPtr == value3Ptr) { /* See [Bug 82e7f67325] */ TclDecrRefCount(OBJ_AT_TOS); diff --git a/generic/tclInt.h b/generic/tclInt.h index 57f648c..81b1c05 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4006,6 +4006,9 @@ MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, int last); MODULE_SCOPE Tcl_Obj * TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr, int count, int flags); +MODULE_SCOPE Tcl_Obj * TclStringReplace(Tcl_Interp *interp, Tcl_Obj *objPtr, + int first, int count, Tcl_Obj *insertPtr, + int flags); MODULE_SCOPE Tcl_Obj * TclStringReverse(Tcl_Obj *objPtr, int flags); /* Flag values for the [string] ensemble functions. */ diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 26a3a28..9913160 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -38,6 +38,7 @@ #include "tommath.h" #include "tclStringRep.h" +#include "assert.h" /* * Prototypes for functions defined later in this file: */ @@ -3531,6 +3532,150 @@ TclStringReverse( /* *--------------------------------------------------------------------------- * + * TclStringReplace -- + * + * Implements the inner engine of the [string replace] command. + * + * The result is a concatenation of a prefix from objPtr, characters + * 0 through first-1, the insertPtr string value, and a suffix from + * objPtr, characters from first + count to the end. The effect is + * as if the inner substring of characters first through first+count-1 + * are removed and replaced with insertPtr. + * If insertPtr is NULL, it is treated as an empty string. + * When passed the flag TCL_STRING_IN_PLACE, this routine will try + * to do the work within objPtr, so long as no sharing forbids it. + * Without that request, or as needed, a new Tcl value will be allocated + * to be the result. + * + * Results: + * A Tcl value that is the result of the substring replacement. + * May return NULL in case of an error. 
When NULL is returned and + * interp is non-NULL, error information is left in interp + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclStringReplace( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* String to act upon */ + int first, /* First index to replace */ + int count, /* How many chars to replace */ + Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ + int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ +{ + int inPlace = flags & TCL_STRING_IN_PLACE; + Tcl_Obj *result; + + /* Caller is expected to pass sensible arguments */ + assert ( count >= 0 ) ; + assert ( first >= 0 ) ; + + /* Replace nothing with nothing */ + if ((insertPtr == NULL) && (count == 0)) { + if (inPlace) { + return objPtr; + } else { + return Tcl_DuplicateObj(objPtr); + } + } + + /* + * The caller very likely had to call Tcl_GetCharLength() or similar + * to be able to process index values. This means it is likely that + * objPtr is either a proper "bytearray" or a "string" or else it has + * a known and short string rep. + */ + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (insertPtr == NULL) { + /* Replace something with nothing. */ + + assert ( first <= numBytes ) ; + assert ( count <= numBytes ) ; + assert ( first + count <= numBytes ) ; + + result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Replace everything */ + if ((first == 0) && (count == numBytes)) { + return insertPtr; + } + + if (TclIsPureByteArray(insertPtr)) { + int newBytes; + unsigned char *iBytes + = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); + + if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { + /* + * Removal count and replacement count are equal. 
+ * Other conditions permit. Do in-place splice. + */ + + memcpy(bytes + first, iBytes, count); + Tcl_InvalidateStringRep(objPtr); + return objPtr; + } + + if (newBytes > INT_MAX - (numBytes - count)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; + } + result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); + /* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, iBytes, newBytes); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Flow through to try other approaches below */ + } + + /* + * TODO: Figure out how not to generate a Tcl_UniChar array rep + * when it can be determined objPtr->bytes points to a string of + * all single-byte characters so we can index it directly. + */ + + /* The traditional implementation... */ + { + int numChars; + Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + + /* TODO: Is there an in-place option worth pursuing here? */ + + result = Tcl_NewUnicodeObj(ustring, first); + if (insertPtr) { + Tcl_AppendObjToObj(result, insertPtr); + } + if (first + count < numChars) { + Tcl_AppendUnicodeToObj(result, ustring + first + count, + numChars - first - count); + } + + return result; + } +} + +/* + *--------------------------------------------------------------------------- + * * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string |
