From 0dc5e35d3b0f0bda4129dd72223c109c778a4331 Mon Sep 17 00:00:00 2001 From: dkf Date: Tue, 24 Dec 2013 18:57:46 +0000 Subject: interim commit; not yet working --- generic/tclExecute.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 73f388b..d178934 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5157,6 +5157,126 @@ TEBCresume( int length3; Tcl_Obj *value3Ptr; + case INST_STR_REPLACE: + valuePtr = OBJ_AT_DEPTH(3); + length = Tcl_GetCharLength(valuePtr) - 1; + value3Ptr = OBJ_AT_TOS; + TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), + O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(value3Ptr))); + if (TclGetIntForIndexM(interp, OBJ_AT_DEPTH(2), length, + &fromIdx) != TCL_OK + || TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, + &toIdx) != TCL_OK) { + goto gotError; + } + if (fromIdx < 0) { + fromIdx = 0; + } + + if (fromIdx > toIdx || fromIdx > length) { + TRACE_APPEND(("%.30s\n", O2S(valuePtr))); + NEXT_INST_F(1, 3, 0); + } + + if (fromIdx == 0 && toIdx == length) { + objResultPtr = value3Ptr; + TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); + NEXT_INST_F(1, 4, 1); + } + + length3 = Tcl_GetCharLength(value3Ptr); + + /* + * Remove substring. In-place. + */ + + if (length3 == 0 && !Tcl_IsShared(valuePtr) && toIdx == length) { + Tcl_SetObjLength(valuePtr, fromIdx); + TRACE_APPEND(("%.30s\n", O2S(valuePtr))); + NEXT_INST_F(1, 3, 0); + } + + // Splice in place. + + if (length3 == toIdx - fromIdx) { + unsigned char *bytes1, *bytes2; + + if (Tcl_IsShared(valuePtr)) { + objResultPtr = Tcl_DuplicateObj(valuePtr); + // splice "in place" + if (TclIsPureByteArray(objResultPtr) + && TclIsPureByteArray(value3Ptr)) { + bytes1 = Tcl_GetByteArrayFromObj(objResultPtr); + bytes2 = Tcl_GetByteArrayFromObj(value3Ptr); + } else { + } + NEXT_INST_F(1, 4, 1); + } else { + // splice "in place" + if (TclIsPureByteArray(valuePtr) + && TclIsPureByteArray(value3Ptr)) { + bytes1 = Tcl_GetByteArrayFromObj(valuePtr); + bytes2 = Tcl_GetByteArrayFromObj(value3Ptr); + } else { + } + NEXT_INST_F(1, 3, 0); + } + } + + /* + * Get the unicode representation; this is where we guarantee to lose + * bytearrays. + */ + + ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + length--; + + /* + * Remove substring using copying. + */ + + if (length3 == 0) { + if (fromIdx > 0) { + objResultPtr = Tcl_NewUnicodeObj(ustring1, fromIdx); + if (toIdx < length) { + Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, + length - toIdx); + } + } else { + objResultPtr = Tcl_NewUnicodeObj(ustring1 + toIdx + 1, + length - toIdx); + } + TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); + NEXT_INST_F(1, 4, 1); + } + + /* + * Splice string pieces by full copying. + */ + + if (fromIdx > 0) { + objResultPtr = Tcl_NewUnicodeObj(ustring1, fromIdx); + Tcl_AppendObjToObj(objResultPtr, value3Ptr); + if (toIdx < length) { + Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, + length - toIdx); + } + } else if (Tcl_IsShared(value3Ptr)) { + objResultPtr = Tcl_DuplicateObj(value3Ptr); + if (toIdx < length) { + Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, + length - toIdx); + } + } else { + objResultPtr = value3Ptr; + if (toIdx < length) { + Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, + length - toIdx); + } + } + TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); + NEXT_INST_F(1, 4, 1); + case INST_STR_MAP: valuePtr = OBJ_AT_TOS; /* "Main" string. */ value3Ptr = OBJ_UNDER_TOS; /* "Target" string. */ -- cgit v0.12 From 7ccc50d8b67a7e642928d04bfb66ec3ee4052fbb Mon Sep 17 00:00:00 2001 From: dkf Date: Sat, 28 Dec 2013 17:21:39 +0000 Subject: completed instruction implementation --- generic/tclCompile.c | 4 +++ generic/tclCompile.h | 3 ++- generic/tclExecute.c | 72 +++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index db97c45..5474535 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -606,6 +606,10 @@ InstructionDesc const tclInstructionTable[] = { /* [string totitle] core: converts whole string to upper case using * the default (extended "C" locale) rules. * Stack: ... string => ... newString */ + {"strReplace", 1, -3, 0, {OPERAND_NONE}}, + /* [string replace] core: replaces a non-empty range of one string + * with the contents of another. + * Stack: ... string fromIdx toIdx replacement => ... newString */ {NULL, 0, 0, 0, {OPERAND_NONE}} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 6226f7f..207b710 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -784,9 +784,10 @@ typedef struct ByteCode { #define INST_STR_UPPER 174 #define INST_STR_LOWER 175 #define INST_STR_TITLE 176 +#define INST_STR_REPLACE 177 /* The last opcode */ -#define LAST_INST_OPCODE 176 +#define LAST_INST_OPCODE 177 /* * Table describing the Tcl bytecode instructions: their name (for displaying diff --git a/generic/tclExecute.c b/generic/tclExecute.c index d178934..3ba252f 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5158,30 +5158,41 @@ TEBCresume( Tcl_Obj *value3Ptr; case INST_STR_REPLACE: - valuePtr = OBJ_AT_DEPTH(3); + value3Ptr = POP_OBJECT(); + valuePtr = OBJ_AT_DEPTH(2); length = Tcl_GetCharLength(valuePtr) - 1; - value3Ptr = OBJ_AT_TOS; TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), - O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(value3Ptr))); - if (TclGetIntForIndexM(interp, OBJ_AT_DEPTH(2), length, + O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr))); + if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, &fromIdx) != TCL_OK - || TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, + || TclGetIntForIndexM(interp, OBJ_AT_TOS, length, &toIdx) != TCL_OK) { + TclDecrRefCount(value3Ptr); goto gotError; } + TclDecrRefCount(OBJ_AT_TOS); + (void) POP_OBJECT(); + TclDecrRefCount(OBJ_AT_TOS); + (void) POP_OBJECT(); if (fromIdx < 0) { fromIdx = 0; } if (fromIdx > toIdx || fromIdx > length) { TRACE_APPEND(("%.30s\n", O2S(valuePtr))); - NEXT_INST_F(1, 3, 0); + TclDecrRefCount(value3Ptr); + NEXT_INST_F(1, 0, 0); + } + + if (toIdx > length) { + toIdx = length; } if (fromIdx == 0 && toIdx == length) { - objResultPtr = value3Ptr; - TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); - NEXT_INST_F(1, 4, 1); + TclDecrRefCount(OBJ_AT_TOS); + OBJ_AT_TOS = value3Ptr; + TRACE_APPEND(("%.30s\n", O2S(value3Ptr))); + NEXT_INST_F(1, 0, 0); } length3 = Tcl_GetCharLength(value3Ptr); @@ -5191,35 +5202,52 @@ TEBCresume( */ if (length3 == 0 && !Tcl_IsShared(valuePtr) && toIdx == length) { + TclDecrRefCount(value3Ptr); Tcl_SetObjLength(valuePtr, fromIdx); TRACE_APPEND(("%.30s\n", O2S(valuePtr))); - NEXT_INST_F(1, 3, 0); + NEXT_INST_F(1, 0, 0); } - // Splice in place. + /* + * See if we can splice in place. This happens when the number of + * characters being replaced is the same as the number of characters + * in the string to be inserted. + */ if (length3 == toIdx - fromIdx) { unsigned char *bytes1, *bytes2; if (Tcl_IsShared(valuePtr)) { objResultPtr = Tcl_DuplicateObj(valuePtr); - // splice "in place" if (TclIsPureByteArray(objResultPtr) && TclIsPureByteArray(value3Ptr)) { - bytes1 = Tcl_GetByteArrayFromObj(objResultPtr); - bytes2 = Tcl_GetByteArrayFromObj(value3Ptr); + bytes1 = Tcl_GetByteArrayFromObj(objResultPtr, NULL); + bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); + memcpy(bytes1 + fromIdx, bytes2, length3); } else { + ustring1 = Tcl_GetUnicode(objResultPtr); + ustring2 = Tcl_GetUnicode(value3Ptr); + memcpy(ustring1 + fromIdx, ustring2, + length3 * sizeof(Tcl_UniChar)); } - NEXT_INST_F(1, 4, 1); + Tcl_InvalidateStringRep(objResultPtr); + TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); + NEXT_INST_F(1, 1, 1); } else { - // splice "in place" if (TclIsPureByteArray(valuePtr) && TclIsPureByteArray(value3Ptr)) { - bytes1 = Tcl_GetByteArrayFromObj(valuePtr); - bytes2 = Tcl_GetByteArrayFromObj(value3Ptr); + bytes1 = Tcl_GetByteArrayFromObj(valuePtr, NULL); + bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); + memcpy(bytes1 + fromIdx, bytes2, length3); } else { + ustring1 = Tcl_GetUnicode(valuePtr); + ustring2 = Tcl_GetUnicode(value3Ptr); + memcpy(ustring1 + fromIdx, ustring2, + length3 * sizeof(Tcl_UniChar)); } - NEXT_INST_F(1, 3, 0); + Tcl_InvalidateStringRep(valuePtr); + TRACE_APPEND(("%.30s\n", O2S(valuePtr))); + NEXT_INST_F(1, 0, 0); } } @@ -5246,8 +5274,9 @@ TEBCresume( objResultPtr = Tcl_NewUnicodeObj(ustring1 + toIdx + 1, length - toIdx); } + TclDecrRefCount(value3Ptr); TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); - NEXT_INST_F(1, 4, 1); + NEXT_INST_F(1, 1, 1); } /* @@ -5274,8 +5303,9 @@ TEBCresume( length - toIdx); } } + TclDecrRefCount(value3Ptr); TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); - NEXT_INST_F(1, 4, 1); + NEXT_INST_F(1, 1, 1); case INST_STR_MAP: valuePtr = OBJ_AT_TOS; /* "Main" string. */ -- cgit v0.12 From 348916814c5f0e9bac693424abe20aefe1150869 Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 29 Dec 2013 16:59:15 +0000 Subject: use the new instruction --- generic/tclCompCmdsSZ.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 110476e..649a76a 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -765,12 +765,25 @@ TclCompileStringReplaceCmd( } else { /* - * Too complicated to optimize, but we know the number of arguments is - * correct so we can issue a call of the "standard" implementation. + * Need to process indices at runtime. This could be because the + * indices are not constants, or because we need to resolve them to + * absolute indices to work out if a replacement is going to happen. + * In any case, to runtime it is. */ genericReplace: - return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr); + CompileWord(envPtr, valueTokenPtr, interp, 1); + tokenPtr = TokenAfter(valueTokenPtr); + CompileWord(envPtr, tokenPtr, interp, 2); + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 3); + if (replacementTokenPtr != NULL) { + CompileWord(envPtr, replacementTokenPtr, interp, 4); + } else { + PUSH( ""); + } + OP( STR_REPLACE); + return TCL_OK; } } -- cgit v0.12 From 295782408448c9a034b86367186e93bc84f0ce7b Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 29 Dec 2013 18:48:09 +0000 Subject: precondition was wrong, and needed to flush part of the string/internal rep --- generic/tclExecute.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 3ba252f..bbc3731 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5214,7 +5214,7 @@ TEBCresume( * in the string to be inserted. */ - if (length3 == toIdx - fromIdx) { + if (length3 - 1 == toIdx - fromIdx) { unsigned char *bytes1, *bytes2; if (Tcl_IsShared(valuePtr)) { @@ -5225,10 +5225,21 @@ TEBCresume( bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); memcpy(bytes1 + fromIdx, bytes2, length3); } else { - ustring1 = Tcl_GetUnicode(objResultPtr); - ustring2 = Tcl_GetUnicode(value3Ptr); + ustring1 = Tcl_GetUnicodeFromObj(objResultPtr, NULL); + ustring2 = Tcl_GetUnicodeFromObj(value3Ptr, NULL); memcpy(ustring1 + fromIdx, ustring2, length3 * sizeof(Tcl_UniChar)); + + /* + * Magic! Flush the info in the string internal rep that + * refers to the about-to-be-invalidated UTF-8 rep. This + * sets the 'allocated' field of the String structure to 0 + * to indicate that a new buffer needs to be allocated. + * This is safe; we know we've got a tclStringTypePtr set + * at this point (post Tcl_GetUnicodeFromObj). + */ + + ((int *) objResultPtr->internalRep.otherValuePtr)[1] = 0; } Tcl_InvalidateStringRep(objResultPtr); TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); @@ -5240,10 +5251,21 @@ TEBCresume( bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); memcpy(bytes1 + fromIdx, bytes2, length3); } else { - ustring1 = Tcl_GetUnicode(valuePtr); - ustring2 = Tcl_GetUnicode(value3Ptr); + ustring1 = Tcl_GetUnicodeFromObj(valuePtr, NULL); + ustring2 = Tcl_GetUnicodeFromObj(value3Ptr, NULL); memcpy(ustring1 + fromIdx, ustring2, length3 * sizeof(Tcl_UniChar)); + + /* + * Magic! Flush the info in the string internal rep that + * refers to the about-to-be-invalidated UTF-8 rep. This + * sets the 'allocated' field of the String structure to 0 + * to indicate that a new buffer needs to be allocated. + * This is safe; we know we've got a tclStringTypePtr set + * at this point (post Tcl_GetUnicodeFromObj). + */ + + ((int *) objResultPtr->internalRep.otherValuePtr)[1] = 0; } Tcl_InvalidateStringRep(valuePtr); TRACE_APPEND(("%.30s\n", O2S(valuePtr))); -- cgit v0.12