diff options
author | dgp <dgp@users.sourceforge.net> | 2018-03-13 17:14:40 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2018-03-13 17:14:40 (GMT) |
commit | 62079a5c6951d9555cce57bdeb5a227a5be8b906 (patch) | |
tree | 14a91693203f05d725c045e5073d28eab0f3cf5d /generic/tclCompCmdsSZ.c | |
parent | 41ed3e0e2bc9fb87e4a63f737ddcc00041af42da (diff) | |
parent | a275b8a5ea188f0633caab60b3ab709de4239f1f (diff) | |
download | tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.zip tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.gz tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.bz2 |
merge 8.6
Diffstat (limited to 'generic/tclCompCmdsSZ.c')
-rw-r--r-- | generic/tclCompCmdsSZ.c | 240 |
1 files changed, 145 insertions, 95 deletions
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 79c5c78..cf088bb 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -995,147 +995,197 @@ TclCompileStringReplaceCmd( * compiled. */ CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL; + Tcl_Token *tokenPtr, *valueTokenPtr; DefineLineInformation; /* TIP #280 */ - int idx1, idx2; + int first, last; if (parsePtr->numWords < 4 || parsePtr->numWords > 5) { return TCL_ERROR; } + + /* Bytecode to compute/push string argument being replaced */ valueTokenPtr = TokenAfter(parsePtr->tokenPtr); - if (parsePtr->numWords == 5) { - tokenPtr = TokenAfter(valueTokenPtr); - tokenPtr = TokenAfter(tokenPtr); - replacementTokenPtr = TokenAfter(tokenPtr); - } + CompileWord(envPtr, valueTokenPtr, interp, 1); + /* + * Check for first index known and useful at compile time. + */ tokenPtr = TokenAfter(valueTokenPtr); - if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, - &idx1) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER, + &first) != TCL_OK) { goto genericReplace; } + /* - * Token parsed as an index value. Indices before the string are - * treated as index of start of string. + * Check for last index known and useful at compile time. */ - tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, - &idx2) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER, + &last) != TCL_OK) { goto genericReplace; } - /* - * Token parsed as an index value. Indices after the string are - * treated as index of end of string. - */ -/* TODO...... */ - /* - * We handle these replacements specially: first character (where - * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything - * else and the semantics get rather screwy. + /* + * [string replace] is an odd bird. For many arguments it is + * a conventional substring replacer. However it also goes out + * of its way to become a no-op for many cases where it would be + * replacing an empty substring. Precisely, it is a no-op when * - * TODO: These seem to be very narrow cases. They are not even - * covered by the test suite, and any programming that ends up - * here could have been coded by the programmer using [string range] - * and [string cat]. [*] Not clear at all to me that the bytecode - * generated here is worthwhile. + * (last < first) OR + * (last < 0) OR + * (end < first) * - * [*] Except for the empty string exceptions. UGGGGHHHH. + * For some compile-time values we can detect these cases, and + * compile direct to bytecode implementing the no-op. */ - if (idx1 == 0 && idx2 == 0) { - int notEq, end; + if ((last == TCL_INDEX_BEFORE) /* Know (last < 0) */ + || (first == TCL_INDEX_AFTER) /* Know (first > end) */ /* - * Just working with the first character. + * Tricky to determine when runtime (last < first) can be + * certainly known based on the encoded values. Consider the + * cases... + * + * (first <= TCL_INDEX_END) && + * (last == TCL_INDEX_AFTER) => cannot tell REJECT + * (last <= TCL_INDEX END) && (last < first) => ACCEPT + * else => cannot tell REJECT */ - - CompileWord(envPtr, valueTokenPtr, interp, 1); - if (replacementTokenPtr == NULL) { - /* Drop first */ - OP44( STR_RANGE_IMM, 1, TCL_INDEX_END); - return TCL_OK; - } - /* Replace first */ - CompileWord(envPtr, replacementTokenPtr, interp, 4); - + || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END) + && (last < first)) /* Know (last < first) */ /* - * NOTE: The following tower of bullshit is present because - * [string replace] was boneheadedly defined not to replace - * empty strings, so we actually have to detect the empty - * string case and treat it differently. + * (first == TCL_INDEX_BEFORE) && + * (last == TCL_INDEX_AFTER) => (first < last) REJECT + * (last <= TCL_INDEX_END) => cannot tell REJECT + * else => (first < last) REJECT + * + * else [[first >= TCL_INDEX_START]] && + * (last == TCL_INDEX_AFTER) => cannot tell REJECT + * (last <= TCL_INDEX_END) => cannot tell REJECT + * else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT */ - - OP4( OVER, 1); - PUSH( ""); - OP( STR_EQ); - JUMP1( JUMP_FALSE, notEq); - OP( POP); - JUMP1( JUMP, end); - FIXJUMP1(notEq); - TclAdjustStackDepth(1, envPtr); - OP4( REVERSE, 2); - OP44( STR_RANGE_IMM, 1, TCL_INDEX_END); - OP1( STR_CONCAT1, 2); - FIXJUMP1(end); + || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START) + && (last < first))) { /* Know (last < first) */ + if (parsePtr->numWords == 5) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); + OP( POP); /* Pop newString */ + } + /* Original string argument now on TOS as result */ return TCL_OK; + } - } else if (idx1 == TCL_INDEX_END && idx2 == TCL_INDEX_END) { - int notEq, end; - - /* - * Just working with the last character. - */ + if (parsePtr->numWords == 5) { + /* + * When we have a string replacement, we have to take care about + * not replacing empty substrings that [string replace] promises + * not to replace + * + * The remaining index values might be suitable for conventional + * string replacement, but only if they cannot possibly meet the + * conditions described above at runtime. If there's a chance they + * might, we would have to emit bytecode to check and at that point + * we're paying more in bytecode execution time than would make + * things worthwhile. Trouble is we are very limited in + * how much we can detect that at compile time. After decoding, + * we need, first: + * + * (first <= end) + * + * The encoded indices (first <= TCL_INDEX END) and + * (first == TCL_INDEX_BEFORE) always meets this condition, but + * any other encoded first index has some list for which it fails. + * + * We also need, second: + * + * (last >= 0) + * + * The encoded indices (last >= TCL_INDEX_START) and + * (last == TCL_INDEX_AFTER) always meet this condition but any + * other encoded last index has some list for which it fails. + * + * Finally we need, third: + * + * (first <= last) + * + * Considered in combination with the constraints we already have, + * we see that we can proceed when (first == TCL_INDEX_BEFORE) + * or (last == TCL_INDEX_AFTER). These also permit simplification + * of the prefix|replace|suffix construction. The other constraints, + * though, interfere with getting a guarantee that first <= last. + */ - CompileWord(envPtr, valueTokenPtr, interp, 1); - if (replacementTokenPtr == NULL) { - /* Drop last */ - OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1); - return TCL_OK; + if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) { + /* empty prefix */ + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); + OP4( REVERSE, 2); + if (last == TCL_INDEX_AFTER) { + OP( POP); /* Pop original */ + } else { + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + OP1( STR_CONCAT1, 2); } - /* Replace last */ - CompileWord(envPtr, replacementTokenPtr, interp, 4); - - /* More bullshit; see NOTE above. */ + return TCL_OK; + } - OP4( OVER, 1); - PUSH( ""); - OP( STR_EQ); - JUMP1( JUMP_FALSE, notEq); - OP( POP); - JUMP1( JUMP, end); - FIXJUMP1(notEq); - TclAdjustStackDepth(1, envPtr); - OP4( REVERSE, 2); - OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1); - OP4( REVERSE, 2); + if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) { + OP44( STR_RANGE_IMM, 0, first-1); + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); OP1( STR_CONCAT1, 2); - FIXJUMP1(end); return TCL_OK; + } + + /* FLOW THROUGH TO genericReplace */ } else { - /* - * Need to process indices at runtime. This could be because the - * indices are not constants, or because we need to resolve them to - * absolute indices to work out if a replacement is going to happen. - * In any case, to runtime it is. + /* + * When we have no replacement string to worry about, we may + * have more luck, because the forbidden empty string replacements + * are harmless when they are replaced by another empty string. */ + if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) { + /* empty prefix - build suffix only */ + + if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) { + /* empty suffix too => empty result */ + OP( POP); /* Pop original */ + PUSH ( ""); + return TCL_OK; + } + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + return TCL_OK; + } else { + if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) { + /* empty suffix - build prefix only */ + OP44( STR_RANGE_IMM, 0, first-1); + return TCL_OK; + } + OP( DUP); + OP44( STR_RANGE_IMM, 0, first-1); + OP4( REVERSE, 2); + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + OP1( STR_CONCAT1, 2); + return TCL_OK; + } + } + genericReplace: - CompileWord(envPtr, valueTokenPtr, interp, 1); tokenPtr = TokenAfter(valueTokenPtr); CompileWord(envPtr, tokenPtr, interp, 2); tokenPtr = TokenAfter(tokenPtr); CompileWord(envPtr, tokenPtr, interp, 3); - if (replacementTokenPtr != NULL) { - CompileWord(envPtr, replacementTokenPtr, interp, 4); + if (parsePtr->numWords == 5) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); } else { PUSH( ""); } OP( STR_REPLACE); return TCL_OK; - } } int |