merge 8.6

author: dgp <dgp@users.sourceforge.net> 2018-03-13 17:14:40 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2018-03-13 17:14:40 (GMT)
commit: 62079a5c6951d9555cce57bdeb5a227a5be8b906 (patch)
tree: 14a91693203f05d725c045e5073d28eab0f3cf5d /generic/tclCompCmdsSZ.c
parent: 41ed3e0e2bc9fb87e4a63f737ddcc00041af42da (diff)
parent: a275b8a5ea188f0633caab60b3ab709de4239f1f (diff)
download: tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.zip
tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.gz
tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.bz2
1 files changed, 145 insertions, 95 deletions
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 79c5c78..cf088bb 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -995,147 +995,197 @@ TclCompileStringReplaceCmd(
 				 * compiled. */
     CompileEnv *envPtr)		/* Holds the resulting instructions. */
 {
-    Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL;
+    Tcl_Token *tokenPtr, *valueTokenPtr;
     DefineLineInformation;	/* TIP #280 */
-    int idx1, idx2;
+    int first, last;
 
     if (parsePtr->numWords < 4 || parsePtr->numWords > 5) {
 	return TCL_ERROR;
     }
+ 
+    /* Bytecode to compute/push string argument being replaced */
     valueTokenPtr = TokenAfter(parsePtr->tokenPtr);
-    if (parsePtr->numWords == 5) {
-	tokenPtr = TokenAfter(valueTokenPtr);
-	tokenPtr = TokenAfter(tokenPtr);
-	replacementTokenPtr = TokenAfter(tokenPtr);
-    }
+    CompileWord(envPtr, valueTokenPtr, interp, 1);
 
+    /*
+     * Check for first index known and useful at compile time. 
+     */
     tokenPtr = TokenAfter(valueTokenPtr);
-    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER,
-	    &idx1) != TCL_OK) {
+    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+	    &first) != TCL_OK) {
 	goto genericReplace;
     }
+
     /*
-     * Token parsed as an index value. Indices before the string are
-     * treated as index of start of string.
+     * Check for last index known and useful at compile time. 
      */
-
     tokenPtr = TokenAfter(tokenPtr);
-    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END,
-	    &idx2) != TCL_OK) {
+    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+	    &last) != TCL_OK) {
 	goto genericReplace;
     }
-    /*
-     * Token parsed as an index value. Indices after the string are
-     * treated as index of end of string.
-     */
 
-/* TODO...... */
-    /*
-     * We handle these replacements specially: first character (where
-     * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything
-     * else and the semantics get rather screwy.
+    /* 
+     * [string replace] is an odd bird.  For many arguments it is
+     * a conventional substring replacer.  However it also goes out
+     * of its way to become a no-op for many cases where it would be
+     * replacing an empty substring.  Precisely, it is a no-op when
      *
-     * TODO: These seem to be very narrow cases.  They are not even
-     * covered by the test suite, and any programming that ends up
-     * here could have been coded by the programmer using [string range]
-     * and [string cat]. [*]  Not clear at all to me that the bytecode
-     * generated here is worthwhile.
+     *		(last < first)		OR
+     *		(last < 0)		OR
+     *		(end < first)
      *
-     *  [*] Except for the empty string exceptions.  UGGGGHHHH.
+     * For some compile-time values we can detect these cases, and
+     * compile direct to bytecode implementing the no-op.
      */
 
-    if (idx1 == 0 && idx2 == 0) {
-	int notEq, end;
+    if ((last == TCL_INDEX_BEFORE)		/* Know (last < 0) */
+	    || (first == TCL_INDEX_AFTER)	/* Know (first > end) */
 
 	/*
-	 * Just working with the first character.
+	 * Tricky to determine when runtime (last < first) can be
+	 * certainly known based on the encoded values. Consider the
+	 * cases...
+	 *
+	 * (first <= TCL_INDEX_END) &&
+	 *	(last == TCL_INDEX_AFTER) => cannot tell REJECT
+	 *	(last <= TCL_INDEX END) && (last < first) => ACCEPT
+	 *	else => cannot tell REJECT
 	 */
-
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
-	if (replacementTokenPtr == NULL) {
-	    /* Drop first */
-	    OP44(	STR_RANGE_IMM, 1, TCL_INDEX_END);
-	    return TCL_OK;
-	}
-	/* Replace first */
-	CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
+	    || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END)
+		&& (last < first))		/* Know (last < first) */
 	/*
-	 * NOTE: The following tower of bullshit is present because
-	 * [string replace] was boneheadedly defined not to replace
-	 * empty strings, so we actually have to detect the empty
-	 * string case and treat it differently.
+	 * (first == TCL_INDEX_BEFORE) &&
+	 *	(last == TCL_INDEX_AFTER) => (first < last) REJECT
+	 *	(last <= TCL_INDEX_END) => cannot tell REJECT
+	 *	else		=> (first < last) REJECT
+	 *
+	 * else [[first >= TCL_INDEX_START]] &&
+	 *	(last == TCL_INDEX_AFTER) => cannot tell REJECT
+	 *	(last <= TCL_INDEX_END) => cannot tell REJECT
+	 *	else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT
 	 */
-
-	OP4(		OVER, 1);
-	PUSH(		"");
-	OP(		STR_EQ);
-	JUMP1(		JUMP_FALSE, notEq);
-	OP(		POP);
-	JUMP1(		JUMP, end);
-	FIXJUMP1(notEq);
-	TclAdjustStackDepth(1, envPtr);
-	OP4(		REVERSE, 2);
-	OP44(		STR_RANGE_IMM, 1, TCL_INDEX_END);
-	OP1(		STR_CONCAT1, 2);
-	FIXJUMP1(end);
+	    || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START)
+		&& (last < first))) {		/* Know (last < first) */
+	if (parsePtr->numWords == 5) {
+	    tokenPtr = TokenAfter(tokenPtr);
+	    CompileWord(envPtr, tokenPtr, interp, 4);
+	    OP(		POP);		/* Pop newString */
+	}
+	/* Original string argument now on TOS as result */
 	return TCL_OK;
+    }
 
-    } else if (idx1 == TCL_INDEX_END && idx2 == TCL_INDEX_END) {
-	int notEq, end;
-
-	/*
-	 * Just working with the last character.
-	 */
+    if (parsePtr->numWords == 5) {
+    /*
+     * When we have a string replacement, we have to take care about
+     * not replacing empty substrings that [string replace] promises
+     * not to replace
+     *
+     * The remaining index values might be suitable for conventional
+     * string replacement, but only if they cannot possibly meet the
+     * conditions described above at runtime. If there's a chance they
+     * might, we would have to emit bytecode to check and at that point
+     * we're paying more in bytecode execution time than would make
+     * things worthwhile. Trouble is we are very limited in
+     * how much we can detect that at compile time. After decoding,
+     * we need, first:
+     *
+     *		(first <= end)
+     *
+     * The encoded indices (first <= TCL_INDEX END) and
+     * (first == TCL_INDEX_BEFORE) always meets this condition, but
+     * any other encoded first index has some list for which it fails.
+     *
+     * We also need, second:
+     *
+     *		(last >= 0)
+     *
+     * The encoded indices (last >= TCL_INDEX_START) and
+     * (last == TCL_INDEX_AFTER) always meet this condition but any
+     * other encoded last index has some list for which it fails.
+     *
+     * Finally we need, third:
+     *
+     *		(first <= last)
+     * 
+     * Considered in combination with the constraints we already have,
+     * we see that we can proceed when (first == TCL_INDEX_BEFORE)
+     * or (last == TCL_INDEX_AFTER). These also permit simplification
+     * of the prefix|replace|suffix construction. The other constraints,
+     * though, interfere with getting a guarantee that first <= last. 
+     */
 
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
-	if (replacementTokenPtr == NULL) {
-	    /* Drop last */
-	    OP44(	STR_RANGE_IMM, 0, TCL_INDEX_END-1);
-	    return TCL_OK;
+    if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) {
+	/* empty prefix */
+	tokenPtr = TokenAfter(tokenPtr);
+	CompileWord(envPtr, tokenPtr, interp, 4);
+	OP4(		REVERSE, 2);
+	if (last == TCL_INDEX_AFTER) {
+	    OP(		POP);		/* Pop  original */
+	} else {
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    OP1(	STR_CONCAT1, 2);
 	}
-	/* Replace last */
-	CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
-	/* More bullshit; see NOTE above. */
+	return TCL_OK;
+    }
 
-	OP4(		OVER, 1);
-	PUSH(		"");
-	OP(		STR_EQ);
-	JUMP1(		JUMP_FALSE, notEq);
-	OP(		POP);
-	JUMP1(		JUMP, end);
-	FIXJUMP1(notEq);
-	TclAdjustStackDepth(1, envPtr);
-	OP4(		REVERSE, 2);
-	OP44(		STR_RANGE_IMM, 0, TCL_INDEX_END-1);
-	OP4(		REVERSE, 2);
+    if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) {
+	OP44(		STR_RANGE_IMM, 0, first-1);
+	tokenPtr = TokenAfter(tokenPtr);
+	CompileWord(envPtr, tokenPtr, interp, 4);
 	OP1(		STR_CONCAT1, 2);
-	FIXJUMP1(end);
 	return TCL_OK;
+    }
+
+	/* FLOW THROUGH TO genericReplace */
 
     } else {
-	/*
-	 * Need to process indices at runtime. This could be because the
-	 * indices are not constants, or because we need to resolve them to
-	 * absolute indices to work out if a replacement is going to happen.
-	 * In any case, to runtime it is.
+	/* 
+	 * When we have no replacement string to worry about, we may
+	 * have more luck, because the forbidden empty string replacements
+	 * are harmless when they are replaced by another empty string.
 	 */
 
+	if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) {
+	    /* empty prefix - build suffix only */
+
+	    if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+		/* empty suffix too => empty result */
+		OP(	POP);		/* Pop  original */
+		PUSH	(	"");
+		return TCL_OK;
+	    }
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    return TCL_OK;
+	} else {
+	    if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+		/* empty suffix - build prefix only */
+		OP44(	STR_RANGE_IMM, 0, first-1);
+		return TCL_OK;
+	    }
+	    OP(		DUP);
+	    OP44(	STR_RANGE_IMM, 0, first-1);
+	    OP4(	REVERSE, 2);
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    OP1(	STR_CONCAT1, 2);
+	    return TCL_OK;
+	}
+    }
+
     genericReplace:
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
 	tokenPtr = TokenAfter(valueTokenPtr);
 	CompileWord(envPtr, tokenPtr, interp, 2);
 	tokenPtr = TokenAfter(tokenPtr);
 	CompileWord(envPtr, tokenPtr, interp, 3);
-	if (replacementTokenPtr != NULL) {
-	    CompileWord(envPtr, replacementTokenPtr, interp, 4);
+	if (parsePtr->numWords == 5) {
+	    tokenPtr = TokenAfter(tokenPtr);
+	    CompileWord(envPtr, tokenPtr, interp, 4);
 	} else {
 	    PUSH(	"");
 	}
 	OP(		STR_REPLACE);
 	return TCL_OK;
-    }
 }
 
 int
author	dgp <dgp@users.sourceforge.net>	2018-03-13 17:14:40 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2018-03-13 17:14:40 (GMT)
commit	62079a5c6951d9555cce57bdeb5a227a5be8b906 (patch)
tree	14a91693203f05d725c045e5073d28eab0f3cf5d /generic/tclCompCmdsSZ.c
parent	41ed3e0e2bc9fb87e4a63f737ddcc00041af42da (diff)
parent	a275b8a5ea188f0633caab60b3ab709de4239f1f (diff)
download	tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.zip tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.gz tcl-62079a5c6951d9555cce57bdeb5a227a5be8b906.tar.bz2