merge 8.6

author: dgp <dgp@users.sourceforge.net> 2018-03-14 21:51:13 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2018-03-14 21:51:13 (GMT)
commit: 72c721f3e71c327c2e70def50fb04f48c4527442 (patch)
tree: 5be4eacb21d2c81585f74ef9cc9864e6a13bf086
parent: 6d4320b5fbe2afd973751c4b5de70a89e0d62214 (diff)
parent: 1bf60bab3cd706cba08c15df5a6a5267d918aeae (diff)
download: tcl-72c721f3e71c327c2e70def50fb04f48c4527442.zip
tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.gz
tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.bz2
6 files changed, 378 insertions, 194 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d63a985..a3a79f8 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2353,7 +2353,7 @@ StringRplcCmd(
     Tcl_Obj *const objv[])	/* Argument objects. */
 {
     Tcl_UniChar *ustring;
-    int first, last, length;
+    int first, last, length, end;
 
     if (objc < 4 || objc > 5) {
 	Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?");
@@ -2361,20 +2361,38 @@ StringRplcCmd(
     }
 
     ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
-    length--;
+    end = length - 1;
 
-    if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
-	    TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
+    if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK ||
+	    TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK){
 	return TCL_ERROR;
     }
 
-    if ((last < first) || (last < 0) || (first > length)) {
+    /*
+     * The following test screens out most empty substrings as
+     * candidates for replacement. When they are detected, no
+     * replacement is done, and the result is the original string,
+     */
+    if ((last < 0) ||		/* Range ends before start of string */
+	    (first > end) ||	/* Range begins after end of string */
+	    (last < first)) {	/* Range begins after it starts */
+
+	/*
+	 * BUT!!! when (end < 0) -- an empty original string -- we can
+	 * have (first <= end < 0 <= last) and an empty string is permitted
+	 * to be replaced.
+	 */
 	Tcl_SetObjResult(interp, objv[1]);
     } else {
 	Tcl_Obj *resultPtr;
 
+	/*
+	 * We are re-fetching in case the string argument is same value as 
+	 * an index argument, and shimmering cost us our ustring.
+	 */
+
 	ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
-	length--;
+	end = length-1;
 
 	if (first < 0) {
 	    first = 0;
@@ -2384,9 +2402,9 @@ StringRplcCmd(
 	if (objc == 5) {
 	    Tcl_AppendObjToObj(resultPtr, objv[4]);
 	}
-	if (last < length) {
+	if (last < end) {
 	    Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1,
-		    length - last);
+		    end - last);
 	}
 	Tcl_SetObjResult(interp, resultPtr);
     }
@@ -3267,8 +3285,7 @@ StringTrimCmd(
     }
     string1 = TclGetStringFromObj(objv[1], &length1);
 
-    triml = TclTrimLeft(string1, length1, string2, length2);
-    trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);
+    triml = TclTrim(string1, length1, string2, length2, &trimr);
 
     Tcl_SetObjResult(interp,
 	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 5cd1c3b..c13376b 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -995,147 +995,197 @@ TclCompileStringReplaceCmd(
 				 * compiled. */
     CompileEnv *envPtr)		/* Holds the resulting instructions. */
 {
-    Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL;
+    Tcl_Token *tokenPtr, *valueTokenPtr;
     DefineLineInformation;	/* TIP #280 */
-    int idx1, idx2;
+    int first, last;
 
     if (parsePtr->numWords < 4 || parsePtr->numWords > 5) {
 	return TCL_ERROR;
     }
+ 
+    /* Bytecode to compute/push string argument being replaced */
     valueTokenPtr = TokenAfter(parsePtr->tokenPtr);
-    if (parsePtr->numWords == 5) {
-	tokenPtr = TokenAfter(valueTokenPtr);
-	tokenPtr = TokenAfter(tokenPtr);
-	replacementTokenPtr = TokenAfter(tokenPtr);
-    }
+    CompileWord(envPtr, valueTokenPtr, interp, 1);
 
+    /*
+     * Check for first index known and useful at compile time. 
+     */
     tokenPtr = TokenAfter(valueTokenPtr);
-    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER,
-	    &idx1) != TCL_OK) {
+    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+	    &first) != TCL_OK) {
 	goto genericReplace;
     }
+
     /*
-     * Token parsed as an index value. Indices before the string are
-     * treated as index of start of string.
+     * Check for last index known and useful at compile time. 
      */
-
     tokenPtr = TokenAfter(tokenPtr);
-    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END,
-	    &idx2) != TCL_OK) {
+    if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+	    &last) != TCL_OK) {
 	goto genericReplace;
     }
-    /*
-     * Token parsed as an index value. Indices after the string are
-     * treated as index of end of string.
-     */
 
-/* TODO...... */
-    /*
-     * We handle these replacements specially: first character (where
-     * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything
-     * else and the semantics get rather screwy.
+    /* 
+     * [string replace] is an odd bird.  For many arguments it is
+     * a conventional substring replacer.  However it also goes out
+     * of its way to become a no-op for many cases where it would be
+     * replacing an empty substring.  Precisely, it is a no-op when
      *
-     * TODO: These seem to be very narrow cases.  They are not even
-     * covered by the test suite, and any programming that ends up
-     * here could have been coded by the programmer using [string range]
-     * and [string cat]. [*]  Not clear at all to me that the bytecode
-     * generated here is worthwhile.
+     *		(last < first)		OR
+     *		(last < 0)		OR
+     *		(end < first)
      *
-     *  [*] Except for the empty string exceptions.  UGGGGHHHH.
+     * For some compile-time values we can detect these cases, and
+     * compile direct to bytecode implementing the no-op.
      */
 
-    if (idx1 == 0 && idx2 == 0) {
-	int notEq, end;
+    if ((last == TCL_INDEX_BEFORE)		/* Know (last < 0) */
+	    || (first == TCL_INDEX_AFTER)	/* Know (first > end) */
 
 	/*
-	 * Just working with the first character.
+	 * Tricky to determine when runtime (last < first) can be
+	 * certainly known based on the encoded values. Consider the
+	 * cases...
+	 *
+	 * (first <= TCL_INDEX_END) &&
+	 *	(last == TCL_INDEX_AFTER) => cannot tell REJECT
+	 *	(last <= TCL_INDEX END) && (last < first) => ACCEPT
+	 *	else => cannot tell REJECT
 	 */
-
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
-	if (replacementTokenPtr == NULL) {
-	    /* Drop first */
-	    OP44(	STR_RANGE_IMM, 1, TCL_INDEX_END);
-	    return TCL_OK;
-	}
-	/* Replace first */
-	CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
+	    || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END)
+		&& (last < first))		/* Know (last < first) */
 	/*
-	 * NOTE: The following tower of bullshit is present because
-	 * [string replace] was boneheadedly defined not to replace
-	 * empty strings, so we actually have to detect the empty
-	 * string case and treat it differently.
+	 * (first == TCL_INDEX_BEFORE) &&
+	 *	(last == TCL_INDEX_AFTER) => (first < last) REJECT
+	 *	(last <= TCL_INDEX_END) => cannot tell REJECT
+	 *	else		=> (first < last) REJECT
+	 *
+	 * else [[first >= TCL_INDEX_START]] &&
+	 *	(last == TCL_INDEX_AFTER) => cannot tell REJECT
+	 *	(last <= TCL_INDEX_END) => cannot tell REJECT
+	 *	else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT
 	 */
-
-	OP4(		OVER, 1);
-	PUSH(		"");
-	OP(		STR_EQ);
-	JUMP1(		JUMP_FALSE, notEq);
-	OP(		POP);
-	JUMP1(		JUMP, end);
-	FIXJUMP1(notEq);
-	TclAdjustStackDepth(1, envPtr);
-	OP4(		REVERSE, 2);
-	OP44(		STR_RANGE_IMM, 1, TCL_INDEX_END);
-	OP1(		STR_CONCAT1, 2);
-	FIXJUMP1(end);
+	    || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START)
+		&& (last < first))) {		/* Know (last < first) */
+	if (parsePtr->numWords == 5) {
+	    tokenPtr = TokenAfter(tokenPtr);
+	    CompileWord(envPtr, tokenPtr, interp, 4);
+	    OP(		POP);		/* Pop newString */
+	}
+	/* Original string argument now on TOS as result */
 	return TCL_OK;
+    }
 
-    } else if (idx1 == TCL_INDEX_END && idx2 == TCL_INDEX_END) {
-	int notEq, end;
-
-	/*
-	 * Just working with the last character.
-	 */
+    if (parsePtr->numWords == 5) {
+    /*
+     * When we have a string replacement, we have to take care about
+     * not replacing empty substrings that [string replace] promises
+     * not to replace
+     *
+     * The remaining index values might be suitable for conventional
+     * string replacement, but only if they cannot possibly meet the
+     * conditions described above at runtime. If there's a chance they
+     * might, we would have to emit bytecode to check and at that point
+     * we're paying more in bytecode execution time than would make
+     * things worthwhile. Trouble is we are very limited in
+     * how much we can detect that at compile time. After decoding,
+     * we need, first:
+     *
+     *		(first <= end)
+     *
+     * The encoded indices (first <= TCL_INDEX END) and
+     * (first == TCL_INDEX_BEFORE) always meets this condition, but
+     * any other encoded first index has some list for which it fails.
+     *
+     * We also need, second:
+     *
+     *		(last >= 0)
+     *
+     * The encoded indices (last >= TCL_INDEX_START) and
+     * (last == TCL_INDEX_AFTER) always meet this condition but any
+     * other encoded last index has some list for which it fails.
+     *
+     * Finally we need, third:
+     *
+     *		(first <= last)
+     * 
+     * Considered in combination with the constraints we already have,
+     * we see that we can proceed when (first == TCL_INDEX_BEFORE)
+     * or (last == TCL_INDEX_AFTER). These also permit simplification
+     * of the prefix|replace|suffix construction. The other constraints,
+     * though, interfere with getting a guarantee that first <= last. 
+     */
 
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
-	if (replacementTokenPtr == NULL) {
-	    /* Drop last */
-	    OP44(	STR_RANGE_IMM, 0, TCL_INDEX_END-1);
-	    return TCL_OK;
+    if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) {
+	/* empty prefix */
+	tokenPtr = TokenAfter(tokenPtr);
+	CompileWord(envPtr, tokenPtr, interp, 4);
+	OP4(		REVERSE, 2);
+	if (last == TCL_INDEX_AFTER) {
+	    OP(		POP);		/* Pop  original */
+	} else {
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    OP1(	STR_CONCAT1, 2);
 	}
-	/* Replace last */
-	CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
-	/* More bullshit; see NOTE above. */
+	return TCL_OK;
+    }
 
-	OP4(		OVER, 1);
-	PUSH(		"");
-	OP(		STR_EQ);
-	JUMP1(		JUMP_FALSE, notEq);
-	OP(		POP);
-	JUMP1(		JUMP, end);
-	FIXJUMP1(notEq);
-	TclAdjustStackDepth(1, envPtr);
-	OP4(		REVERSE, 2);
-	OP44(		STR_RANGE_IMM, 0, TCL_INDEX_END-1);
-	OP4(		REVERSE, 2);
+    if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) {
+	OP44(		STR_RANGE_IMM, 0, first-1);
+	tokenPtr = TokenAfter(tokenPtr);
+	CompileWord(envPtr, tokenPtr, interp, 4);
 	OP1(		STR_CONCAT1, 2);
-	FIXJUMP1(end);
 	return TCL_OK;
+    }
+
+	/* FLOW THROUGH TO genericReplace */
 
     } else {
-	/*
-	 * Need to process indices at runtime. This could be because the
-	 * indices are not constants, or because we need to resolve them to
-	 * absolute indices to work out if a replacement is going to happen.
-	 * In any case, to runtime it is.
+	/* 
+	 * When we have no replacement string to worry about, we may
+	 * have more luck, because the forbidden empty string replacements
+	 * are harmless when they are replaced by another empty string.
 	 */
 
+	if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) {
+	    /* empty prefix - build suffix only */
+
+	    if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+		/* empty suffix too => empty result */
+		OP(	POP);		/* Pop  original */
+		PUSH	(	"");
+		return TCL_OK;
+	    }
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    return TCL_OK;
+	} else {
+	    if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+		/* empty suffix - build prefix only */
+		OP44(	STR_RANGE_IMM, 0, first-1);
+		return TCL_OK;
+	    }
+	    OP(		DUP);
+	    OP44(	STR_RANGE_IMM, 0, first-1);
+	    OP4(	REVERSE, 2);
+	    OP44(	STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+	    OP1(	STR_CONCAT1, 2);
+	    return TCL_OK;
+	}
+    }
+
     genericReplace:
-	CompileWord(envPtr, valueTokenPtr, interp, 1);
 	tokenPtr = TokenAfter(valueTokenPtr);
 	CompileWord(envPtr, tokenPtr, interp, 2);
 	tokenPtr = TokenAfter(tokenPtr);
 	CompileWord(envPtr, tokenPtr, interp, 3);
-	if (replacementTokenPtr != NULL) {
-	    CompileWord(envPtr, replacementTokenPtr, interp, 4);
+	if (parsePtr->numWords == 5) {
+	    tokenPtr = TokenAfter(tokenPtr);
+	    CompileWord(envPtr, tokenPtr, interp, 4);
 	} else {
 	    PUSH(	"");
 	}
 	OP(		STR_REPLACE);
 	return TCL_OK;
-    }
 }
 
 int
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index ef64ac1..aab9092 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5701,18 +5701,18 @@ TEBCresume(
 
     {
 	Tcl_UniChar *ustring1, *ustring2, *ustring3, *end, *p;
-	int length3;
+	int length3, endIdx;
 	Tcl_Obj *value3Ptr;
 
     case INST_STR_REPLACE:
 	value3Ptr = POP_OBJECT();
 	valuePtr = OBJ_AT_DEPTH(2);
-	length = Tcl_GetCharLength(valuePtr) - 1;
+	endIdx = Tcl_GetCharLength(valuePtr) - 1;
 	TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr),
 		O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr)));
-	if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length,
+	if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, endIdx,
 		    &fromIdx) != TCL_OK
-	    || TclGetIntForIndexM(interp, OBJ_AT_TOS, length,
+	    || TclGetIntForIndexM(interp, OBJ_AT_TOS, endIdx,
 		    &toIdx) != TCL_OK) {
 	    TclDecrRefCount(value3Ptr);
 	    TRACE_ERROR(interp);
@@ -5722,21 +5722,24 @@ TEBCresume(
 	(void) POP_OBJECT();
 	TclDecrRefCount(OBJ_AT_TOS);
 	(void) POP_OBJECT();
-	if (fromIdx < 0) {
-	    fromIdx = 0;
-	}
 
-	if (fromIdx > toIdx || fromIdx > length) {
+	if ((toIdx < 0) ||
+		(fromIdx > endIdx) ||
+		(toIdx < fromIdx)) {
 	    TRACE_APPEND(("\"%.30s\"\n", O2S(valuePtr)));
 	    TclDecrRefCount(value3Ptr);
 	    NEXT_INST_F(1, 0, 0);
 	}
 
-	if (toIdx > length) {
-	    toIdx = length;
+	if (fromIdx < 0) {
+	    fromIdx = 0;
+	}
+
+	if (toIdx > endIdx) {
+	    toIdx = endIdx;
 	}
 
-	if (fromIdx == 0 && toIdx == length) {
+	if (fromIdx == 0 && toIdx == endIdx) {
 	    TclDecrRefCount(OBJ_AT_TOS);
 	    OBJ_AT_TOS = value3Ptr;
 	    TRACE_APPEND(("\"%.30s\"\n", O2S(value3Ptr)));
@@ -6019,12 +6022,7 @@ TEBCresume(
 	value2Ptr = OBJ_AT_TOS;		/* TrimSet */
 	string2 = TclGetStringFromObj(value2Ptr, &length2);
 	string1 = TclGetStringFromObj(valuePtr, &length);
-	trim1 = TclTrimLeft(string1, length, string2, length2);
-	if (trim1 < length) {
-	    trim2 = TclTrimRight(string1, length, string2, length2);
-	} else {
-	    trim2 = 0;
-	}
+	trim1 = TclTrim(string1, length, string2, length2, &trim2);
     createTrimmedString:
 	/*
 	 * Careful here; trim set often contains non-ASCII characters so we
diff --git a/generic/tclInt.h b/generic/tclInt.h
index ef48126..dc7909c 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3166,6 +3166,8 @@ MODULE_SCOPE void	TclSubstParse(Tcl_Interp *interp, const char *bytes,
 MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
 			    int count, int *tokensLeftPtr, int line,
 			    int *clNextOuter, const char *outerScript);
+MODULE_SCOPE int	TclTrim(const char *bytes, int numBytes,
+			    const char *trim, int numTrim, int *trimRight);
 MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
 			    const char *trim, int numTrim);
 MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 2bd6b51..6eab7b8 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1643,11 +1643,46 @@ Tcl_Backslash(
 /*
  *----------------------------------------------------------------------
  *
- * TclTrimRight --
+ * UtfWellFormedEnd --
+ *	Checks the end of utf string is malformed, if yes - wraps bytes
+ *	to the given buffer (as well-formed NTS string).  The buffer
+ *	argument should be initialized by the caller and ready to use.
+ *
+ * Results:
+ *	The bytes with well-formed end of the string.
  *
- *	Takes two counted strings in the Tcl encoding which must both be null
- *	terminated. Conceptually trims from the right side of the first string
- *	all characters found in the second string.
+ * Side effects:
+ *	Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+UtfWellFormedEnd(
+    Tcl_DString *buffer,	/* Buffer used to hold well-formed string. */
+    const char *bytes,		/* Pointer to the beginning of the string. */
+    int length)			/* Length of the string. */
+{
+    const char *l = bytes + length;
+    const char *p = Tcl_UtfPrev(l, bytes);
+
+    if (Tcl_UtfCharComplete(p, l - p)) {
+	return bytes;
+    }
+    /* 
+     * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+     * avoid segfault by access violation out of range.
+     */
+    Tcl_DStringAppend(buffer, bytes, length);
+    return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimRight --
+ *	Takes two counted strings in the Tcl encoding.  Conceptually
+ *	finds the sub string (offset) to trim from the right side of the
+ *	first string all characters found in the second string.
  *
  * Results:
  *	The number of bytes to be removed from the end of the string.
@@ -1658,8 +1693,8 @@ Tcl_Backslash(
  *----------------------------------------------------------------------
  */
 
-int
-TclTrimRight(
+static inline int
+TrimRight(
     const char *bytes,		/* String to be trimmed... */
     int numBytes,		/* ...and its length in bytes */
     const char *trim,		/* String of trim characters... */
@@ -1668,18 +1703,6 @@ TclTrimRight(
     const char *p = bytes + numBytes;
     int pInc;
 
-    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
-	Tcl_Panic("TclTrimRight works only on null-terminated strings");
-    }
-
-    /*
-     * Empty strings -> nothing to do.
-     */
-
-    if ((numBytes == 0) || (numTrim == 0)) {
-	return 0;
-    }
-
     /*
      * Outer loop: iterate over string to be trimmed.
      */
@@ -1720,15 +1743,46 @@ TclTrimRight(
 
     return numBytes - (p - bytes);
 }
+
+int
+TclTrimRight(
+    const char *bytes,	/* String to be trimmed... */
+    int numBytes,	/* ...and its length in bytes */
+    const char *trim,	/* String of trim characters... */
+    int numTrim)	/* ...and its length in bytes */
+{
+    int res;
+    Tcl_DString bytesBuf, trimBuf;
+
+    /* Empty strings -> nothing to do */
+    if ((numBytes == 0) || (numTrim == 0)) {
+	return 0;
+    }
+
+    Tcl_DStringInit(&bytesBuf);
+    Tcl_DStringInit(&trimBuf);
+    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+    res = TrimRight(bytes, numBytes, trim, numTrim);
+    if (res > numBytes) {
+	res = numBytes;
+    }
+
+    Tcl_DStringFree(&bytesBuf);
+    Tcl_DStringFree(&trimBuf);
+
+    return res;
+}
 
 /*
  *----------------------------------------------------------------------
  *
  * TclTrimLeft --
  *
- *	Takes two counted strings in the Tcl encoding which must both be null
- *	terminated. Conceptually trims from the left side of the first string
- *	all characters found in the second string.
+ *	Takes two counted strings in the Tcl encoding.  Conceptually
+ *	finds the sub string (offset) to trim from the left side of the
+ *	first string all characters found in the second string.
  *
  * Results:
  *	The number of bytes to be removed from the start of the string.
@@ -1739,8 +1793,8 @@ TclTrimRight(
  *----------------------------------------------------------------------
  */
 
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
     const char *bytes,		/* String to be trimmed... */
     int numBytes,		/* ...and its length in bytes */
     const char *trim,		/* String of trim characters... */
@@ -1748,18 +1802,6 @@ TclTrimLeft(
 {
     const char *p = bytes;
 
-    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
-	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
-    }
-
-    /*
-     * Empty strings -> nothing to do.
-     */
-
-    if ((numBytes == 0) || (numTrim == 0)) {
-	return 0;
-    }
-
     /*
      * Outer loop: iterate over string to be trimmed.
      */
@@ -1796,10 +1838,99 @@ TclTrimLeft(
 
 	p += pInc;
 	numBytes -= pInc;
-    } while (numBytes);
+    } while (numBytes > 0);
 
     return p - bytes;
 }
+
+int
+TclTrimLeft(
+    const char *bytes,	/* String to be trimmed... */
+    int numBytes,	/* ...and its length in bytes */
+    const char *trim,	/* String of trim characters... */
+    int numTrim)	/* ...and its length in bytes */
+{
+    int res;
+    Tcl_DString bytesBuf, trimBuf;
+
+    /* Empty strings -> nothing to do */
+    if ((numBytes == 0) || (numTrim == 0)) {
+	return 0;
+    }
+
+    Tcl_DStringInit(&bytesBuf);
+    Tcl_DStringInit(&trimBuf);
+    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+    res = TrimLeft(bytes, numBytes, trim, numTrim);
+    if (res > numBytes) {
+	res = numBytes;
+    }
+
+    Tcl_DStringFree(&bytesBuf);
+    Tcl_DStringFree(&trimBuf);
+
+    return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ *	Finds the sub string (offset) to trim from both sides of the
+ *	first string all characters found in the second string.
+ *
+ * Results:
+ *	The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+    const char *bytes,	/* String to be trimmed... */
+    int numBytes,	/* ...and its length in bytes */
+    const char *trim,	/* String of trim characters... */
+    int numTrim,	/* ...and its length in bytes */
+    int *trimRight)		/* Offset from the end of the string. */
+{
+    int trimLeft;
+    Tcl_DString bytesBuf, trimBuf;
+
+    *trimRight = 0;
+    /* Empty strings -> nothing to do */
+    if ((numBytes == 0) || (numTrim == 0)) {
+	return 0;
+    }
+
+    Tcl_DStringInit(&bytesBuf);
+    Tcl_DStringInit(&trimBuf);
+    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+    trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+    if (trimLeft > numBytes) {
+	trimLeft = numBytes;
+    }
+    numBytes -= trimLeft;
+    /* have to trim yet (first char was already verified within TrimLeft) */
+    if (numBytes > 1) {
+	bytes += trimLeft;
+	*trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+	if (*trimRight > numBytes) {
+	    *trimRight = numBytes;
+	}
+    }
+
+    Tcl_DStringFree(&bytesBuf);
+    Tcl_DStringFree(&trimBuf);
+
+    return trimLeft;
+}
 
 /*
  *----------------------------------------------------------------------
@@ -1867,30 +1998,20 @@ Tcl_Concat(
     result = ckalloc((unsigned) (bytesNeeded + argc));
 
     for (p = result, i = 0;  i < argc;  i++) {
-	int trim, elemLength;
+	int triml, trimr, elemLength;
 	const char *element;
 
 	element = argv[i];
 	elemLength = strlen(argv[i]);
 
-	/*
-	 * Trim away the leading whitespace.
-	 */
-
-	trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
-		CONCAT_WS_SIZE);
-	element += trim;
-	elemLength -= trim;
-
-	/*
-	 * Trim away the trailing whitespace. Do not permit trimming to expose
-	 * a final backslash character.
-	 */
+	/* Trim away the leading/trailing whitespace. */
+	triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+		CONCAT_WS_SIZE, &trimr);
+	element += triml;
+	elemLength -= triml + trimr;
 
-	trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
-		CONCAT_WS_SIZE);
-	trim -= trim && (element[elemLength - trim - 1] == '\\');
-	elemLength -= trim;
+	/* Do not permit trimming to expose a final backslash character. */
+	elemLength += trimr && (element[elemLength - 1] == '\\');
 
 	/*
 	 * If we're left with empty element after trimming, do nothing.
@@ -2010,28 +2131,18 @@ Tcl_ConcatObj(
     Tcl_SetObjLength(resPtr, 0);
 
     for (i = 0;  i < objc;  i++) {
-	int trim;
+	int triml, trimr;
 
 	element = TclGetStringFromObj(objv[i], &elemLength);
 
-	/*
-	 * Trim away the leading whitespace.
-	 */
-
-	trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
-		CONCAT_WS_SIZE);
-	element += trim;
-	elemLength -= trim;
-
-	/*
-	 * Trim away the trailing whitespace. Do not permit trimming to expose
-	 * a final backslash character.
-	 */
+	/* Trim away the leading/trailing whitespace. */
+	triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+		CONCAT_WS_SIZE, &trimr);
+	element += triml;
+	elemLength -= triml + trimr;
 
-	trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
-		CONCAT_WS_SIZE);
-	trim -= trim && (element[elemLength - trim - 1] == '\\');
-	elemLength -= trim;
+	/* Do not permit trimming to expose a final backslash character. */
+	elemLength += trimr && (element[elemLength - 1] == '\\');
 
 	/*
 	 * If we're left with empty element after trimming, do nothing.
diff --git a/tests/string.test b/tests/string.test
index fc8dabb..d69fda4 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1378,6 +1378,12 @@ test string-14.16 {string replace} {
 test string-14.17 {string replace} {
     string replace abcdefghijklmnop end end-1
 } {abcdefghijklmnop}
+test string-14.18 {string replace} {
+    string replace abcdefghijklmnop 10 9 XXX
+} {abcdefghijklmnop}
+test string-14.19 {string replace} {
+    string replace {} -1 0 A
+} A
 
 test string-15.1 {string tolower too few args} {
     list [catch {string tolower} msg] $msg
author	dgp <dgp@users.sourceforge.net>	2018-03-14 21:51:13 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2018-03-14 21:51:13 (GMT)
commit	72c721f3e71c327c2e70def50fb04f48c4527442 (patch)
tree	5be4eacb21d2c81585f74ef9cc9864e6a13bf086
parent	6d4320b5fbe2afd973751c4b5de70a89e0d62214 (diff)
parent	1bf60bab3cd706cba08c15df5a6a5267d918aeae (diff)
download	tcl-72c721f3e71c327c2e70def50fb04f48c4527442.zip tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.gz tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.bz2