summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2018-03-14 21:51:13 (GMT)
committerdgp <dgp@users.sourceforge.net>2018-03-14 21:51:13 (GMT)
commit72c721f3e71c327c2e70def50fb04f48c4527442 (patch)
tree5be4eacb21d2c81585f74ef9cc9864e6a13bf086
parent6d4320b5fbe2afd973751c4b5de70a89e0d62214 (diff)
parent1bf60bab3cd706cba08c15df5a6a5267d918aeae (diff)
downloadtcl-72c721f3e71c327c2e70def50fb04f48c4527442.zip
tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.gz
tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.bz2
merge 8.6
-rw-r--r--generic/tclCmdMZ.c37
-rw-r--r--generic/tclCompCmdsSZ.c240
-rw-r--r--generic/tclExecute.c32
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclUtil.c255
-rw-r--r--tests/string.test6
6 files changed, 378 insertions, 194 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d63a985..a3a79f8 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2353,7 +2353,7 @@ StringRplcCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
Tcl_UniChar *ustring;
- int first, last, length;
+ int first, last, length, end;
if (objc < 4 || objc > 5) {
Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?");
@@ -2361,20 +2361,38 @@ StringRplcCmd(
}
ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
- length--;
+ end = length - 1;
- if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
- TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
+ if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK ||
+ TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK){
return TCL_ERROR;
}
- if ((last < first) || (last < 0) || (first > length)) {
+ /*
+ * The following test screens out most empty substrings as
+ * candidates for replacement. When they are detected, no
+ * replacement is done, and the result is the original string,
+ */
+ if ((last < 0) || /* Range ends before start of string */
+ (first > end) || /* Range begins after end of string */
+ (last < first)) { /* Range begins after it starts */
+
+ /*
+ * BUT!!! when (end < 0) -- an empty original string -- we can
+ * have (first <= end < 0 <= last) and an empty string is permitted
+ * to be replaced.
+ */
Tcl_SetObjResult(interp, objv[1]);
} else {
Tcl_Obj *resultPtr;
+ /*
+ * We are re-fetching in case the string argument is same value as
+ * an index argument, and shimmering cost us our ustring.
+ */
+
ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
- length--;
+ end = length-1;
if (first < 0) {
first = 0;
@@ -2384,9 +2402,9 @@ StringRplcCmd(
if (objc == 5) {
Tcl_AppendObjToObj(resultPtr, objv[4]);
}
- if (last < length) {
+ if (last < end) {
Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1,
- length - last);
+ end - last);
}
Tcl_SetObjResult(interp, resultPtr);
}
@@ -3267,8 +3285,7 @@ StringTrimCmd(
}
string1 = TclGetStringFromObj(objv[1], &length1);
- triml = TclTrimLeft(string1, length1, string2, length2);
- trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);
+ triml = TclTrim(string1, length1, string2, length2, &trimr);
Tcl_SetObjResult(interp,
Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 5cd1c3b..c13376b 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -995,147 +995,197 @@ TclCompileStringReplaceCmd(
* compiled. */
CompileEnv *envPtr) /* Holds the resulting instructions. */
{
- Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL;
+ Tcl_Token *tokenPtr, *valueTokenPtr;
DefineLineInformation; /* TIP #280 */
- int idx1, idx2;
+ int first, last;
if (parsePtr->numWords < 4 || parsePtr->numWords > 5) {
return TCL_ERROR;
}
+
+ /* Bytecode to compute/push string argument being replaced */
valueTokenPtr = TokenAfter(parsePtr->tokenPtr);
- if (parsePtr->numWords == 5) {
- tokenPtr = TokenAfter(valueTokenPtr);
- tokenPtr = TokenAfter(tokenPtr);
- replacementTokenPtr = TokenAfter(tokenPtr);
- }
+ CompileWord(envPtr, valueTokenPtr, interp, 1);
+ /*
+ * Check for first index known and useful at compile time.
+ */
tokenPtr = TokenAfter(valueTokenPtr);
- if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER,
- &idx1) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+ &first) != TCL_OK) {
goto genericReplace;
}
+
/*
- * Token parsed as an index value. Indices before the string are
- * treated as index of start of string.
+ * Check for last index known and useful at compile time.
*/
-
tokenPtr = TokenAfter(tokenPtr);
- if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END,
- &idx2) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+ &last) != TCL_OK) {
goto genericReplace;
}
- /*
- * Token parsed as an index value. Indices after the string are
- * treated as index of end of string.
- */
-/* TODO...... */
- /*
- * We handle these replacements specially: first character (where
- * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything
- * else and the semantics get rather screwy.
+ /*
+ * [string replace] is an odd bird. For many arguments it is
+ * a conventional substring replacer. However it also goes out
+ * of its way to become a no-op for many cases where it would be
+ * replacing an empty substring. Precisely, it is a no-op when
*
- * TODO: These seem to be very narrow cases. They are not even
- * covered by the test suite, and any programming that ends up
- * here could have been coded by the programmer using [string range]
- * and [string cat]. [*] Not clear at all to me that the bytecode
- * generated here is worthwhile.
+ * (last < first) OR
+ * (last < 0) OR
+ * (end < first)
*
- * [*] Except for the empty string exceptions. UGGGGHHHH.
+ * For some compile-time values we can detect these cases, and
+ * compile direct to bytecode implementing the no-op.
*/
- if (idx1 == 0 && idx2 == 0) {
- int notEq, end;
+ if ((last == TCL_INDEX_BEFORE) /* Know (last < 0) */
+ || (first == TCL_INDEX_AFTER) /* Know (first > end) */
/*
- * Just working with the first character.
+ * Tricky to determine when runtime (last < first) can be
+ * certainly known based on the encoded values. Consider the
+ * cases...
+ *
+ * (first <= TCL_INDEX_END) &&
+ * (last == TCL_INDEX_AFTER) => cannot tell REJECT
+ * (last <= TCL_INDEX END) && (last < first) => ACCEPT
+ * else => cannot tell REJECT
*/
-
- CompileWord(envPtr, valueTokenPtr, interp, 1);
- if (replacementTokenPtr == NULL) {
- /* Drop first */
- OP44( STR_RANGE_IMM, 1, TCL_INDEX_END);
- return TCL_OK;
- }
- /* Replace first */
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
+ || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END)
+ && (last < first)) /* Know (last < first) */
/*
- * NOTE: The following tower of bullshit is present because
- * [string replace] was boneheadedly defined not to replace
- * empty strings, so we actually have to detect the empty
- * string case and treat it differently.
+ * (first == TCL_INDEX_BEFORE) &&
+ * (last == TCL_INDEX_AFTER) => (first < last) REJECT
+ * (last <= TCL_INDEX_END) => cannot tell REJECT
+ * else => (first < last) REJECT
+ *
+ * else [[first >= TCL_INDEX_START]] &&
+ * (last == TCL_INDEX_AFTER) => cannot tell REJECT
+ * (last <= TCL_INDEX_END) => cannot tell REJECT
+ * else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT
*/
-
- OP4( OVER, 1);
- PUSH( "");
- OP( STR_EQ);
- JUMP1( JUMP_FALSE, notEq);
- OP( POP);
- JUMP1( JUMP, end);
- FIXJUMP1(notEq);
- TclAdjustStackDepth(1, envPtr);
- OP4( REVERSE, 2);
- OP44( STR_RANGE_IMM, 1, TCL_INDEX_END);
- OP1( STR_CONCAT1, 2);
- FIXJUMP1(end);
+ || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START)
+ && (last < first))) { /* Know (last < first) */
+ if (parsePtr->numWords == 5) {
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
+ OP( POP); /* Pop newString */
+ }
+ /* Original string argument now on TOS as result */
return TCL_OK;
+ }
- } else if (idx1 == TCL_INDEX_END && idx2 == TCL_INDEX_END) {
- int notEq, end;
-
- /*
- * Just working with the last character.
- */
+ if (parsePtr->numWords == 5) {
+ /*
+ * When we have a string replacement, we have to take care about
+ * not replacing empty substrings that [string replace] promises
+ * not to replace
+ *
+ * The remaining index values might be suitable for conventional
+ * string replacement, but only if they cannot possibly meet the
+ * conditions described above at runtime. If there's a chance they
+ * might, we would have to emit bytecode to check and at that point
+ * we're paying more in bytecode execution time than would make
+ * things worthwhile. Trouble is we are very limited in
+ * how much we can detect that at compile time. After decoding,
+ * we need, first:
+ *
+ * (first <= end)
+ *
+ * The encoded indices (first <= TCL_INDEX END) and
+ * (first == TCL_INDEX_BEFORE) always meets this condition, but
+ * any other encoded first index has some list for which it fails.
+ *
+ * We also need, second:
+ *
+ * (last >= 0)
+ *
+ * The encoded indices (last >= TCL_INDEX_START) and
+ * (last == TCL_INDEX_AFTER) always meet this condition but any
+ * other encoded last index has some list for which it fails.
+ *
+ * Finally we need, third:
+ *
+ * (first <= last)
+ *
+ * Considered in combination with the constraints we already have,
+ * we see that we can proceed when (first == TCL_INDEX_BEFORE)
+ * or (last == TCL_INDEX_AFTER). These also permit simplification
+ * of the prefix|replace|suffix construction. The other constraints,
+ * though, interfere with getting a guarantee that first <= last.
+ */
- CompileWord(envPtr, valueTokenPtr, interp, 1);
- if (replacementTokenPtr == NULL) {
- /* Drop last */
- OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1);
- return TCL_OK;
+ if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) {
+ /* empty prefix */
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
+ OP4( REVERSE, 2);
+ if (last == TCL_INDEX_AFTER) {
+ OP( POP); /* Pop original */
+ } else {
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ OP1( STR_CONCAT1, 2);
}
- /* Replace last */
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
-
- /* More bullshit; see NOTE above. */
+ return TCL_OK;
+ }
- OP4( OVER, 1);
- PUSH( "");
- OP( STR_EQ);
- JUMP1( JUMP_FALSE, notEq);
- OP( POP);
- JUMP1( JUMP, end);
- FIXJUMP1(notEq);
- TclAdjustStackDepth(1, envPtr);
- OP4( REVERSE, 2);
- OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1);
- OP4( REVERSE, 2);
+ if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) {
+ OP44( STR_RANGE_IMM, 0, first-1);
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
OP1( STR_CONCAT1, 2);
- FIXJUMP1(end);
return TCL_OK;
+ }
+
+ /* FLOW THROUGH TO genericReplace */
} else {
- /*
- * Need to process indices at runtime. This could be because the
- * indices are not constants, or because we need to resolve them to
- * absolute indices to work out if a replacement is going to happen.
- * In any case, to runtime it is.
+ /*
+ * When we have no replacement string to worry about, we may
+ * have more luck, because the forbidden empty string replacements
+ * are harmless when they are replaced by another empty string.
*/
+ if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) {
+ /* empty prefix - build suffix only */
+
+ if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+ /* empty suffix too => empty result */
+ OP( POP); /* Pop original */
+ PUSH ( "");
+ return TCL_OK;
+ }
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ return TCL_OK;
+ } else {
+ if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+ /* empty suffix - build prefix only */
+ OP44( STR_RANGE_IMM, 0, first-1);
+ return TCL_OK;
+ }
+ OP( DUP);
+ OP44( STR_RANGE_IMM, 0, first-1);
+ OP4( REVERSE, 2);
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ OP1( STR_CONCAT1, 2);
+ return TCL_OK;
+ }
+ }
+
genericReplace:
- CompileWord(envPtr, valueTokenPtr, interp, 1);
tokenPtr = TokenAfter(valueTokenPtr);
CompileWord(envPtr, tokenPtr, interp, 2);
tokenPtr = TokenAfter(tokenPtr);
CompileWord(envPtr, tokenPtr, interp, 3);
- if (replacementTokenPtr != NULL) {
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
+ if (parsePtr->numWords == 5) {
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
} else {
PUSH( "");
}
OP( STR_REPLACE);
return TCL_OK;
- }
}
int
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index ef64ac1..aab9092 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5701,18 +5701,18 @@ TEBCresume(
{
Tcl_UniChar *ustring1, *ustring2, *ustring3, *end, *p;
- int length3;
+ int length3, endIdx;
Tcl_Obj *value3Ptr;
case INST_STR_REPLACE:
value3Ptr = POP_OBJECT();
valuePtr = OBJ_AT_DEPTH(2);
- length = Tcl_GetCharLength(valuePtr) - 1;
+ endIdx = Tcl_GetCharLength(valuePtr) - 1;
TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr),
O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr)));
- if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length,
+ if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, endIdx,
&fromIdx) != TCL_OK
- || TclGetIntForIndexM(interp, OBJ_AT_TOS, length,
+ || TclGetIntForIndexM(interp, OBJ_AT_TOS, endIdx,
&toIdx) != TCL_OK) {
TclDecrRefCount(value3Ptr);
TRACE_ERROR(interp);
@@ -5722,21 +5722,24 @@ TEBCresume(
(void) POP_OBJECT();
TclDecrRefCount(OBJ_AT_TOS);
(void) POP_OBJECT();
- if (fromIdx < 0) {
- fromIdx = 0;
- }
- if (fromIdx > toIdx || fromIdx > length) {
+ if ((toIdx < 0) ||
+ (fromIdx > endIdx) ||
+ (toIdx < fromIdx)) {
TRACE_APPEND(("\"%.30s\"\n", O2S(valuePtr)));
TclDecrRefCount(value3Ptr);
NEXT_INST_F(1, 0, 0);
}
- if (toIdx > length) {
- toIdx = length;
+ if (fromIdx < 0) {
+ fromIdx = 0;
+ }
+
+ if (toIdx > endIdx) {
+ toIdx = endIdx;
}
- if (fromIdx == 0 && toIdx == length) {
+ if (fromIdx == 0 && toIdx == endIdx) {
TclDecrRefCount(OBJ_AT_TOS);
OBJ_AT_TOS = value3Ptr;
TRACE_APPEND(("\"%.30s\"\n", O2S(value3Ptr)));
@@ -6019,12 +6022,7 @@ TEBCresume(
value2Ptr = OBJ_AT_TOS; /* TrimSet */
string2 = TclGetStringFromObj(value2Ptr, &length2);
string1 = TclGetStringFromObj(valuePtr, &length);
- trim1 = TclTrimLeft(string1, length, string2, length2);
- if (trim1 < length) {
- trim2 = TclTrimRight(string1, length, string2, length2);
- } else {
- trim2 = 0;
- }
+ trim1 = TclTrim(string1, length, string2, length2, &trim2);
createTrimmedString:
/*
* Careful here; trim set often contains non-ASCII characters so we
diff --git a/generic/tclInt.h b/generic/tclInt.h
index ef48126..dc7909c 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3166,6 +3166,8 @@ MODULE_SCOPE void TclSubstParse(Tcl_Interp *interp, const char *bytes,
MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
int count, int *tokensLeftPtr, int line,
int *clNextOuter, const char *outerScript);
+MODULE_SCOPE int TclTrim(const char *bytes, int numBytes,
+ const char *trim, int numTrim, int *trimRight);
MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes,
const char *trim, int numTrim);
MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes,
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 2bd6b51..6eab7b8 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1643,11 +1643,46 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
- * TclTrimRight --
+ * UtfWellFormedEnd --
+ * Checks the end of utf string is malformed, if yes - wraps bytes
+ * to the given buffer (as well-formed NTS string). The buffer
+ * argument should be initialized by the caller and ready to use.
+ *
+ * Results:
+ * The bytes with well-formed end of the string.
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the right side of the first string
- * all characters found in the second string.
+ * Side effects:
+ * Buffer (DString) may be allocated, so must be released.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline const char*
+UtfWellFormedEnd(
+ Tcl_DString *buffer, /* Buffer used to hold well-formed string. */
+ const char *bytes, /* Pointer to the beginning of the string. */
+ int length) /* Length of the string. */
+{
+ const char *l = bytes + length;
+ const char *p = Tcl_UtfPrev(l, bytes);
+
+ if (Tcl_UtfCharComplete(p, l - p)) {
+ return bytes;
+ }
+ /*
+ * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
+ * avoid segfault by access violation out of range.
+ */
+ Tcl_DStringAppend(buffer, bytes, length);
+ return Tcl_DStringValue(buffer);
+}
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the right side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the end of the string.
@@ -1658,8 +1693,8 @@ Tcl_Backslash(
*----------------------------------------------------------------------
*/
-int
-TclTrimRight(
+static inline int
+TrimRight(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1668,18 +1703,6 @@ TclTrimRight(
const char *p = bytes + numBytes;
int pInc;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimRight works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1720,15 +1743,46 @@ TclTrimRight(
return numBytes - (p - bytes);
}
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimRight(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
/*
*----------------------------------------------------------------------
*
* TclTrimLeft --
*
- * Takes two counted strings in the Tcl encoding which must both be null
- * terminated. Conceptually trims from the left side of the first string
- * all characters found in the second string.
+ * Takes two counted strings in the Tcl encoding. Conceptually
+ * finds the sub string (offset) to trim from the left side of the
+ * first string all characters found in the second string.
*
* Results:
* The number of bytes to be removed from the start of the string.
@@ -1739,8 +1793,8 @@ TclTrimRight(
*----------------------------------------------------------------------
*/
-int
-TclTrimLeft(
+static inline int
+TrimLeft(
const char *bytes, /* String to be trimmed... */
int numBytes, /* ...and its length in bytes */
const char *trim, /* String of trim characters... */
@@ -1748,18 +1802,6 @@ TclTrimLeft(
{
const char *p = bytes;
- if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
- Tcl_Panic("TclTrimLeft works only on null-terminated strings");
- }
-
- /*
- * Empty strings -> nothing to do.
- */
-
- if ((numBytes == 0) || (numTrim == 0)) {
- return 0;
- }
-
/*
* Outer loop: iterate over string to be trimmed.
*/
@@ -1796,10 +1838,99 @@ TclTrimLeft(
p += pInc;
numBytes -= pInc;
- } while (numBytes);
+ } while (numBytes > 0);
return p - bytes;
}
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ int res;
+ Tcl_DString bytesBuf, trimBuf;
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ res = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (res > numBytes) {
+ res = numBytes;
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return res;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrim --
+ * Finds the sub string (offset) to trim from both sides of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the start of the string
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrim(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim, /* ...and its length in bytes */
+ int *trimRight) /* Offset from the end of the string. */
+{
+ int trimLeft;
+ Tcl_DString bytesBuf, trimBuf;
+
+ *trimRight = 0;
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ Tcl_DStringInit(&bytesBuf);
+ Tcl_DStringInit(&trimBuf);
+ bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
+ trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
+
+ trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
+ if (trimLeft > numBytes) {
+ trimLeft = numBytes;
+ }
+ numBytes -= trimLeft;
+ /* have to trim yet (first char was already verified within TrimLeft) */
+ if (numBytes > 1) {
+ bytes += trimLeft;
+ *trimRight = TrimRight(bytes, numBytes, trim, numTrim);
+ if (*trimRight > numBytes) {
+ *trimRight = numBytes;
+ }
+ }
+
+ Tcl_DStringFree(&bytesBuf);
+ Tcl_DStringFree(&trimBuf);
+
+ return trimLeft;
+}
/*
*----------------------------------------------------------------------
@@ -1867,30 +1998,20 @@ Tcl_Concat(
result = ckalloc((unsigned) (bytesNeeded + argc));
for (p = result, i = 0; i < argc; i++) {
- int trim, elemLength;
+ int triml, trimr, elemLength;
const char *element;
element = argv[i];
elemLength = strlen(argv[i]);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
@@ -2010,28 +2131,18 @@ Tcl_ConcatObj(
Tcl_SetObjLength(resPtr, 0);
for (i = 0; i < objc; i++) {
- int trim;
+ int triml, trimr;
element = TclGetStringFromObj(objv[i], &elemLength);
- /*
- * Trim away the leading whitespace.
- */
-
- trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- element += trim;
- elemLength -= trim;
-
- /*
- * Trim away the trailing whitespace. Do not permit trimming to expose
- * a final backslash character.
- */
+ /* Trim away the leading/trailing whitespace. */
+ triml = TclTrim(element, elemLength, CONCAT_TRIM_SET,
+ CONCAT_WS_SIZE, &trimr);
+ element += triml;
+ elemLength -= triml + trimr;
- trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET,
- CONCAT_WS_SIZE);
- trim -= trim && (element[elemLength - trim - 1] == '\\');
- elemLength -= trim;
+ /* Do not permit trimming to expose a final backslash character. */
+ elemLength += trimr && (element[elemLength - 1] == '\\');
/*
* If we're left with empty element after trimming, do nothing.
diff --git a/tests/string.test b/tests/string.test
index fc8dabb..d69fda4 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1378,6 +1378,12 @@ test string-14.16 {string replace} {
test string-14.17 {string replace} {
string replace abcdefghijklmnop end end-1
} {abcdefghijklmnop}
+test string-14.18 {string replace} {
+ string replace abcdefghijklmnop 10 9 XXX
+} {abcdefghijklmnop}
+test string-14.19 {string replace} {
+ string replace {} -1 0 A
+} A
test string-15.1 {string tolower too few args} {
list [catch {string tolower} msg] $msg