diff options
| author | dgp <dgp@users.sourceforge.net> | 2018-03-14 21:51:13 (GMT) |
|---|---|---|
| committer | dgp <dgp@users.sourceforge.net> | 2018-03-14 21:51:13 (GMT) |
| commit | 72c721f3e71c327c2e70def50fb04f48c4527442 (patch) | |
| tree | 5be4eacb21d2c81585f74ef9cc9864e6a13bf086 | |
| parent | 6d4320b5fbe2afd973751c4b5de70a89e0d62214 (diff) | |
| parent | 1bf60bab3cd706cba08c15df5a6a5267d918aeae (diff) | |
| download | tcl-72c721f3e71c327c2e70def50fb04f48c4527442.zip tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.gz tcl-72c721f3e71c327c2e70def50fb04f48c4527442.tar.bz2 | |
merge 8.6
| -rw-r--r-- | generic/tclCmdMZ.c | 37 | ||||
| -rw-r--r-- | generic/tclCompCmdsSZ.c | 240 | ||||
| -rw-r--r-- | generic/tclExecute.c | 32 | ||||
| -rw-r--r-- | generic/tclInt.h | 2 | ||||
| -rw-r--r-- | generic/tclUtil.c | 255 | ||||
| -rw-r--r-- | tests/string.test | 6 |
6 files changed, 378 insertions, 194 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index d63a985..a3a79f8 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2353,7 +2353,7 @@ StringRplcCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_UniChar *ustring; - int first, last, length; + int first, last, length, end; if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?"); @@ -2361,20 +2361,38 @@ StringRplcCmd( } ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - length--; + end = length - 1; - if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || - TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){ + if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK){ return TCL_ERROR; } - if ((last < first) || (last < 0) || (first > length)) { + /* + * The following test screens out most empty substrings as + * candidates for replacement. When they are detected, no + * replacement is done, and the result is the original string, + */ + if ((last < 0) || /* Range ends before start of string */ + (first > end) || /* Range begins after end of string */ + (last < first)) { /* Range begins after it starts */ + + /* + * BUT!!! when (end < 0) -- an empty original string -- we can + * have (first <= end < 0 <= last) and an empty string is permitted + * to be replaced. + */ Tcl_SetObjResult(interp, objv[1]); } else { Tcl_Obj *resultPtr; + /* + * We are re-fetching in case the string argument is same value as + * an index argument, and shimmering cost us our ustring. + */ + ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - length--; + end = length-1; if (first < 0) { first = 0; @@ -2384,9 +2402,9 @@ StringRplcCmd( if (objc == 5) { Tcl_AppendObjToObj(resultPtr, objv[4]); } - if (last < length) { + if (last < end) { Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, - length - last); + end - last); } Tcl_SetObjResult(interp, resultPtr); } @@ -3267,8 +3285,7 @@ StringTrimCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - triml = TclTrimLeft(string1, length1, string2, length2); - trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); + triml = TclTrim(string1, length1, string2, length2, &trimr); Tcl_SetObjResult(interp, Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 5cd1c3b..c13376b 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -995,147 +995,197 @@ TclCompileStringReplaceCmd( * compiled. */ CompileEnv *envPtr) /* Holds the resulting instructions. */ { - Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL; + Tcl_Token *tokenPtr, *valueTokenPtr; DefineLineInformation; /* TIP #280 */ - int idx1, idx2; + int first, last; if (parsePtr->numWords < 4 || parsePtr->numWords > 5) { return TCL_ERROR; } + + /* Bytecode to compute/push string argument being replaced */ valueTokenPtr = TokenAfter(parsePtr->tokenPtr); - if (parsePtr->numWords == 5) { - tokenPtr = TokenAfter(valueTokenPtr); - tokenPtr = TokenAfter(tokenPtr); - replacementTokenPtr = TokenAfter(tokenPtr); - } + CompileWord(envPtr, valueTokenPtr, interp, 1); + /* + * Check for first index known and useful at compile time. + */ tokenPtr = TokenAfter(valueTokenPtr); - if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, - &idx1) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER, + &first) != TCL_OK) { goto genericReplace; } + /* - * Token parsed as an index value. Indices before the string are - * treated as index of start of string. + * Check for last index known and useful at compile time. */ - tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, - &idx2) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER, + &last) != TCL_OK) { goto genericReplace; } - /* - * Token parsed as an index value. Indices after the string are - * treated as index of end of string. - */ -/* TODO...... */ - /* - * We handle these replacements specially: first character (where - * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything - * else and the semantics get rather screwy. + /* + * [string replace] is an odd bird. For many arguments it is + * a conventional substring replacer. However it also goes out + * of its way to become a no-op for many cases where it would be + * replacing an empty substring. Precisely, it is a no-op when * - * TODO: These seem to be very narrow cases. They are not even - * covered by the test suite, and any programming that ends up - * here could have been coded by the programmer using [string range] - * and [string cat]. [*] Not clear at all to me that the bytecode - * generated here is worthwhile. + * (last < first) OR + * (last < 0) OR + * (end < first) * - * [*] Except for the empty string exceptions. UGGGGHHHH. + * For some compile-time values we can detect these cases, and + * compile direct to bytecode implementing the no-op. */ - if (idx1 == 0 && idx2 == 0) { - int notEq, end; + if ((last == TCL_INDEX_BEFORE) /* Know (last < 0) */ + || (first == TCL_INDEX_AFTER) /* Know (first > end) */ /* - * Just working with the first character. + * Tricky to determine when runtime (last < first) can be + * certainly known based on the encoded values. Consider the + * cases... + * + * (first <= TCL_INDEX_END) && + * (last == TCL_INDEX_AFTER) => cannot tell REJECT + * (last <= TCL_INDEX END) && (last < first) => ACCEPT + * else => cannot tell REJECT */ - - CompileWord(envPtr, valueTokenPtr, interp, 1); - if (replacementTokenPtr == NULL) { - /* Drop first */ - OP44( STR_RANGE_IMM, 1, TCL_INDEX_END); - return TCL_OK; - } - /* Replace first */ - CompileWord(envPtr, replacementTokenPtr, interp, 4); - + || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END) + && (last < first)) /* Know (last < first) */ /* - * NOTE: The following tower of bullshit is present because - * [string replace] was boneheadedly defined not to replace - * empty strings, so we actually have to detect the empty - * string case and treat it differently. + * (first == TCL_INDEX_BEFORE) && + * (last == TCL_INDEX_AFTER) => (first < last) REJECT + * (last <= TCL_INDEX_END) => cannot tell REJECT + * else => (first < last) REJECT + * + * else [[first >= TCL_INDEX_START]] && + * (last == TCL_INDEX_AFTER) => cannot tell REJECT + * (last <= TCL_INDEX_END) => cannot tell REJECT + * else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT */ - - OP4( OVER, 1); - PUSH( ""); - OP( STR_EQ); - JUMP1( JUMP_FALSE, notEq); - OP( POP); - JUMP1( JUMP, end); - FIXJUMP1(notEq); - TclAdjustStackDepth(1, envPtr); - OP4( REVERSE, 2); - OP44( STR_RANGE_IMM, 1, TCL_INDEX_END); - OP1( STR_CONCAT1, 2); - FIXJUMP1(end); + || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START) + && (last < first))) { /* Know (last < first) */ + if (parsePtr->numWords == 5) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); + OP( POP); /* Pop newString */ + } + /* Original string argument now on TOS as result */ return TCL_OK; + } - } else if (idx1 == TCL_INDEX_END && idx2 == TCL_INDEX_END) { - int notEq, end; - - /* - * Just working with the last character. - */ + if (parsePtr->numWords == 5) { + /* + * When we have a string replacement, we have to take care about + * not replacing empty substrings that [string replace] promises + * not to replace + * + * The remaining index values might be suitable for conventional + * string replacement, but only if they cannot possibly meet the + * conditions described above at runtime. If there's a chance they + * might, we would have to emit bytecode to check and at that point + * we're paying more in bytecode execution time than would make + * things worthwhile. Trouble is we are very limited in + * how much we can detect that at compile time. After decoding, + * we need, first: + * + * (first <= end) + * + * The encoded indices (first <= TCL_INDEX END) and + * (first == TCL_INDEX_BEFORE) always meets this condition, but + * any other encoded first index has some list for which it fails. + * + * We also need, second: + * + * (last >= 0) + * + * The encoded indices (last >= TCL_INDEX_START) and + * (last == TCL_INDEX_AFTER) always meet this condition but any + * other encoded last index has some list for which it fails. + * + * Finally we need, third: + * + * (first <= last) + * + * Considered in combination with the constraints we already have, + * we see that we can proceed when (first == TCL_INDEX_BEFORE) + * or (last == TCL_INDEX_AFTER). These also permit simplification + * of the prefix|replace|suffix construction. The other constraints, + * though, interfere with getting a guarantee that first <= last. + */ - CompileWord(envPtr, valueTokenPtr, interp, 1); - if (replacementTokenPtr == NULL) { - /* Drop last */ - OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1); - return TCL_OK; + if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) { + /* empty prefix */ + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); + OP4( REVERSE, 2); + if (last == TCL_INDEX_AFTER) { + OP( POP); /* Pop original */ + } else { + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + OP1( STR_CONCAT1, 2); } - /* Replace last */ - CompileWord(envPtr, replacementTokenPtr, interp, 4); - - /* More bullshit; see NOTE above. */ + return TCL_OK; + } - OP4( OVER, 1); - PUSH( ""); - OP( STR_EQ); - JUMP1( JUMP_FALSE, notEq); - OP( POP); - JUMP1( JUMP, end); - FIXJUMP1(notEq); - TclAdjustStackDepth(1, envPtr); - OP4( REVERSE, 2); - OP44( STR_RANGE_IMM, 0, TCL_INDEX_END-1); - OP4( REVERSE, 2); + if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) { + OP44( STR_RANGE_IMM, 0, first-1); + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); OP1( STR_CONCAT1, 2); - FIXJUMP1(end); return TCL_OK; + } + + /* FLOW THROUGH TO genericReplace */ } else { - /* - * Need to process indices at runtime. This could be because the - * indices are not constants, or because we need to resolve them to - * absolute indices to work out if a replacement is going to happen. - * In any case, to runtime it is. + /* + * When we have no replacement string to worry about, we may + * have more luck, because the forbidden empty string replacements + * are harmless when they are replaced by another empty string. */ + if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) { + /* empty prefix - build suffix only */ + + if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) { + /* empty suffix too => empty result */ + OP( POP); /* Pop original */ + PUSH ( ""); + return TCL_OK; + } + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + return TCL_OK; + } else { + if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) { + /* empty suffix - build prefix only */ + OP44( STR_RANGE_IMM, 0, first-1); + return TCL_OK; + } + OP( DUP); + OP44( STR_RANGE_IMM, 0, first-1); + OP4( REVERSE, 2); + OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END); + OP1( STR_CONCAT1, 2); + return TCL_OK; + } + } + genericReplace: - CompileWord(envPtr, valueTokenPtr, interp, 1); tokenPtr = TokenAfter(valueTokenPtr); CompileWord(envPtr, tokenPtr, interp, 2); tokenPtr = TokenAfter(tokenPtr); CompileWord(envPtr, tokenPtr, interp, 3); - if (replacementTokenPtr != NULL) { - CompileWord(envPtr, replacementTokenPtr, interp, 4); + if (parsePtr->numWords == 5) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 4); } else { PUSH( ""); } OP( STR_REPLACE); return TCL_OK; - } } int diff --git a/generic/tclExecute.c b/generic/tclExecute.c index ef64ac1..aab9092 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5701,18 +5701,18 @@ TEBCresume( { Tcl_UniChar *ustring1, *ustring2, *ustring3, *end, *p; - int length3; + int length3, endIdx; Tcl_Obj *value3Ptr; case INST_STR_REPLACE: value3Ptr = POP_OBJECT(); valuePtr = OBJ_AT_DEPTH(2); - length = Tcl_GetCharLength(valuePtr) - 1; + endIdx = Tcl_GetCharLength(valuePtr) - 1; TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr))); - if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, + if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, endIdx, &fromIdx) != TCL_OK - || TclGetIntForIndexM(interp, OBJ_AT_TOS, length, + || TclGetIntForIndexM(interp, OBJ_AT_TOS, endIdx, &toIdx) != TCL_OK) { TclDecrRefCount(value3Ptr); TRACE_ERROR(interp); @@ -5722,21 +5722,24 @@ TEBCresume( (void) POP_OBJECT(); TclDecrRefCount(OBJ_AT_TOS); (void) POP_OBJECT(); - if (fromIdx < 0) { - fromIdx = 0; - } - if (fromIdx > toIdx || fromIdx > length) { + if ((toIdx < 0) || + (fromIdx > endIdx) || + (toIdx < fromIdx)) { TRACE_APPEND(("\"%.30s\"\n", O2S(valuePtr))); TclDecrRefCount(value3Ptr); NEXT_INST_F(1, 0, 0); } - if (toIdx > length) { - toIdx = length; + if (fromIdx < 0) { + fromIdx = 0; + } + + if (toIdx > endIdx) { + toIdx = endIdx; } - if (fromIdx == 0 && toIdx == length) { + if (fromIdx == 0 && toIdx == endIdx) { TclDecrRefCount(OBJ_AT_TOS); OBJ_AT_TOS = value3Ptr; TRACE_APPEND(("\"%.30s\"\n", O2S(value3Ptr))); @@ -6019,12 +6022,7 @@ TEBCresume( value2Ptr = OBJ_AT_TOS; /* TrimSet */ string2 = TclGetStringFromObj(value2Ptr, &length2); string1 = TclGetStringFromObj(valuePtr, &length); - trim1 = TclTrimLeft(string1, length, string2, length2); - if (trim1 < length) { - trim2 = TclTrimRight(string1, length, string2, length2); - } else { - trim2 = 0; - } + trim1 = TclTrim(string1, length, string2, length2, &trim2); createTrimmedString: /* * Careful here; trim set often contains non-ASCII characters so we diff --git a/generic/tclInt.h b/generic/tclInt.h index ef48126..dc7909c 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3166,6 +3166,8 @@ MODULE_SCOPE void TclSubstParse(Tcl_Interp *interp, const char *bytes, MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr, int count, int *tokensLeftPtr, int line, int *clNextOuter, const char *outerScript); +MODULE_SCOPE int TclTrim(const char *bytes, int numBytes, + const char *trim, int numTrim, int *trimRight); MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 2bd6b51..6eab7b8 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1643,11 +1643,46 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * TclTrimRight -- + * UtfWellFormedEnd -- + * Checks the end of utf string is malformed, if yes - wraps bytes + * to the given buffer (as well-formed NTS string). The buffer + * argument should be initialized by the caller and ready to use. + * + * Results: + * The bytes with well-formed end of the string. * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the right side of the first string - * all characters found in the second string. + * Side effects: + * Buffer (DString) may be allocated, so must be released. + * + *---------------------------------------------------------------------- + */ + +static inline const char* +UtfWellFormedEnd( + Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ + const char *bytes, /* Pointer to the beginning of the string. */ + int length) /* Length of the string. */ +{ + const char *l = bytes + length; + const char *p = Tcl_UtfPrev(l, bytes); + + if (Tcl_UtfCharComplete(p, l - p)) { + return bytes; + } + /* + * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, + * avoid segfault by access violation out of range. + */ + Tcl_DStringAppend(buffer, bytes, length); + return Tcl_DStringValue(buffer); +} +/* + *---------------------------------------------------------------------- + * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the right side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the end of the string. @@ -1658,8 +1693,8 @@ Tcl_Backslash( *---------------------------------------------------------------------- */ -int -TclTrimRight( +static inline int +TrimRight( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1668,18 +1703,6 @@ TclTrimRight( const char *p = bytes + numBytes; int pInc; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimRight works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1720,15 +1743,46 @@ TclTrimRight( return numBytes - (p - bytes); } + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimRight(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} /* *---------------------------------------------------------------------- * * TclTrimLeft -- * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the left side of the first string - * all characters found in the second string. + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the left side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the start of the string. @@ -1739,8 +1793,8 @@ TclTrimRight( *---------------------------------------------------------------------- */ -int -TclTrimLeft( +static inline int +TrimLeft( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1748,18 +1802,6 @@ TclTrimLeft( { const char *p = bytes; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimLeft works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1796,10 +1838,99 @@ TclTrimLeft( p += pInc; numBytes -= pInc; - } while (numBytes); + } while (numBytes > 0); return p - bytes; } + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimLeft(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrim -- + * Finds the sub string (offset) to trim from both sides of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the start of the string + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrim( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim, /* ...and its length in bytes */ + int *trimRight) /* Offset from the end of the string. */ +{ + int trimLeft; + Tcl_DString bytesBuf, trimBuf; + + *trimRight = 0; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); + if (trimLeft > numBytes) { + trimLeft = numBytes; + } + numBytes -= trimLeft; + /* have to trim yet (first char was already verified within TrimLeft) */ + if (numBytes > 1) { + bytes += trimLeft; + *trimRight = TrimRight(bytes, numBytes, trim, numTrim); + if (*trimRight > numBytes) { + *trimRight = numBytes; + } + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return trimLeft; +} /* *---------------------------------------------------------------------- @@ -1867,30 +1998,20 @@ Tcl_Concat( result = ckalloc((unsigned) (bytesNeeded + argc)); for (p = result, i = 0; i < argc; i++) { - int trim, elemLength; + int triml, trimr, elemLength; const char *element; element = argv[i]; elemLength = strlen(argv[i]); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. @@ -2010,28 +2131,18 @@ Tcl_ConcatObj( Tcl_SetObjLength(resPtr, 0); for (i = 0; i < objc; i++) { - int trim; + int triml, trimr; element = TclGetStringFromObj(objv[i], &elemLength); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. diff --git a/tests/string.test b/tests/string.test index fc8dabb..d69fda4 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1378,6 +1378,12 @@ test string-14.16 {string replace} { test string-14.17 {string replace} { string replace abcdefghijklmnop end end-1 } {abcdefghijklmnop} +test string-14.18 {string replace} { + string replace abcdefghijklmnop 10 9 XXX +} {abcdefghijklmnop} +test string-14.19 {string replace} { + string replace {} -1 0 A +} A test string-15.1 {string tolower too few args} { list [catch {string tolower} msg] $msg |
