diff options
author | dgp <dgp@users.sourceforge.net> | 2018-03-14 23:47:46 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2018-03-14 23:47:46 (GMT) |
commit | e6e44fb3b82931ccf40081ca73839865ea9b5848 (patch) | |
tree | 9a9f5d6617e8c0a52b588fbe3eca6b9ffeb2b12f /generic | |
parent | 6fcbcd9f12d583cca2771540c31441750c5beb7e (diff) | |
parent | d0bce3e99feecf727c865701f12b9c74d2b8a35c (diff) | |
download | tcl-e6e44fb3b82931ccf40081ca73839865ea9b5848.zip tcl-e6e44fb3b82931ccf40081ca73839865ea9b5848.tar.gz tcl-e6e44fb3b82931ccf40081ca73839865ea9b5848.tar.bz2 |
Merge rest of 8.7 except the botched apply/backout dance.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdMZ.c | 29 | ||||
-rw-r--r-- | generic/tclExecute.c | 84 | ||||
-rw-r--r-- | generic/tclInt.h | 5 | ||||
-rw-r--r-- | generic/tclProcess.c | 2 | ||||
-rw-r--r-- | generic/tclStringObj.c | 145 | ||||
-rw-r--r-- | generic/tclUtil.c | 255 |
6 files changed, 346 insertions, 174 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ba96d7c..0b65758 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2305,7 +2305,6 @@ StringRplcCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *ustring; int first, last, length, end; if (objc < 4 || objc > 5) { @@ -2313,7 +2312,7 @@ StringRplcCmd( return TCL_ERROR; } - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); + length = Tcl_GetCharLength(objv[1]); end = length - 1; if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || @@ -2339,26 +2338,17 @@ StringRplcCmd( } else { Tcl_Obj *resultPtr; - /* - * We are re-fetching in case the string argument is same value as - * an index argument, and shimmering cost us our ustring. - */ - - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - end = length-1; - if (first < 0) { first = 0; } - - resultPtr = Tcl_NewUnicodeObj(ustring, first); - if (objc == 5) { - Tcl_AppendObjToObj(resultPtr, objv[4]); - } - if (last < end) { - Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, - end - last); + if (last > end) { + last = end; } + + resultPtr = TclStringReplace(interp, objv[1], first, + last + 1 - first, (objc == 5) ? objv[4] : NULL, + TCL_STRING_IN_PLACE); + Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; @@ -3228,8 +3218,7 @@ StringTrimCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - triml = TclTrimLeft(string1, length1, string2, length2); - trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); + triml = TclTrim(string1, length1, string2, length2, &trimr); Tcl_SetObjResult(interp, Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 59f6826..f294272 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5467,82 +5467,9 @@ TEBCresume( NEXT_INST_F(1, 0, 0); } - length3 = Tcl_GetCharLength(value3Ptr); + objResultPtr = TclStringReplace(interp, valuePtr, fromIdx, + toIdx - fromIdx + 1, value3Ptr, TCL_STRING_IN_PLACE); - /* - * See if we can splice in place. This happens when the number of - * characters being replaced is the same as the number of characters - * in the string to be inserted. - */ - - if (length3 - 1 == toIdx - fromIdx) { - unsigned char *bytes1, *bytes2; - - if (Tcl_IsShared(valuePtr)) { - objResultPtr = Tcl_DuplicateObj(valuePtr); - } else { - objResultPtr = valuePtr; - } - if (TclIsPureByteArray(objResultPtr) - && TclIsPureByteArray(value3Ptr)) { - bytes1 = Tcl_GetByteArrayFromObj(objResultPtr, NULL); - bytes2 = Tcl_GetByteArrayFromObj(value3Ptr, NULL); - memcpy(bytes1 + fromIdx, bytes2, length3); - } else { - ustring1 = Tcl_GetUnicodeFromObj(objResultPtr, NULL); - ustring2 = Tcl_GetUnicodeFromObj(value3Ptr, NULL); - memcpy(ustring1 + fromIdx, ustring2, - length3 * sizeof(Tcl_UniChar)); - } - Tcl_InvalidateStringRep(objResultPtr); - TclDecrRefCount(value3Ptr); - TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); - if (objResultPtr == valuePtr) { - NEXT_INST_F(1, 0, 0); - } else { - NEXT_INST_F(1, 1, 1); - } - } - - /* - * Get the unicode representation; this is where we guarantee to lose - * bytearrays. - */ - - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - length--; - - /* - * Remove substring using copying. - */ - - objResultPtr = NULL; - if (fromIdx > 0) { - objResultPtr = Tcl_NewUnicodeObj(ustring1, fromIdx); - } - if (length3 > 0) { - if (objResultPtr) { - Tcl_AppendObjToObj(objResultPtr, value3Ptr); - } else if (Tcl_IsShared(value3Ptr)) { - objResultPtr = Tcl_DuplicateObj(value3Ptr); - } else { - objResultPtr = value3Ptr; - } - } - if (toIdx < length) { - if (objResultPtr) { - Tcl_AppendUnicodeToObj(objResultPtr, ustring1 + toIdx + 1, - length - toIdx); - } else { - objResultPtr = Tcl_NewUnicodeObj(ustring1 + toIdx + 1, - length - toIdx); - } - } - if (objResultPtr == NULL) { - /* This has to be the case [string replace $s 0 end {}] */ - /* which has result {} which is same as value3Ptr. */ - objResultPtr = value3Ptr; - } if (objResultPtr == value3Ptr) { /* See [Bug 82e7f67325] */ TclDecrRefCount(OBJ_AT_TOS); @@ -5717,12 +5644,7 @@ TEBCresume( value2Ptr = OBJ_AT_TOS; /* TrimSet */ string2 = TclGetStringFromObj(value2Ptr, &length2); string1 = TclGetStringFromObj(valuePtr, &length); - trim1 = TclTrimLeft(string1, length, string2, length2); - if (trim1 < length) { - trim2 = TclTrimRight(string1, length, string2, length2); - } else { - trim2 = 0; - } + trim1 = TclTrim(string1, length, string2, length2, &trim2); createTrimmedString: /* * Careful here; trim set often contains non-ASCII characters so we diff --git a/generic/tclInt.h b/generic/tclInt.h index e7794c7..81b1c05 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3198,6 +3198,8 @@ MODULE_SCOPE void TclSubstParse(Tcl_Interp *interp, const char *bytes, MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr, int count, int *tokensLeftPtr, int line, int *clNextOuter, const char *outerScript); +MODULE_SCOPE int TclTrim(const char *bytes, int numBytes, + const char *trim, int numTrim, int *trimRight); MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, @@ -4004,6 +4006,9 @@ MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, int last); MODULE_SCOPE Tcl_Obj * TclStringRepeat(Tcl_Interp *interp, Tcl_Obj *objPtr, int count, int flags); +MODULE_SCOPE Tcl_Obj * TclStringReplace(Tcl_Interp *interp, Tcl_Obj *objPtr, + int first, int count, Tcl_Obj *insertPtr, + int flags); MODULE_SCOPE Tcl_Obj * TclStringReverse(Tcl_Obj *objPtr, int flags); /* Flag values for the [string] ensemble functions. */ diff --git a/generic/tclProcess.c b/generic/tclProcess.c index 8d98a23..7187ee4 100644 --- a/generic/tclProcess.c +++ b/generic/tclProcess.c @@ -210,7 +210,7 @@ WaitProcessStatus( const char *msg; pid = Tcl_WaitPid(pid, &waitStatus, options); - if ((pid == 0)) { + if (pid == 0) { /* * No change. */ diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 26a3a28..9913160 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -38,6 +38,7 @@ #include "tommath.h" #include "tclStringRep.h" +#include "assert.h" /* * Prototypes for functions defined later in this file: */ @@ -3531,6 +3532,150 @@ TclStringReverse( /* *--------------------------------------------------------------------------- * + * TclStringReplace -- + * + * Implements the inner engine of the [string replace] command. + * + * The result is a concatenation of a prefix from objPtr, characters + * 0 through first-1, the insertPtr string value, and a suffix from + * objPtr, characters from first + count to the end. The effect is + * as if the inner substring of characters first through first+count-1 + * are removed and replaced with insertPtr. + * If insertPtr is NULL, it is treated as an empty string. + * When passed the flag TCL_STRING_IN_PLACE, this routine will try + * to do the work within objPtr, so long as no sharing forbids it. + * Without that request, or as needed, a new Tcl value will be allocated + * to be the result. + * + * Results: + * A Tcl value that is the result of the substring replacement. + * May return NULL in case of an error. When NULL is returned and + * interp is non-NULL, error information is left in interp + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclStringReplace( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* String to act upon */ + int first, /* First index to replace */ + int count, /* How many chars to replace */ + Tcl_Obj *insertPtr, /* Replacement string, may be NULL */ + int flags) /* TCL_STRING_IN_PLACE => attempt in-place */ +{ + int inPlace = flags & TCL_STRING_IN_PLACE; + Tcl_Obj *result; + + /* Caller is expected to pass sensible arguments */ + assert ( count >= 0 ) ; + assert ( first >= 0 ) ; + + /* Replace nothing with nothing */ + if ((insertPtr == NULL) && (count == 0)) { + if (inPlace) { + return objPtr; + } else { + return Tcl_DuplicateObj(objPtr); + } + } + + /* + * The caller very likely had to call Tcl_GetCharLength() or similar + * to be able to process index values. This means it is like that + * objPtr is either a proper "bytearray" or a "string" or else it has + * a known and short string rep. + */ + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (insertPtr == NULL) { + /* Replace something with nothing. */ + + assert ( first <= numBytes ) ; + assert ( count <= numBytes ) ; + assert ( first + count <= numBytes ) ; + + result = Tcl_NewByteArrayObj(NULL, numBytes - count);/* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Replace everything */ + if ((first == 0) && (count == numBytes)) { + return insertPtr; + } + + if (TclIsPureByteArray(insertPtr)) { + int newBytes; + unsigned char *iBytes + = Tcl_GetByteArrayFromObj(insertPtr, &newBytes); + + if (count == newBytes && inPlace && !Tcl_IsShared(objPtr)) { + /* + * Removal count and replacement count are equal. + * Other conditions permit. Do in-place splice. + */ + + memcpy(bytes + first, iBytes, count); + Tcl_InvalidateStringRep(objPtr); + return objPtr; + } + + if (newBytes > INT_MAX - (numBytes - count)) { + if (interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "max size for a Tcl value (%d bytes) exceeded", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + } + return NULL; + } + result = Tcl_NewByteArrayObj(NULL, numBytes - count + newBytes); + /* PANIC? */ + TclAppendBytesToByteArray(result, bytes, first); + TclAppendBytesToByteArray(result, iBytes, newBytes); + TclAppendBytesToByteArray(result, bytes + first + count, + numBytes - count - first); + return result; + } + + /* Flow through to try other approaches below */ + } + + /* + * TODO: Figure out how not to generate a Tcl_UniChar array rep + * when it can be determined objPtr->bytes points to a string of + * all single-byte characters so we can index it directly. + */ + + /* The traditional implementation... */ + { + int numChars; + Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + + /* TODO: Is there an in-place option worth pursuing here? */ + + result = Tcl_NewUnicodeObj(ustring, first); + if (insertPtr) { + Tcl_AppendObjToObj(result, insertPtr); + } + if (first + count < numChars) { + Tcl_AppendUnicodeToObj(result, ustring + first + count, + numChars - first - count); + } + + return result; + } +} + +/* + *--------------------------------------------------------------------------- + * * FillUnicodeRep -- * * Populate the Unicode internal rep with the Unicode form of its string diff --git a/generic/tclUtil.c b/generic/tclUtil.c index d1b81a9..0ba6c8e 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1647,11 +1647,46 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * - * TclTrimRight -- + * UtfWellFormedEnd -- + * Checks the end of utf string is malformed, if yes - wraps bytes + * to the given buffer (as well-formed NTS string). The buffer + * argument should be initialized by the caller and ready to use. + * + * Results: + * The bytes with well-formed end of the string. * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the right side of the first string - * all characters found in the second string. + * Side effects: + * Buffer (DString) may be allocated, so must be released. + * + *---------------------------------------------------------------------- + */ + +static inline const char* +UtfWellFormedEnd( + Tcl_DString *buffer, /* Buffer used to hold well-formed string. */ + const char *bytes, /* Pointer to the beginning of the string. */ + int length) /* Length of the string. */ +{ + const char *l = bytes + length; + const char *p = Tcl_UtfPrev(l, bytes); + + if (Tcl_UtfCharComplete(p, l - p)) { + return bytes; + } + /* + * Malformed utf-8 end, be sure we've NTS to safe compare of end-character, + * avoid segfault by access violation out of range. + */ + Tcl_DStringAppend(buffer, bytes, length); + return Tcl_DStringValue(buffer); +} +/* + *---------------------------------------------------------------------- + * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the right side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the end of the string. @@ -1662,8 +1697,8 @@ Tcl_Backslash( *---------------------------------------------------------------------- */ -int -TclTrimRight( +static inline int +TrimRight( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1673,18 +1708,6 @@ TclTrimRight( int pInc; Tcl_UniChar ch1 = 0, ch2 = 0; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimRight works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1723,15 +1746,46 @@ TclTrimRight( return numBytes - (p - bytes); } + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimRight(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} /* *---------------------------------------------------------------------- * * TclTrimLeft -- * - * Takes two counted strings in the Tcl encoding which must both be null - * terminated. Conceptually trims from the left side of the first string - * all characters found in the second string. + * Takes two counted strings in the Tcl encoding. Conceptually + * finds the sub string (offset) to trim from the left side of the + * first string all characters found in the second string. * * Results: * The number of bytes to be removed from the start of the string. @@ -1742,8 +1796,8 @@ TclTrimRight( *---------------------------------------------------------------------- */ -int -TclTrimLeft( +static inline int +TrimLeft( const char *bytes, /* String to be trimmed... */ int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ @@ -1752,18 +1806,6 @@ TclTrimLeft( const char *p = bytes; Tcl_UniChar ch1 = 0, ch2 = 0; - if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { - Tcl_Panic("TclTrimLeft works only on null-terminated strings"); - } - - /* - * Empty strings -> nothing to do. - */ - - if ((numBytes == 0) || (numTrim == 0)) { - return 0; - } - /* * Outer loop: iterate over string to be trimmed. */ @@ -1798,10 +1840,99 @@ TclTrimLeft( p += pInc; numBytes -= pInc; - } while (numBytes); + } while (numBytes > 0); return p - bytes; } + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + int res; + Tcl_DString bytesBuf, trimBuf; + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + res = TrimLeft(bytes, numBytes, trim, numTrim); + if (res > numBytes) { + res = numBytes; + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return res; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrim -- + * Finds the sub string (offset) to trim from both sides of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the start of the string + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrim( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim, /* ...and its length in bytes */ + int *trimRight) /* Offset from the end of the string. */ +{ + int trimLeft; + Tcl_DString bytesBuf, trimBuf; + + *trimRight = 0; + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + Tcl_DStringInit(&bytesBuf); + Tcl_DStringInit(&trimBuf); + bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes); + trim = UtfWellFormedEnd(&trimBuf, trim, numTrim); + + trimLeft = TrimLeft(bytes, numBytes, trim, numTrim); + if (trimLeft > numBytes) { + trimLeft = numBytes; + } + numBytes -= trimLeft; + /* have to trim yet (first char was already verified within TrimLeft) */ + if (numBytes > 1) { + bytes += trimLeft; + *trimRight = TrimRight(bytes, numBytes, trim, numTrim); + if (*trimRight > numBytes) { + *trimRight = numBytes; + } + } + + Tcl_DStringFree(&bytesBuf); + Tcl_DStringFree(&trimBuf); + + return trimLeft; +} /* *---------------------------------------------------------------------- @@ -1869,30 +2000,20 @@ Tcl_Concat( result = ckalloc((unsigned) (bytesNeeded + argc)); for (p = result, i = 0; i < argc; i++) { - int trim, elemLength; + int triml, trimr, elemLength; const char *element; element = argv[i]; elemLength = strlen(argv[i]); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. @@ -2012,28 +2133,18 @@ Tcl_ConcatObj( Tcl_SetObjLength(resPtr, 0); for (i = 0; i < objc; i++) { - int trim; + int triml, trimr; element = TclGetStringFromObj(objv[i], &elemLength); - /* - * Trim away the leading whitespace. - */ - - trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - element += trim; - elemLength -= trim; - - /* - * Trim away the trailing whitespace. Do not permit trimming to expose - * a final backslash character. - */ + /* Trim away the leading/trailing whitespace. */ + triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, + CONCAT_WS_SIZE, &trimr); + element += triml; + elemLength -= triml + trimr; - trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE); - trim -= trim && (element[elemLength - trim - 1] == '\\'); - elemLength -= trim; + /* Do not permit trimming to expose a final backslash character. */ + elemLength += trimr && (element[elemLength - 1] == '\\'); /* * If we're left with empty element after trimming, do nothing. |