diff options
author | dgp <dgp@users.sourceforge.net> | 2011-04-13 20:27:29 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2011-04-13 20:27:29 (GMT) |
commit | ef9aca4d1c097c0bb5c50d64ed819c38baef6387 (patch) | |
tree | ed1d89e9c50ba82b9724bfc18033907549896ec3 /generic/tclUtil.c | |
parent | bccb266ff70f020e3d62ef686b1bd7e79b2245ae (diff) | |
download | tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.zip tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.gz tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.bz2 |
[Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*].
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- | generic/tclUtil.c | 239 |
1 files changed, 167 insertions, 72 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3565165..2f2deec 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -937,6 +937,142 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the right side of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the end of the string. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes + numBytes; + int pInc; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimRight works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + const char *q = trim; + int bytesLeft = numTrim; + + p = Tcl_UtfPrev(p, bytes); + pInc = TclUtfToUniChar(p, &ch1); + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is last non-trimmed char */ + break; + } + pInc = 0; + } while (p > bytes); + + return numBytes - (p - bytes) - pInc; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrimLeft -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the left side of the + * first string all characters found in the second string. + * + * Results: + * An integer index into the first string, pointing to the first + * character not to be trimmed. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimLeft works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + int pInc = TclUtfToUniChar(p, &ch1); + const char *q = trim; + int bytesLeft = numTrim; + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is first non-trimmed char */ + break; + } + + p += pInc; + numBytes -= pInc; + } while (numBytes); + + return p - bytes; +} + +/* + *---------------------------------------------------------------------- + * * Tcl_Concat -- * * Concatenate a set of strings into a single large string. @@ -964,6 +1100,9 @@ Tcl_Concat( for (totalSize = 1, i = 0; i < argc; i++) { totalSize += strlen(argv[i]) + 1; + if (totalSize <= 0) { + Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); + } } result = (char *) ckalloc((unsigned) totalSize); if (argc == 0) { @@ -1029,19 +1168,13 @@ Tcl_ConcatObj( int objc, /* Number of objects to concatenate. */ Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */ { - int allocSize, finalSize, length, elemLength, i; - char *p; - char *element; - char *concatStr; + int i, needSpace = 0; Tcl_Obj *objPtr, *resPtr; /* * Check first to see if all the items are of list type or empty. If so, * we will concat them together as lists, and return a list object. This - * is only valid when the lists have no current string representation, - * since we don't know what the original type was. An original string rep - * may have lost some whitespace info when converted which could be - * important. + * is only valid when the lists are in canonical form. */ for (i = 0; i < objc; i++) { @@ -1100,79 +1233,41 @@ Tcl_ConcatObj( * the slow way, using the string representations. */ - allocSize = 0; + TclNewObj(resPtr); for (i = 0; i < objc; i++) { + int trim, elemLength; + const char *element; + objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &length); - if ((element != NULL) && (length > 0)) { - allocSize += (length + 1); - } - } - if (allocSize == 0) { - allocSize = 1; /* enough for the NULL byte at end */ - } - - /* - * Allocate storage for the concatenated result. Note that allocSize is - * one more than the total number of characters, and so includes room for - * the terminating NULL byte. - */ - - concatStr = ckalloc((unsigned) allocSize); + element = TclGetStringFromObj(objPtr, &elemLength); - /* - * Now concatenate the elements. Clip white space off the front and back - * to generate a neater result, and ignore any empty elements. Also put a - * null byte at the end. - */ + /* Trim away the leading whitespace */ + trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6); + element += trim; + elemLength -= trim; - finalSize = 0; - if (objc == 0) { - *concatStr = '\0'; - } else { - p = concatStr; - for (i = 0; i < objc; i++) { - objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &elemLength); - while ((elemLength > 0) && (UCHAR(*element) < 127) - && isspace(UCHAR(*element))) { /* INTL: ISO C space. */ - element++; - elemLength--; - } + /* + * Trim away the trailing whitespace. Do not permit trimming + * to expose a final backslash character. + */ - /* - * Trim trailing white space. But, be careful not to trim a space - * character if it is preceded by a backslash: in this case it - * could be significant. - */ + trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6); + trim -= trim && (element[elemLength - trim - 1] == '\\'); + elemLength -= trim; - while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127) - && isspace(UCHAR(element[elemLength-1])) - /* INTL: ISO C space. */ - && ((elemLength < 2) || (element[elemLength-2] != '\\'))) { - elemLength--; - } - if (elemLength == 0) { - continue; /* nothing left of this element */ - } - memcpy(p, element, (size_t) elemLength); - p += elemLength; - *p = ' '; - p++; - finalSize += (elemLength + 1); + /* If we're left with empty element after trimming, do nothing */ + if (elemLength == 0) { + continue; } - if (p != concatStr) { - p[-1] = 0; - finalSize -= 1; /* we overwrote the final ' ' */ - } else { - *p = 0; + + /* Append to the result with space if needed */ + if (needSpace) { + Tcl_AppendToObj(resPtr, " ", 1); } + Tcl_AppendToObj(resPtr, element, elemLength); + needSpace = 1; } - - TclNewObj(objPtr); - objPtr->bytes = concatStr; - objPtr->length = finalSize; - return objPtr; + return resPtr; } /* |