summaryrefslogtreecommitdiffstats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2011-04-13 20:27:29 (GMT)
committerdgp <dgp@users.sourceforge.net>2011-04-13 20:27:29 (GMT)
commitef9aca4d1c097c0bb5c50d64ed819c38baef6387 (patch)
treeed1d89e9c50ba82b9724bfc18033907549896ec3 /generic/tclUtil.c
parentbccb266ff70f020e3d62ef686b1bd7e79b2245ae (diff)
downloadtcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.zip
tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.gz
tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.bz2
[Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*].
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r--generic/tclUtil.c239
1 files changed, 167 insertions, 72 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3565165..2f2deec 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -937,6 +937,142 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the right side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the end of the string.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes + numBytes;
+ int pInc;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimRight works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ p = Tcl_UtfPrev(p, bytes);
+ pInc = TclUtfToUniChar(p, &ch1);
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is last non-trimmed char */
+ break;
+ }
+ pInc = 0;
+ } while (p > bytes);
+
+ return numBytes - (p - bytes) - pInc;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimLeft --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the left side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * An integer index into the first string, pointing to the first
+ * character not to be trimmed.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimLeft works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ int pInc = TclUtfToUniChar(p, &ch1);
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is first non-trimmed char */
+ break;
+ }
+
+ p += pInc;
+ numBytes -= pInc;
+ } while (numBytes);
+
+ return p - bytes;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_Concat --
*
* Concatenate a set of strings into a single large string.
@@ -964,6 +1100,9 @@ Tcl_Concat(
for (totalSize = 1, i = 0; i < argc; i++) {
totalSize += strlen(argv[i]) + 1;
+ if (totalSize <= 0) {
+ Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
+ }
}
result = (char *) ckalloc((unsigned) totalSize);
if (argc == 0) {
@@ -1029,19 +1168,13 @@ Tcl_ConcatObj(
int objc, /* Number of objects to concatenate. */
Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */
{
- int allocSize, finalSize, length, elemLength, i;
- char *p;
- char *element;
- char *concatStr;
+ int i, needSpace = 0;
Tcl_Obj *objPtr, *resPtr;
/*
* Check first to see if all the items are of list type or empty. If so,
* we will concat them together as lists, and return a list object. This
- * is only valid when the lists have no current string representation,
- * since we don't know what the original type was. An original string rep
- * may have lost some whitespace info when converted which could be
- * important.
+ * is only valid when the lists are in canonical form.
*/
for (i = 0; i < objc; i++) {
@@ -1100,79 +1233,41 @@ Tcl_ConcatObj(
* the slow way, using the string representations.
*/
- allocSize = 0;
+ TclNewObj(resPtr);
for (i = 0; i < objc; i++) {
+ int trim, elemLength;
+ const char *element;
+
objPtr = objv[i];
- element = TclGetStringFromObj(objPtr, &length);
- if ((element != NULL) && (length > 0)) {
- allocSize += (length + 1);
- }
- }
- if (allocSize == 0) {
- allocSize = 1; /* enough for the NULL byte at end */
- }
-
- /*
- * Allocate storage for the concatenated result. Note that allocSize is
- * one more than the total number of characters, and so includes room for
- * the terminating NULL byte.
- */
-
- concatStr = ckalloc((unsigned) allocSize);
+ element = TclGetStringFromObj(objPtr, &elemLength);
- /*
- * Now concatenate the elements. Clip white space off the front and back
- * to generate a neater result, and ignore any empty elements. Also put a
- * null byte at the end.
- */
+ /* Trim away the leading whitespace */
+ trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6);
+ element += trim;
+ elemLength -= trim;
- finalSize = 0;
- if (objc == 0) {
- *concatStr = '\0';
- } else {
- p = concatStr;
- for (i = 0; i < objc; i++) {
- objPtr = objv[i];
- element = TclGetStringFromObj(objPtr, &elemLength);
- while ((elemLength > 0) && (UCHAR(*element) < 127)
- && isspace(UCHAR(*element))) { /* INTL: ISO C space. */
- element++;
- elemLength--;
- }
+ /*
+ * Trim away the trailing whitespace. Do not permit trimming
+ * to expose a final backslash character.
+ */
- /*
- * Trim trailing white space. But, be careful not to trim a space
- * character if it is preceded by a backslash: in this case it
- * could be significant.
- */
+ trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6);
+ trim -= trim && (element[elemLength - trim - 1] == '\\');
+ elemLength -= trim;
- while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127)
- && isspace(UCHAR(element[elemLength-1]))
- /* INTL: ISO C space. */
- && ((elemLength < 2) || (element[elemLength-2] != '\\'))) {
- elemLength--;
- }
- if (elemLength == 0) {
- continue; /* nothing left of this element */
- }
- memcpy(p, element, (size_t) elemLength);
- p += elemLength;
- *p = ' ';
- p++;
- finalSize += (elemLength + 1);
+ /* If we're left with empty element after trimming, do nothing */
+ if (elemLength == 0) {
+ continue;
}
- if (p != concatStr) {
- p[-1] = 0;
- finalSize -= 1; /* we overwrote the final ' ' */
- } else {
- *p = 0;
+
+ /* Append to the result with space if needed */
+ if (needSpace) {
+ Tcl_AppendToObj(resPtr, " ", 1);
}
+ Tcl_AppendToObj(resPtr, element, elemLength);
+ needSpace = 1;
}
-
- TclNewObj(objPtr);
- objPtr->bytes = concatStr;
- objPtr->length = finalSize;
- return objPtr;
+ return resPtr;
}
/*