[Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*].

author: dgp <dgp@users.sourceforge.net> 2011-04-13 20:37:26 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2011-04-13 20:37:26 (GMT)
commit: b0983fcf09398d8b401682d0bb44cffb8aece6aa (patch)
tree: 38a0d9601b1e4a9c405bf5137cc4a1495bf10200 /generic/tclUtil.c
parent: e6726b10df85cd2118583f109d2ff9e1445c97b1 (diff)
parent: 5943816c50941c1e263edef911db953d772fd4bd (diff)
download: tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.zip
tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.tar.gz
tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.tar.bz2
1 files changed, 167 insertions, 77 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 46ddf85..c67b6af 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -945,6 +945,142 @@ Tcl_Backslash(
 /*
  *----------------------------------------------------------------------
  *
+ * TclTrimRight --
+ *	Takes two counted strings in the Tcl encoding which must both be
+ *	null terminated (at bytes[numBytes] and trim[numTrim]).  Conceptually
+ *	trims from the right of the first string all characters in the second.
+ *
+ * Results:
+ *	The number of bytes to be removed from the end of the first string.
+ *
+ * Side effects:
+ *	None, but calls Tcl_Panic if either string is not null terminated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimRight(
+    const char *bytes,	/* String to be trimmed... */
+    int numBytes,	/* ...and its length in bytes */
+    const char *trim,	/* String of trim characters... */
+    int numTrim)	/* ...and its length in bytes */
+{
+    const char *p = bytes + numBytes;	/* Starts at the terminating null */
+    int pInc;	/* Byte length of the char at p; reset to 0 once it is trimmed */
+
+    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+	Tcl_Panic("TclTrimRight works only on null-terminated strings");
+    }
+
+    /* Empty strings -> nothing to do; report zero bytes to trim */
+    if ((numBytes == 0) || (numTrim == 0)) {
+	return 0;
+    }
+
+    /* Outer loop: iterate backwards over string to be trimmed */
+    do {
+	Tcl_UniChar ch1;
+	const char *q = trim;	/* Cursor into the trim set */
+	int bytesLeft = numTrim;	/* Trim-set bytes not yet compared */
+
+	p = Tcl_UtfPrev(p, bytes);	/* Back up one character, never before bytes */
+ 	pInc = TclUtfToUniChar(p, &ch1);	/* NOTE(review): assumes this decodes every byte stepped over; confirm for malformed UTF-8 */
+
+	/* Inner loop: scan trim string for match to current character */
+	do {
+	    Tcl_UniChar ch2;
+	    int qInc = TclUtfToUniChar(q, &ch2);
+
+	    if (ch1 == ch2) {
+		break;	/* Leaves bytesLeft nonzero, which signals a match */
+	    }
+
+	    q += qInc;
+	    bytesLeft -= qInc;
+	} while (bytesLeft);
+
+	if (bytesLeft == 0) {
+	    /* No match; trim task done; *p is last non-trimmed char */
+	    break;
+	}
+	pInc = 0;	/* Char at p is trimmed, so it must not be counted as kept */
+    } while (p > bytes);
+
+    return numBytes - (p - bytes) - pInc;	/* Bytes after the last kept char; all of them if none is kept */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimLeft --
+ *	Takes two counted strings in the Tcl encoding which must both be
+ *	null terminated (at bytes[numBytes] and trim[numTrim]).  Conceptually
+ *	trims from the left of the first string all characters in the second.
+ *
+ * Results:
+ *	The byte index into the first string of the first character not to be
+ *	trimmed, which is also the number of leading bytes to be removed.
+ *
+ * Side effects:
+ *	None, but calls Tcl_Panic if either string is not null terminated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimLeft(
+    const char *bytes,	/* String to be trimmed... */
+    int numBytes,	/* ...and its length in bytes */
+    const char *trim,	/* String of trim characters... */
+    int numTrim)	/* ...and its length in bytes */
+{
+    const char *p = bytes;	/* Cursor: first character not yet known to be trimmed */
+
+    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
+    }
+
+    /* Empty strings -> nothing to do; report index zero */
+    if ((numBytes == 0) || (numTrim == 0)) {
+	return 0;
+    }
+
+    /* Outer loop: iterate forwards over string to be trimmed */
+    do {
+	Tcl_UniChar ch1;
+	int pInc = TclUtfToUniChar(p, &ch1);	/* Byte length of the char at p */
+	const char *q = trim;	/* Cursor into the trim set */
+	int bytesLeft = numTrim;	/* Trim-set bytes not yet compared */
+
+	/* Inner loop: scan trim string for match to current character */
+	do {
+	    Tcl_UniChar ch2;
+	    int qInc = TclUtfToUniChar(q, &ch2);
+
+	    if (ch1 == ch2) {
+		break;	/* Leaves bytesLeft nonzero, which signals a match */
+	    }
+
+	    q += qInc;
+	    bytesLeft -= qInc;
+	} while (bytesLeft);
+
+	if (bytesLeft == 0) {
+	    /* No match; trim task done; *p is first non-trimmed char */
+	    break;
+	}
+
+	p += pInc;	/* Char at p is trimmed; move on to the next one */
+	numBytes -= pInc;	/* numBytes now counts the bytes not yet examined */
+    } while (numBytes);
+
+    return p - bytes;	/* Offset of the first kept char; the bytes consumed if all were trimmed */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_Concat --
  *
  *	Concatenate a set of strings into a single large string.
@@ -972,6 +1108,9 @@ Tcl_Concat(
 
     for (totalSize = 1, i = 0; i < argc; i++) {
 	totalSize += strlen(argv[i]) + 1;
+	if (totalSize <= 0) {
+	    Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
+	}
     }
     result = ckalloc(totalSize);
     if (argc == 0) {
@@ -1037,19 +1176,13 @@ Tcl_ConcatObj(
     int objc,			/* Number of objects to concatenate. */
     Tcl_Obj *const objv[])	/* Array of objects to concatenate. */
 {
-    int allocSize, finalSize, length, elemLength, i;
-    char *p;
-    const char *element;
-    char *concatStr;
+    int i, needSpace = 0;
     Tcl_Obj *objPtr, *resPtr;
 
     /*
      * Check first to see if all the items are of list type or empty. If so,
      * we will concat them together as lists, and return a list object. This
-     * is only valid when the lists have no current string representation,
-     * since we don't know what the original type was. An original string rep
-     * may have lost some whitespace info when converted which could be
-     * important.
+     * is only valid when the lists are in canonical form.
      */
 
     for (i = 0;  i < objc;  i++) {
@@ -1108,84 +1241,41 @@ Tcl_ConcatObj(
      * the slow way, using the string representations.
      */
 
-    allocSize = 0;
+    TclNewObj(resPtr);
     for (i = 0;  i < objc;  i++) {
-	int oldAllocSize = allocSize;
-
+	int trim, elemLength;
+	const char *element;
+	
 	objPtr = objv[i];
-	element = TclGetStringFromObj(objPtr, &length);
-	if ((element != NULL) && (length > 0)) {
-	    allocSize += (length + 1);
-	    if (allocSize < oldAllocSize) {
-		Tcl_Panic("too much memory required");
-	    }
-	}
-    }
-    if (allocSize == 0) {
-	allocSize = 1;		/* enough for the NULL byte at end */
-    }
-
-    /*
-     * Allocate storage for the concatenated result. Note that allocSize is
-     * one more than the total number of characters, and so includes room for
-     * the terminating NULL byte.
-     */
-
-    concatStr = ckalloc(allocSize);
+	element = TclGetStringFromObj(objPtr, &elemLength);
 
-    /*
-     * Now concatenate the elements. Clip white space off the front and back
-     * to generate a neater result, and ignore any empty elements. Also put a
-     * null byte at the end.
-     */
+	/* Trim away the leading whitespace */
+	trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6);
+	element += trim;
+	elemLength -= trim;
 
-    finalSize = 0;
-    if (objc == 0) {
-	*concatStr = '\0';
-    } else {
-	p = concatStr;
-	for (i = 0;  i < objc;  i++) {
-	    objPtr = objv[i];
-	    element = TclGetStringFromObj(objPtr, &elemLength);
-	    while ((elemLength > 0) && (UCHAR(*element) < 127)
-		    && isspace(UCHAR(*element))) { /* INTL: ISO C space. */
-		element++;
-		elemLength--;
-	    }
+	/*
+	 * Trim away the trailing whitespace.  Do not permit trimming
+	 * to expose a final backslash character.
+	 */
 
-	    /*
-	     * Trim trailing white space. But, be careful not to trim a space
-	     * character if it is preceded by a backslash: in this case it
-	     * could be significant.
-	     */
+	trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6);
+	trim -= trim && (element[elemLength - trim - 1] == '\\');
+	elemLength -= trim;
 
-	    while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127)
-		    && isspace(UCHAR(element[elemLength-1]))
-						/* INTL: ISO C space. */
-		    && ((elemLength < 2) || (element[elemLength-2] != '\\'))) {
-		elemLength--;
-	    }
-	    if (elemLength == 0) {
-		continue;	/* nothing left of this element */
-	    }
-	    memcpy(p, element, (size_t) elemLength);
-	    p += elemLength;
-	    *p = ' ';
-	    p++;
-	    finalSize += (elemLength + 1);
+	/* If we're left with an empty element after trimming, skip it */
+	if (elemLength == 0) {
+	    continue;
 	}
-	if (p != concatStr) {
-	    p[-1] = 0;
-	    finalSize -= 1;	/* we overwrote the final ' ' */
-	} else {
-	    *p = 0;
+
+	/* Append to the result with space if needed */
+	if (needSpace) {
+	    Tcl_AppendToObj(resPtr, " ", 1);
 	}
+	Tcl_AppendToObj(resPtr, element, elemLength);
+	needSpace = 1;
     }
-
-    TclNewObj(objPtr);
-    objPtr->bytes = concatStr;
-    objPtr->length = finalSize;
-    return objPtr;
+    return resPtr;
 }
 
 /*
author	dgp <dgp@users.sourceforge.net>	2011-04-13 20:37:26 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2011-04-13 20:37:26 (GMT)
commit	b0983fcf09398d8b401682d0bb44cffb8aece6aa (patch)
tree	38a0d9601b1e4a9c405bf5137cc4a1495bf10200 /generic/tclUtil.c
parent	e6726b10df85cd2118583f109d2ff9e1445c97b1 (diff)
parent	5943816c50941c1e263edef911db953d772fd4bd (diff)
download	tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.zip tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.tar.gz tcl-b0983fcf09398d8b401682d0bb44cffb8aece6aa.tar.bz2