summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2011-04-13 20:27:29 (GMT)
committerdgp <dgp@users.sourceforge.net>2011-04-13 20:27:29 (GMT)
commitef9aca4d1c097c0bb5c50d64ed819c38baef6387 (patch)
treeed1d89e9c50ba82b9724bfc18033907549896ec3
parentbccb266ff70f020e3d62ef686b1bd7e79b2245ae (diff)
downloadtcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.zip
tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.gz
tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.bz2
[Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*].
-rw-r--r--ChangeLog11
-rw-r--r--generic/tclCmdMZ.c127
-rw-r--r--generic/tclInt.h4
-rw-r--r--generic/tclUtil.c239
4 files changed, 196 insertions, 185 deletions
diff --git a/ChangeLog b/ChangeLog
index e90ee9d..508a72d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-04-13 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/tclUtil.c: Rewrite of Tcl_Concat*() routines to
+ prevent segfaults on buffer overflow. Build them out of existing
+ primitives already coded to handle overflow properly. Uses the
+ new TclTrim*() routines. [Bug 3285375]
+
+ * generic/tclCmdMZ.c: New internal utility routines TclTrimLeft()
+ * generic/tclInt.h: and TclTrimRight(). Refactor the
+ * generic/tclUtil.c: [string trim*] implementations to use them.
+
2011-04-13 Miguel Sofer <msofer@users.sf.net>
* generic/tclVar.c: fix for [Bug 2662380], crash caused by
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index cf74db5..7c3855c 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -3108,10 +3108,8 @@ StringTrimCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch, trim;
- register const char *p, *end;
- const char *check, *checkEnd, *string1, *string2;
- int offset, length1, length2;
+ const char *string1, *string2;
+ int triml, trimr, length1, length2;
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
@@ -3123,58 +3121,12 @@ StringTrimCmd(
return TCL_ERROR;
}
string1 = TclGetStringFromObj(objv[1], &length1);
- checkEnd = string2 + length2;
- /*
- * The outer loop iterates over the string. The inner loop iterates over
- * the trim characters. The loops terminate as soon as a non-trim
- * character is discovered and string1 is left pointing at the first
- * non-trim character.
- */
-
- end = string1 + length1;
- for (p = string1; p < end; p += offset) {
- offset = TclUtfToUniChar(p, &ch);
-
- for (check = string2; ; ) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- string1 += offset;
- break;
- }
- }
- }
-
- /*
- * The outer loop iterates over the string. The inner loop iterates over
- * the trim characters. The loops terminate as soon as a non-trim
- * character is discovered and length1 marks the last non-trim character.
- */
-
- end = string1;
- for (p = string1 + length1; p > end; ) {
- p = Tcl_UtfPrev(p, string1);
- offset = TclUtfToUniChar(p, &ch);
- check = string2;
- while (1) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- break;
- }
- }
- }
+ triml = TclTrimLeft(string1, length1, string2, length2);
+ trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);
- Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
return TCL_OK;
}
@@ -3204,10 +3156,8 @@ StringTrimLCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch, trim;
- register const char *p, *end;
- const char *check, *checkEnd, *string1, *string2;
- int offset, length1, length2;
+ const char *string1, *string2;
+ int trim, length1, length2;
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
@@ -3219,34 +3169,10 @@ StringTrimLCmd(
return TCL_ERROR;
}
string1 = TclGetStringFromObj(objv[1], &length1);
- checkEnd = string2 + length2;
-
- /*
- * The outer loop iterates over the string. The inner loop iterates over
- * the trim characters. The loops terminate as soon as a non-trim
- * character is discovered and string1 is left pointing at the first
- * non-trim character.
- */
-
- end = string1 + length1;
- for (p = string1; p < end; p += offset) {
- offset = TclUtfToUniChar(p, &ch);
- for (check = string2; ; ) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- string1 += offset;
- break;
- }
- }
- }
+ trim = TclTrimLeft(string1, length1, string2, length2);
- Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(string1+trim, length1-trim));
return TCL_OK;
}
@@ -3276,10 +3202,8 @@ StringTrimRCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch, trim;
- register const char *p, *end;
- const char *check, *checkEnd, *string1, *string2;
- int offset, length1, length2;
+ const char *string1, *string2;
+ int trim, length1, length2;
if (objc == 3) {
string2 = TclGetStringFromObj(objv[2], &length2);
@@ -3291,33 +3215,10 @@ StringTrimRCmd(
return TCL_ERROR;
}
string1 = TclGetStringFromObj(objv[1], &length1);
- checkEnd = string2 + length2;
- /*
- * The outer loop iterates over the string. The inner loop iterates over
- * the trim characters. The loops terminate as soon as a non-trim
- * character is discovered and length1 marks the last non-trim character.
- */
-
- end = string1;
- for (p = string1 + length1; p > end; ) {
- p = Tcl_UtfPrev(p, string1);
- offset = TclUtfToUniChar(p, &ch);
- check = string2;
- while (1) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- break;
- }
- }
- }
+ trim = TclTrimRight(string1, length1, string2, length2);
- Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1-trim));
return TCL_OK;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index c966610..b8a4dfa 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2784,6 +2784,10 @@ MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
int *clNextOuter, CONST char *outerScript);
MODULE_SCOPE void TclTransferResult(Tcl_Interp *sourceInterp, int result,
Tcl_Interp *targetInterp);
+MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes,
+ const char *trim, int numTrim);
+MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes,
+ const char *trim, int numTrim);
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3565165..2f2deec 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -937,6 +937,142 @@ Tcl_Backslash(
/*
*----------------------------------------------------------------------
*
+ * TclTrimRight --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the right side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * The number of bytes to be removed from the end of the string.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimRight(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes + numBytes;
+ int pInc;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimRight works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ p = Tcl_UtfPrev(p, bytes);
+ pInc = TclUtfToUniChar(p, &ch1);
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is last non-trimmed char */
+ break;
+ }
+ pInc = 0;
+ } while (p > bytes);
+
+ return numBytes - (p - bytes) - pInc;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclTrimLeft --
+ * Takes two counted strings in the Tcl encoding which must both be
+ * null terminated. Conceptually trims from the left side of the
+ * first string all characters found in the second string.
+ *
+ * Results:
+ * An integer index into the first string, pointing to the first
+ * character not to be trimmed.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclTrimLeft(
+ const char *bytes, /* String to be trimmed... */
+ int numBytes, /* ...and its length in bytes */
+ const char *trim, /* String of trim characters... */
+ int numTrim) /* ...and its length in bytes */
+{
+ const char *p = bytes;
+
+ if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
+ Tcl_Panic("TclTrimLeft works only on null-terminated strings");
+ }
+
+ /* Empty strings -> nothing to do */
+ if ((numBytes == 0) || (numTrim == 0)) {
+ return 0;
+ }
+
+ /* Outer loop: iterate over string to be trimmed */
+ do {
+ Tcl_UniChar ch1;
+ int pInc = TclUtfToUniChar(p, &ch1);
+ const char *q = trim;
+ int bytesLeft = numTrim;
+
+ /* Inner loop: scan trim string for match to current character */
+ do {
+ Tcl_UniChar ch2;
+ int qInc = TclUtfToUniChar(q, &ch2);
+
+ if (ch1 == ch2) {
+ break;
+ }
+
+ q += qInc;
+ bytesLeft -= qInc;
+ } while (bytesLeft);
+
+ if (bytesLeft == 0) {
+ /* No match; trim task done; *p is first non-trimmed char */
+ break;
+ }
+
+ p += pInc;
+ numBytes -= pInc;
+ } while (numBytes);
+
+ return p - bytes;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_Concat --
*
* Concatenate a set of strings into a single large string.
@@ -964,6 +1100,9 @@ Tcl_Concat(
for (totalSize = 1, i = 0; i < argc; i++) {
totalSize += strlen(argv[i]) + 1;
+ if (totalSize <= 0) {
+ Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
+ }
}
result = (char *) ckalloc((unsigned) totalSize);
if (argc == 0) {
@@ -1029,19 +1168,13 @@ Tcl_ConcatObj(
int objc, /* Number of objects to concatenate. */
Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */
{
- int allocSize, finalSize, length, elemLength, i;
- char *p;
- char *element;
- char *concatStr;
+ int i, needSpace = 0;
Tcl_Obj *objPtr, *resPtr;
/*
* Check first to see if all the items are of list type or empty. If so,
* we will concat them together as lists, and return a list object. This
- * is only valid when the lists have no current string representation,
- * since we don't know what the original type was. An original string rep
- * may have lost some whitespace info when converted which could be
- * important.
+ * is only valid when the lists are in canonical form.
*/
for (i = 0; i < objc; i++) {
@@ -1100,79 +1233,41 @@ Tcl_ConcatObj(
* the slow way, using the string representations.
*/
- allocSize = 0;
+ TclNewObj(resPtr);
for (i = 0; i < objc; i++) {
+ int trim, elemLength;
+ const char *element;
+
objPtr = objv[i];
- element = TclGetStringFromObj(objPtr, &length);
- if ((element != NULL) && (length > 0)) {
- allocSize += (length + 1);
- }
- }
- if (allocSize == 0) {
- allocSize = 1; /* enough for the NULL byte at end */
- }
-
- /*
- * Allocate storage for the concatenated result. Note that allocSize is
- * one more than the total number of characters, and so includes room for
- * the terminating NULL byte.
- */
-
- concatStr = ckalloc((unsigned) allocSize);
+ element = TclGetStringFromObj(objPtr, &elemLength);
- /*
- * Now concatenate the elements. Clip white space off the front and back
- * to generate a neater result, and ignore any empty elements. Also put a
- * null byte at the end.
- */
+ /* Trim away the leading whitespace */
+ trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6);
+ element += trim;
+ elemLength -= trim;
- finalSize = 0;
- if (objc == 0) {
- *concatStr = '\0';
- } else {
- p = concatStr;
- for (i = 0; i < objc; i++) {
- objPtr = objv[i];
- element = TclGetStringFromObj(objPtr, &elemLength);
- while ((elemLength > 0) && (UCHAR(*element) < 127)
- && isspace(UCHAR(*element))) { /* INTL: ISO C space. */
- element++;
- elemLength--;
- }
+ /*
+ * Trim away the trailing whitespace. Do not permit trimming
+ * to expose a final backslash character.
+ */
- /*
- * Trim trailing white space. But, be careful not to trim a space
- * character if it is preceded by a backslash: in this case it
- * could be significant.
- */
+ trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6);
+ trim -= trim && (element[elemLength - trim - 1] == '\\');
+ elemLength -= trim;
- while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127)
- && isspace(UCHAR(element[elemLength-1]))
- /* INTL: ISO C space. */
- && ((elemLength < 2) || (element[elemLength-2] != '\\'))) {
- elemLength--;
- }
- if (elemLength == 0) {
- continue; /* nothing left of this element */
- }
- memcpy(p, element, (size_t) elemLength);
- p += elemLength;
- *p = ' ';
- p++;
- finalSize += (elemLength + 1);
+ /* If we're left with empty element after trimming, do nothing */
+ if (elemLength == 0) {
+ continue;
}
- if (p != concatStr) {
- p[-1] = 0;
- finalSize -= 1; /* we overwrote the final ' ' */
- } else {
- *p = 0;
+
+ /* Append to the result with space if needed */
+ if (needSpace) {
+ Tcl_AppendToObj(resPtr, " ", 1);
}
+ Tcl_AppendToObj(resPtr, element, elemLength);
+ needSpace = 1;
}
-
- TclNewObj(objPtr);
- objPtr->bytes = concatStr;
- objPtr->length = finalSize;
- return objPtr;
+ return resPtr;
}
/*