From ef9aca4d1c097c0bb5c50d64ed819c38baef6387 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 13 Apr 2011 20:27:29 +0000 Subject: [Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*]. --- ChangeLog | 11 +++ generic/tclCmdMZ.c | 127 ++++------------------------ generic/tclInt.h | 4 + generic/tclUtil.c | 239 +++++++++++++++++++++++++++++++++++++---------------- 4 files changed, 196 insertions(+), 185 deletions(-) diff --git a/ChangeLog b/ChangeLog index e90ee9d..508a72d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2011-04-13 Don Porter + + * generic/tclUtil.c: Rewrite of Tcl_Concat*() routines to + prevent segfaults on buffer overflow. Build them out of existing + primitives already coded to handle overflow properly. Uses the + new TclTrim*() routines. [Bug 3285375] + + * generic/tclCmdMZ.c: New internal utility routines TclTrimLeft() + * generic/tclInt.h: and TclTrimRight(). Refactor the + * generic/tclUtil.c: [string trim*] implementations to use them. + 2011-04-13 Miguel Sofer * generic/tclVar.c: fix for [Bug 2662380], crash caused by diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index cf74db5..7c3855c 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -3108,10 +3108,8 @@ StringTrimCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int triml, trimr, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3123,58 +3121,12 @@ StringTrimCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and string1 is left pointing at the first - * non-trim character. - */ - - end = string1 + length1; - for (p = string1; p < end; p += offset) { - offset = TclUtfToUniChar(p, &ch); - - for (check = string2; ; ) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - string1 += offset; - break; - } - } - } - - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and length1 marks the last non-trim character. - */ - - end = string1; - for (p = string1 + length1; p > end; ) { - p = Tcl_UtfPrev(p, string1); - offset = TclUtfToUniChar(p, &ch); - check = string2; - while (1) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - break; - } - } - } + triml = TclTrimLeft(string1, length1, string2, length2); + trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, + Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); return TCL_OK; } @@ -3204,10 +3156,8 @@ StringTrimLCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int trim, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3219,34 +3169,10 @@ StringTrimLCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and string1 is left pointing at the first - * non-trim character. - */ - - end = string1 + length1; - for (p = string1; p < end; p += offset) { - offset = TclUtfToUniChar(p, &ch); - for (check = string2; ; ) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - string1 += offset; - break; - } - } - } + trim = TclTrimLeft(string1, length1, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1+trim, length1-trim)); return TCL_OK; } @@ -3276,10 +3202,8 @@ StringTrimRCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int trim, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3291,33 +3215,10 @@ StringTrimRCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and length1 marks the last non-trim character. - */ - - end = string1; - for (p = string1 + length1; p > end; ) { - p = Tcl_UtfPrev(p, string1); - offset = TclUtfToUniChar(p, &ch); - check = string2; - while (1) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - break; - } - } - } + trim = TclTrimRight(string1, length1, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1-trim)); return TCL_OK; } diff --git a/generic/tclInt.h b/generic/tclInt.h index c966610..b8a4dfa 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2784,6 +2784,10 @@ MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr, int *clNextOuter, CONST char *outerScript); MODULE_SCOPE void TclTransferResult(Tcl_Interp *sourceInterp, int result, Tcl_Interp *targetInterp); +MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, + const char *trim, int numTrim); +MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, + const char *trim, int numTrim); MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp, diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3565165..2f2deec 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -937,6 +937,142 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the right side of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the end of the string. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes + numBytes; + int pInc; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimRight works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + const char *q = trim; + int bytesLeft = numTrim; + + p = Tcl_UtfPrev(p, bytes); + pInc = TclUtfToUniChar(p, &ch1); + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is last non-trimmed char */ + break; + } + pInc = 0; + } while (p > bytes); + + return numBytes - (p - bytes) - pInc; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrimLeft -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the left side of the + * first string all characters found in the second string. + * + * Results: + * An integer index into the first string, pointing to the first + * character not to be trimmed. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimLeft works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + int pInc = TclUtfToUniChar(p, &ch1); + const char *q = trim; + int bytesLeft = numTrim; + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is first non-trimmed char */ + break; + } + + p += pInc; + numBytes -= pInc; + } while (numBytes); + + return p - bytes; +} + +/* + *---------------------------------------------------------------------- + * * Tcl_Concat -- * * Concatenate a set of strings into a single large string. @@ -964,6 +1100,9 @@ Tcl_Concat( for (totalSize = 1, i = 0; i < argc; i++) { totalSize += strlen(argv[i]) + 1; + if (totalSize <= 0) { + Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); + } } result = (char *) ckalloc((unsigned) totalSize); if (argc == 0) { @@ -1029,19 +1168,13 @@ Tcl_ConcatObj( int objc, /* Number of objects to concatenate. */ Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */ { - int allocSize, finalSize, length, elemLength, i; - char *p; - char *element; - char *concatStr; + int i, needSpace = 0; Tcl_Obj *objPtr, *resPtr; /* * Check first to see if all the items are of list type or empty. If so, * we will concat them together as lists, and return a list object. This - * is only valid when the lists have no current string representation, - * since we don't know what the original type was. An original string rep - * may have lost some whitespace info when converted which could be - * important. + * is only valid when the lists are in canonical form. */ for (i = 0; i < objc; i++) { @@ -1100,79 +1233,41 @@ Tcl_ConcatObj( * the slow way, using the string representations. */ - allocSize = 0; + TclNewObj(resPtr); for (i = 0; i < objc; i++) { + int trim, elemLength; + const char *element; + objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &length); - if ((element != NULL) && (length > 0)) { - allocSize += (length + 1); - } - } - if (allocSize == 0) { - allocSize = 1; /* enough for the NULL byte at end */ - } - - /* - * Allocate storage for the concatenated result. Note that allocSize is - * one more than the total number of characters, and so includes room for - * the terminating NULL byte. - */ - - concatStr = ckalloc((unsigned) allocSize); + element = TclGetStringFromObj(objPtr, &elemLength); - /* - * Now concatenate the elements. Clip white space off the front and back - * to generate a neater result, and ignore any empty elements. Also put a - * null byte at the end. - */ + /* Trim away the leading whitespace */ + trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6); + element += trim; + elemLength -= trim; - finalSize = 0; - if (objc == 0) { - *concatStr = '\0'; - } else { - p = concatStr; - for (i = 0; i < objc; i++) { - objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &elemLength); - while ((elemLength > 0) && (UCHAR(*element) < 127) - && isspace(UCHAR(*element))) { /* INTL: ISO C space. */ - element++; - elemLength--; - } + /* + * Trim away the trailing whitespace. Do not permit trimming + * to expose a final backslash character. + */ - /* - * Trim trailing white space. But, be careful not to trim a space - * character if it is preceded by a backslash: in this case it - * could be significant. - */ + trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6); + trim -= trim && (element[elemLength - trim - 1] == '\\'); + elemLength -= trim; - while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127) - && isspace(UCHAR(element[elemLength-1])) - /* INTL: ISO C space. */ - && ((elemLength < 2) || (element[elemLength-2] != '\\'))) { - elemLength--; - } - if (elemLength == 0) { - continue; /* nothing left of this element */ - } - memcpy(p, element, (size_t) elemLength); - p += elemLength; - *p = ' '; - p++; - finalSize += (elemLength + 1); + /* If we're left with empty element after trimming, do nothing */ + if (elemLength == 0) { + continue; } - if (p != concatStr) { - p[-1] = 0; - finalSize -= 1; /* we overwrote the final ' ' */ - } else { - *p = 0; + + /* Append to the result with space if needed */ + if (needSpace) { + Tcl_AppendToObj(resPtr, " ", 1); } + Tcl_AppendToObj(resPtr, element, elemLength); + needSpace = 1; } - - TclNewObj(objPtr); - objPtr->bytes = concatStr; - objPtr->length = finalSize; - return objPtr; + return resPtr; } /* -- cgit v0.12