diff options
author | dgp <dgp@users.sourceforge.net> | 2011-04-13 20:27:29 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2011-04-13 20:27:29 (GMT) |
commit | ef9aca4d1c097c0bb5c50d64ed819c38baef6387 (patch) | |
tree | ed1d89e9c50ba82b9724bfc18033907549896ec3 /generic | |
parent | bccb266ff70f020e3d62ef686b1bd7e79b2245ae (diff) | |
download | tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.zip tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.gz tcl-ef9aca4d1c097c0bb5c50d64ed819c38baef6387.tar.bz2 |
[Bug 3285375]: Rewrite Tcl_Concat*() and [string trim*].
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdMZ.c | 127 | ||||
-rw-r--r-- | generic/tclInt.h | 4 | ||||
-rw-r--r-- | generic/tclUtil.c | 239 |
3 files changed, 185 insertions, 185 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index cf74db5..7c3855c 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -3108,10 +3108,8 @@ StringTrimCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int triml, trimr, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3123,58 +3121,12 @@ StringTrimCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and string1 is left pointing at the first - * non-trim character. - */ - - end = string1 + length1; - for (p = string1; p < end; p += offset) { - offset = TclUtfToUniChar(p, &ch); - - for (check = string2; ; ) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - string1 += offset; - break; - } - } - } - - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and length1 marks the last non-trim character. - */ - - end = string1; - for (p = string1 + length1; p > end; ) { - p = Tcl_UtfPrev(p, string1); - offset = TclUtfToUniChar(p, &ch); - check = string2; - while (1) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - break; - } - } - } + triml = TclTrimLeft(string1, length1, string2, length2); + trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, + Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); return TCL_OK; } @@ -3204,10 +3156,8 @@ StringTrimLCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int trim, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3219,34 +3169,10 @@ StringTrimLCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and string1 is left pointing at the first - * non-trim character. - */ - - end = string1 + length1; - for (p = string1; p < end; p += offset) { - offset = TclUtfToUniChar(p, &ch); - for (check = string2; ; ) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - string1 += offset; - break; - } - } - } + trim = TclTrimLeft(string1, length1, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1+trim, length1-trim)); return TCL_OK; } @@ -3276,10 +3202,8 @@ StringTrimRCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch, trim; - register const char *p, *end; - const char *check, *checkEnd, *string1, *string2; - int offset, length1, length2; + const char *string1, *string2; + int trim, length1, length2; if (objc == 3) { string2 = TclGetStringFromObj(objv[2], &length2); @@ -3291,33 +3215,10 @@ StringTrimRCmd( return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); - checkEnd = string2 + length2; - /* - * The outer loop iterates over the string. The inner loop iterates over - * the trim characters. The loops terminate as soon as a non-trim - * character is discovered and length1 marks the last non-trim character. - */ - - end = string1; - for (p = string1 + length1; p > end; ) { - p = Tcl_UtfPrev(p, string1); - offset = TclUtfToUniChar(p, &ch); - check = string2; - while (1) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - break; - } - } - } + trim = TclTrimRight(string1, length1, string2, length2); - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1-trim)); return TCL_OK; } diff --git a/generic/tclInt.h b/generic/tclInt.h index c966610..b8a4dfa 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2784,6 +2784,10 @@ MODULE_SCOPE int TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr, int *clNextOuter, CONST char *outerScript); MODULE_SCOPE void TclTransferResult(Tcl_Interp *sourceInterp, int result, Tcl_Interp *targetInterp); +MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, + const char *trim, int numTrim); +MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, + const char *trim, int numTrim); MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp, diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3565165..2f2deec 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -937,6 +937,142 @@ Tcl_Backslash( /* *---------------------------------------------------------------------- * + * TclTrimRight -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the right side of the + * first string all characters found in the second string. + * + * Results: + * The number of bytes to be removed from the end of the string. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimRight( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes + numBytes; + int pInc; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimRight works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + const char *q = trim; + int bytesLeft = numTrim; + + p = Tcl_UtfPrev(p, bytes); + pInc = TclUtfToUniChar(p, &ch1); + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is last non-trimmed char */ + break; + } + pInc = 0; + } while (p > bytes); + + return numBytes - (p - bytes) - pInc; +} + +/* + *---------------------------------------------------------------------- + * + * TclTrimLeft -- + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the left side of the + * first string all characters found in the second string. + * + * Results: + * An integer index into the first string, pointing to the first + * character not to be trimmed. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimLeft( + const char *bytes, /* String to be trimmed... */ + int numBytes, /* ...and its length in bytes */ + const char *trim, /* String of trim characters... */ + int numTrim) /* ...and its length in bytes */ +{ + const char *p = bytes; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimLeft works only on null-terminated strings"); + } + + /* Empty strings -> nothing to do */ + if ((numBytes == 0) || (numTrim == 0)) { + return 0; + } + + /* Outer loop: iterate over string to be trimmed */ + do { + Tcl_UniChar ch1; + int pInc = TclUtfToUniChar(p, &ch1); + const char *q = trim; + int bytesLeft = numTrim; + + /* Inner loop: scan trim string for match to current character */ + do { + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); + + if (ch1 == ch2) { + break; + } + + q += qInc; + bytesLeft -= qInc; + } while (bytesLeft); + + if (bytesLeft == 0) { + /* No match; trim task done; *p is first non-trimmed char */ + break; + } + + p += pInc; + numBytes -= pInc; + } while (numBytes); + + return p - bytes; +} + +/* + *---------------------------------------------------------------------- + * * Tcl_Concat -- * * Concatenate a set of strings into a single large string. @@ -964,6 +1100,9 @@ Tcl_Concat( for (totalSize = 1, i = 0; i < argc; i++) { totalSize += strlen(argv[i]) + 1; + if (totalSize <= 0) { + Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); + } } result = (char *) ckalloc((unsigned) totalSize); if (argc == 0) { @@ -1029,19 +1168,13 @@ Tcl_ConcatObj( int objc, /* Number of objects to concatenate. */ Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */ { - int allocSize, finalSize, length, elemLength, i; - char *p; - char *element; - char *concatStr; + int i, needSpace = 0; Tcl_Obj *objPtr, *resPtr; /* * Check first to see if all the items are of list type or empty. If so, * we will concat them together as lists, and return a list object. This - * is only valid when the lists have no current string representation, - * since we don't know what the original type was. An original string rep - * may have lost some whitespace info when converted which could be - * important. + * is only valid when the lists are in canonical form. */ for (i = 0; i < objc; i++) { @@ -1100,79 +1233,41 @@ Tcl_ConcatObj( * the slow way, using the string representations. */ - allocSize = 0; + TclNewObj(resPtr); for (i = 0; i < objc; i++) { + int trim, elemLength; + const char *element; + objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &length); - if ((element != NULL) && (length > 0)) { - allocSize += (length + 1); - } - } - if (allocSize == 0) { - allocSize = 1; /* enough for the NULL byte at end */ - } - - /* - * Allocate storage for the concatenated result. Note that allocSize is - * one more than the total number of characters, and so includes room for - * the terminating NULL byte. - */ - - concatStr = ckalloc((unsigned) allocSize); + element = TclGetStringFromObj(objPtr, &elemLength); - /* - * Now concatenate the elements. Clip white space off the front and back - * to generate a neater result, and ignore any empty elements. Also put a - * null byte at the end. - */ + /* Trim away the leading whitespace */ + trim = TclTrimLeft(element, elemLength, " \f\v\r\t\n", 6); + element += trim; + elemLength -= trim; - finalSize = 0; - if (objc == 0) { - *concatStr = '\0'; - } else { - p = concatStr; - for (i = 0; i < objc; i++) { - objPtr = objv[i]; - element = TclGetStringFromObj(objPtr, &elemLength); - while ((elemLength > 0) && (UCHAR(*element) < 127) - && isspace(UCHAR(*element))) { /* INTL: ISO C space. */ - element++; - elemLength--; - } + /* + * Trim away the trailing whitespace. Do not permit trimming + * to expose a final backslash character. + */ - /* - * Trim trailing white space. But, be careful not to trim a space - * character if it is preceded by a backslash: in this case it - * could be significant. - */ + trim = TclTrimRight(element, elemLength, " \f\v\r\t\n", 6); + trim -= trim && (element[elemLength - trim - 1] == '\\'); + elemLength -= trim; - while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127) - && isspace(UCHAR(element[elemLength-1])) - /* INTL: ISO C space. */ - && ((elemLength < 2) || (element[elemLength-2] != '\\'))) { - elemLength--; - } - if (elemLength == 0) { - continue; /* nothing left of this element */ - } - memcpy(p, element, (size_t) elemLength); - p += elemLength; - *p = ' '; - p++; - finalSize += (elemLength + 1); + /* If we're left with empty element after trimming, do nothing */ + if (elemLength == 0) { + continue; } - if (p != concatStr) { - p[-1] = 0; - finalSize -= 1; /* we overwrote the final ' ' */ - } else { - *p = 0; + + /* Append to the result with space if needed */ + if (needSpace) { + Tcl_AppendToObj(resPtr, " ", 1); } + Tcl_AppendToObj(resPtr, element, elemLength); + needSpace = 1; } - - TclNewObj(objPtr); - objPtr->bytes = concatStr; - objPtr->length = finalSize; - return objPtr; + return resPtr; } /* |