diff options
author | dgp <dgp@users.sourceforge.net> | 2011-03-03 21:07:17 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2011-03-03 21:07:17 (GMT) |
commit | 3369c9129fa90ad0a083438bb7b066a1df3ee8d6 (patch) | |
tree | 94c6ec6f45a829d8ef54b008b42fda71118c1b35 | |
parent | e729c2775a2779c6a58e56fd171dbee06bcc53d9 (diff) | |
download | tcl-3369c9129fa90ad0a083438bb7b066a1df3ee8d6.zip tcl-3369c9129fa90ad0a083438bb7b066a1df3ee8d6.tar.gz tcl-3369c9129fa90ad0a083438bb7b066a1df3ee8d6.tar.bz2 |
Significant rewrite of the Tcl*(Scan|Convert)*Element() system, and revisions
to the callers. Needs more work on comments, and testing to check for any
performance impact in either direction. Fixes reported bug.
-rw-r--r-- | generic/tclDictObj.c | 58 | ||||
-rw-r--r-- | generic/tclIndexObj.c | 10 | ||||
-rw-r--r-- | generic/tclInt.h | 4 | ||||
-rw-r--r-- | generic/tclListObj.c | 67 | ||||
-rw-r--r-- | generic/tclUtil.c | 497 |
5 files changed, 394 insertions, 242 deletions
diff --git a/generic/tclDictObj.c b/generic/tclDictObj.c index 593108f..072f3fa 100644 --- a/generic/tclDictObj.c +++ b/generic/tclDictObj.c @@ -459,19 +459,27 @@ UpdateStringOfDict( Tcl_Obj *dictPtr) { #define LOCAL_SIZE 20 - int localFlags[LOCAL_SIZE], *flagPtr; + int localFlags[LOCAL_SIZE], *flagPtr = NULL; Dict *dict = dictPtr->internalRep.otherValuePtr; ChainEntry *cPtr; Tcl_Obj *keyPtr, *valuePtr; - int numElems, i, length; + int i, length, bytesNeeded = 0; char *elem, *dst; + const int maxFlags = UINT_MAX / sizeof(int); /* * This field is the most useful one in the whole hash structure, and it * is not exposed by any API function... */ - numElems = dict->table.numEntries * 2; + int numElems = dict->table.numEntries * 2; + + /* Handle empty list case first, simplifies what follows */ + if (numElems == 0) { + dictPtr->bytes = tclEmptyStringRep; + dictPtr->length = 0; + return; + } /* * Pass 1: estimate space, gather flags. @@ -479,55 +487,63 @@ UpdateStringOfDict( if (numElems <= LOCAL_SIZE) { flagPtr = localFlags; + } else if (numElems > maxFlags) { + Tcl_Panic("UpdateStringOfDict: size requirement exceeds limits"); } else { flagPtr = (int *) ckalloc((unsigned) numElems*sizeof(int)); } - dictPtr->length = 1; for (i=0,cPtr=dict->entryChainHead; i<numElems; i+=2,cPtr=cPtr->nextPtr) { /* * Assume that cPtr is never NULL since we know the number of array * elements already. */ + flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); keyPtr = (Tcl_Obj *) Tcl_GetHashKey(&dict->table, &cPtr->entry); elem = TclGetStringFromObj(keyPtr, &length); - dictPtr->length += Tcl_ScanCountedElement(elem, length, - &flagPtr[i]) + 1; + bytesNeeded += TclScanElement(elem, length, flagPtr+i); + if (bytesNeeded < 0) { + Tcl_Panic("UpdateStringOfDict: size requirement exceeds limits"); + } + flagPtr[i+1] = TCL_DONT_QUOTE_HASH; valuePtr = Tcl_GetHashValue(&cPtr->entry); elem = TclGetStringFromObj(valuePtr, &length); - dictPtr->length += Tcl_ScanCountedElement(elem, length, - &flagPtr[i+1]) + 1; + bytesNeeded += TclScanElement(elem, length, flagPtr+i+1); + if (bytesNeeded < 0) { + Tcl_Panic("UpdateStringOfDict: size requirement exceeds limits"); + } } + if (bytesNeeded > INT_MAX - numElems + 1) { + Tcl_Panic("UpdateStringOfDict: size requirement exceeds limits"); + } + bytesNeeded += numElems; /* * Pass 2: copy into string rep buffer. */ - dictPtr->bytes = ckalloc((unsigned) dictPtr->length); + dictPtr->length = bytesNeeded - 1; + dictPtr->bytes = ckalloc((unsigned) bytesNeeded); dst = dictPtr->bytes; for (i=0,cPtr=dict->entryChainHead; i<numElems; i+=2,cPtr=cPtr->nextPtr) { + flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 ); keyPtr = (Tcl_Obj *) Tcl_GetHashKey(&dict->table, &cPtr->entry); elem = TclGetStringFromObj(keyPtr, &length); - dst += Tcl_ConvertCountedElement(elem, length, dst, - flagPtr[i] | (i==0 ? 0 : TCL_DONT_QUOTE_HASH)); - *(dst++) = ' '; + dst += TclConvertElement(elem, length, dst, flagPtr[i]); + *dst++ = ' '; + flagPtr[i+1] |= TCL_DONT_QUOTE_HASH; valuePtr = Tcl_GetHashValue(&cPtr->entry); elem = TclGetStringFromObj(valuePtr, &length); - dst += Tcl_ConvertCountedElement(elem, length, dst, - flagPtr[i+1] | TCL_DONT_QUOTE_HASH); - *(dst++) = ' '; + dst += TclConvertElement(elem, length, dst, flagPtr[i+1]); + *dst++ = ' '; } + dictPtr->bytes[dictPtr->length] = '\0'; + if (flagPtr != localFlags) { ckfree((char *) flagPtr); } - if (dst == dictPtr->bytes) { - *dst = 0; - } else { - *(--dst) = 0; - } - dictPtr->length = dst - dictPtr->bytes; } /* diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c index af29363..dcedd4e 100644 --- a/generic/tclIndexObj.c +++ b/generic/tclIndexObj.c @@ -534,12 +534,13 @@ Tcl_WrongNumArgs( } else { elementStr = TclGetStringFromObj(origObjv[i], &elemLen); } - len = Tcl_ScanCountedElement(elementStr, elemLen, &flags); + flags = 0; + len = TclScanElement(elementStr, elemLen, &flags); if (MAY_QUOTE_WORD && len != elemLen) { char *quotedElementStr = TclStackAlloc(interp, (unsigned)len); - len = Tcl_ConvertCountedElement(elementStr, elemLen, + len = TclConvertElement(elementStr, elemLen, quotedElementStr, flags); Tcl_AppendToObj(objPtr, quotedElementStr, len); TclStackFree(interp, quotedElementStr); @@ -588,12 +589,13 @@ Tcl_WrongNumArgs( */ elementStr = TclGetStringFromObj(objv[i], &elemLen); - len = Tcl_ScanCountedElement(elementStr, elemLen, &flags); + flags = 0; + len = TclScanElement(elementStr, elemLen, &flags); if (MAY_QUOTE_WORD && len != elemLen) { char *quotedElementStr = TclStackAlloc(interp,(unsigned) len); - len = Tcl_ConvertCountedElement(elementStr, elemLen, + len = TclConvertElement(elementStr, elemLen, quotedElementStr, flags); Tcl_AppendToObj(objPtr, quotedElementStr, len); TclStackFree(interp, quotedElementStr); diff --git a/generic/tclInt.h b/generic/tclInt.h index 649d0cd..f6ed2d5 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2558,6 +2558,8 @@ MODULE_SCOPE void TclContinuationsEnterDerived(Tcl_Obj *objPtr, MODULE_SCOPE ContLineLoc* TclContinuationsGet(Tcl_Obj *objPtr); MODULE_SCOPE void TclContinuationsCopy(Tcl_Obj *objPtr, Tcl_Obj *originObjPtr); +MODULE_SCOPE int TclConvertElement(CONST char *src, int length, + char *dst, int flags); MODULE_SCOPE void TclDeleteNamespaceVars(Namespace *nsPtr); /* TIP #280 - Modified token based evulation, with line information. */ MODULE_SCOPE int TclEvalEx(Tcl_Interp *interp, const char *script, @@ -2766,6 +2768,8 @@ MODULE_SCOPE void TclRememberMutex(Tcl_Mutex *mutex); MODULE_SCOPE void TclRemoveScriptLimitCallbacks(Tcl_Interp *interp); MODULE_SCOPE int TclReToGlob(Tcl_Interp *interp, const char *reStr, int reStrLen, Tcl_DString *dsPtr, int *flagsPtr); +MODULE_SCOPE int TclScanElement(CONST char *string, int length, + int *flagPtr); MODULE_SCOPE void TclSetBgErrorHandler(Tcl_Interp *interp, Tcl_Obj *cmdPrefix); MODULE_SCOPE void TclSetBignumIntRep(Tcl_Obj *objPtr, diff --git a/generic/tclListObj.c b/generic/tclListObj.c index b2a951e..3c48a2f 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1842,74 +1842,73 @@ UpdateStringOfList( Tcl_Obj *listPtr) /* List object with string rep to update. */ { # define LOCAL_SIZE 20 - int localFlags[LOCAL_SIZE], *flagPtr; + int localFlags[LOCAL_SIZE], *flagPtr = NULL; List *listRepPtr = (List *) listPtr->internalRep.twoPtrValue.ptr1; int numElems = listRepPtr->elemCount; - register int i; + int i, length, bytesNeeded = 0; char *elem, *dst; - int length; Tcl_Obj **elemPtrs; + const int maxFlags = UINT_MAX / sizeof(int); /* - * Convert each element of the list to string form and then convert it to - * proper list element form, adding it to the result buffer. + * Mark the list as being canonical; although it will now have a string + * rep, it is one we derived through proper "canonical" quoting and so + * it's known to be free from nasties relating to [concat] and [eval]. */ + listRepPtr->canonicalFlag = 1; + + /* Handle empty list case first, so rest of the routine is simpler */ + + if (numElems == 0) { + listPtr->bytes = tclEmptyStringRep; + listPtr->length = 0; + return; + } + /* * Pass 1: estimate space, gather flags. */ if (numElems <= LOCAL_SIZE) { flagPtr = localFlags; + } else if (numElems > maxFlags) { + Tcl_Panic("UpdateStringOfList: size requirement exceeds limits"); } else { flagPtr = (int *) ckalloc((unsigned) numElems * sizeof(int)); } - listPtr->length = 1; elemPtrs = &listRepPtr->elements; for (i = 0; i < numElems; i++) { + flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); elem = TclGetStringFromObj(elemPtrs[i], &length); - listPtr->length += Tcl_ScanCountedElement(elem, length, flagPtr+i)+1; - - /* - * Check for continued sanity. [Bug 1267380] - */ - - if (listPtr->length < 1) { - Tcl_Panic("string representation size exceeds sane bounds"); + bytesNeeded += TclScanElement(elem, length, flagPtr+i); + if (bytesNeeded < 0) { + Tcl_Panic("UpdateStringOfList: size requirement exceeds limits"); } } + if (bytesNeeded > INT_MAX - numElems + 1) { + Tcl_Panic("UpdateStringOfList: size requirement exceeds limits"); + } + bytesNeeded += numElems; /* * Pass 2: copy into string rep buffer. */ - listPtr->bytes = ckalloc((unsigned) listPtr->length); + listPtr->length = bytesNeeded - 1; + listPtr->bytes = ckalloc((unsigned) bytesNeeded); dst = listPtr->bytes; for (i = 0; i < numElems; i++) { + flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 ); elem = TclGetStringFromObj(elemPtrs[i], &length); - dst += Tcl_ConvertCountedElement(elem, length, dst, - flagPtr[i] | (i==0 ? 0 : TCL_DONT_QUOTE_HASH)); - *dst = ' '; - dst++; + dst += TclConvertElement(elem, length, dst, flagPtr[i]); + *dst++ = ' '; } + listPtr->bytes[listPtr->length] = '\0'; + if (flagPtr != localFlags) { ckfree((char *) flagPtr); } - if (dst == listPtr->bytes) { - *dst = 0; - } else { - dst--; - *dst = 0; - } - listPtr->length = dst - listPtr->bytes; - - /* - * Mark the list as being canonical; although it has a string rep, it is - * one we derived through proper "canonical" quoting and so it's known to - * be free from nasties relating to [concat] and [eval]. - */ - - listRepPtr->canonicalFlag = 1; } /* diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 8296859..9358ede 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -645,6 +645,15 @@ Tcl_ScanElement( *---------------------------------------------------------------------- */ +#define COMPAT 1 + +#define CONVERT_NONE 0 +#define CONVERT_BRACE 2 +#define CONVERT_ESCAPE 4 +#define CONVERT_SAFE 16 +#define CONVERT_MASK (CONVERT_BRACE | CONVERT_ESCAPE) + + int Tcl_ScanCountedElement( CONST char *string, /* String to convert to Tcl list element. */ @@ -652,74 +661,73 @@ Tcl_ScanCountedElement( int *flagPtr) /* Where to store information to guide * Tcl_ConvertElement. */ { - int flags, nestingLevel; - register CONST char *p, *lastChar; + int flags = CONVERT_SAFE; + int numBytes = TclScanElement(string, length, &flags); - /* - * This function and Tcl_ConvertElement together do two things: - * - * 1. They produce a proper list, one that will yield back the argument - * strings when evaluated or when disassembled with Tcl_SplitList. This - * is the most important thing. - * - * 2. They try to produce legible output, which means minimizing the use - * of backslashes (using braces instead). However, there are some - * situations where backslashes must be used (e.g. an element like - * "{abc": the leading brace will have to be backslashed. For each - * element, one of three things must be done: - * - * (a) Use the element as-is (it doesn't contain any special - * characters). This is the most desirable option. - * - * (b) Enclose the element in braces, but leave the contents alone. - * This happens if the element contains embedded space, or if it - * contains characters with special interpretation ($, [, ;, or \), - * or if it starts with a brace or double-quote, or if there are no - * characters in the element. - * - * (c) Don't enclose the element in braces, but add backslashes to - * prevent special interpretation of special characters. This is a - * last resort used when the argument would normally fall under - * case (b) but contains unmatched braces. It also occurs if the - * last character of the argument is a backslash or if the element - * contains a backslash followed by newline. - * - * The function figures out how many bytes will be needed to store the - * result (actually, it overestimates). It also collects information about - * the element in the form of a flags word. - * - * Note: list elements produced by this function and - * Tcl_ConvertCountedElement must have the property that they can be - * enclosing in curly braces to make sub-lists. This means, for example, - * that we must not leave unmatched curly braces in the resulting list - * element. This property is necessary in order for functions like - * Tcl_DStringStartSublist to work. - */ + *flagPtr = flags; + return numBytes; +} - nestingLevel = 0; - flags = 0; - if (string == NULL) { - string = ""; - } - if (length == -1) { - length = strlen(string); +int +TclScanElement( + CONST char *string, /* String to convert to Tcl list element. */ + int length, /* Number of bytes in string, or -1. */ + int *flagPtr) /* Where to store information to guide + * Tcl_ConvertElement. */ +{ + CONST char *p = string; + int nestingLevel = 0; + int forbidNone = 0; + int requireEscape = 0; + int extra = 0; + int bytesNeeded; +#if COMPAT + int preferEscape = 0; + int preferBrace = 0; + int braceCount = 0; +#endif + + if ((p == NULL) || (length == 0)) { + *flagPtr = CONVERT_BRACE; + return 2; } - lastChar = string + length; - p = string; - if ((p == lastChar) || (*p == '{') || (*p == '"')) { - flags |= USE_BRACES; + + if ((*p == '{') || (*p == '"') || ((*p == '\0') && (length == -1))) { + forbidNone = 1; +#if COMPAT + preferBrace = 1; +#endif } - for (; p < lastChar; p++) { + + while (length) { switch (*p) { case '{': +#if COMPAT + braceCount++; +#endif + extra++; nestingLevel++; break; case '}': +#if COMPAT + braceCount++; +#endif + extra++; nestingLevel--; if (nestingLevel < 0) { - flags |= TCL_DONT_USE_BRACES; + requireEscape = 1; } break; + case ']': + case '"': +#if COMPAT + forbidNone = 1; + extra++; + preferEscape = 1; + break; +#else + /* FLOW THROUGH */ +#endif case '[': case '$': case ';': @@ -729,32 +737,103 @@ Tcl_ScanCountedElement( case '\r': case '\t': case '\v': - flags |= USE_BRACES; + forbidNone = 1; + extra++; +#if COMPAT + preferBrace = 1; +#endif break; case '\\': - if ((p+1 == lastChar) || (p[1] == '\n')) { - flags = TCL_DONT_USE_BRACES; - } else { - int size; - - Tcl_UtfBackslash(p, &size, NULL); - p += size-1; - flags |= USE_BRACES; + extra++; + if ((length == 1) || ((length == -1) && (p[1] == '\0'))) { + requireEscape = 1; + break; + } + if (p[1] == '\n') { + extra++; + requireEscape = 1; + length -= (length > 0); + p++; + break; } + if ((p[1] == '{') || (p[1] == '}')) { + extra++; + length -= (length > 0); + p++; + } + forbidNone = 1; +#if COMPAT + preferBrace = 1; +#endif + break; + case '\0': + if (length == -1) { + goto endOfString; + } + /* TODO: Panic on improper encoding? */ break; } + length -= (length > 0); + p++; } + + endOfString: if (nestingLevel != 0) { - flags = TCL_DONT_USE_BRACES; + requireEscape = 1; } - *flagPtr = flags; - /* - * Allow enough space to backslash every character plus leave two spaces - * for braces. - */ + bytesNeeded = p - string; + + if (requireEscape) { + bytesNeeded += extra; + if ((*string == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { + bytesNeeded++; + } + *flagPtr = CONVERT_ESCAPE; + goto overflowCheck; + } + if (*flagPtr & CONVERT_SAFE) { + if (extra < 2) { + extra = 2; + } + *flagPtr |= TCL_DONT_USE_BRACES; + } + if (forbidNone) { +#if COMPAT + if (preferEscape && !preferBrace) { + bytesNeeded += (extra - braceCount); + if ((*string == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { + bytesNeeded++; + } + if (*flagPtr & TCL_DONT_USE_BRACES) { + bytesNeeded += braceCount; + } + *flagPtr = CONVERT_MASK; + goto overflowCheck; + } +#endif + if (*flagPtr & TCL_DONT_USE_BRACES) { + bytesNeeded += extra; + if ((*string == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { + bytesNeeded++; + } + } else { + bytesNeeded += 2; + } + *flagPtr = CONVERT_BRACE; + goto overflowCheck; + } - return 2*(p-string) + 2; + if ((*string == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { + bytesNeeded += 2; + } + *flagPtr = CONVERT_NONE; + + overflowCheck: + if (bytesNeeded < 0) { + Tcl_Panic("TclScanElement: string length overflow"); + } + return bytesNeeded; } /* @@ -815,112 +894,139 @@ Tcl_ConvertCountedElement( char *dst, /* Place to put list-ified element. */ int flags) /* Flags produced by Tcl_ScanElement. */ { - register char *p = dst; - register CONST char *lastChar; + int numBytes = TclConvertElement(src, length, dst, flags); + dst[numBytes] = '\0'; + return numBytes; +} - /* - * See the comment block at the beginning of the Tcl_ScanElement code for - * details of how this works. - */ +int TclConvertElement( + register CONST char *src, /* Source information for list element. */ + int length, /* Number of bytes in src, or -1. */ + char *dst, /* Place to put list-ified element. */ + int flags) /* Flags produced by Tcl_ScanElement. */ +{ + int conversion = flags & CONVERT_MASK; + char *p = dst; - if (src && length == -1) { - length = strlen(src); + if ((flags & TCL_DONT_USE_BRACES) && (conversion & CONVERT_BRACE)) { + conversion = CONVERT_ESCAPE; } if ((src == NULL) || (length == 0)) { - p[0] = '{'; - p[1] = '}'; - p[2] = 0; - return 2; + src = tclEmptyStringRep; + length = 0; + conversion = CONVERT_BRACE; } - lastChar = src + length; if ((*src == '#') && !(flags & TCL_DONT_QUOTE_HASH)) { - flags |= USE_BRACES; + if (conversion == CONVERT_ESCAPE) { + p[0] = '\\'; + p[1] = '#'; + p += 2; + src++; + length--; + } else { + conversion = CONVERT_BRACE; + } } - if ((flags & USE_BRACES) && !(flags & TCL_DONT_USE_BRACES)) { + if (conversion == CONVERT_NONE) { + if (length == -1) { + /* TODO: INT_MAX overflow? */ + while (*src) { + *p++ = *src++; + } + return p - dst; + } else { + memcpy(dst, src, length); + return length; + } + } + if (conversion == CONVERT_BRACE) { *p = '{'; p++; - for (; src != lastChar; src++, p++) { - *p = *src; + if (length == -1) { + /* TODO: INT_MAX overflow? */ + while (*src) { + *p++ = *src++; + } + } else { + memcpy(p, src, length); + p += length; } *p = '}'; p++; - } else { - if ((*src == '#') && !(flags & TCL_DONT_QUOTE_HASH)) { - /* - * Leading '#' could be seen by [eval] as the start of a comment, - * if on the first element of a list, so quote it. - */ - - p[0] = '\\'; - p[1] = '#'; - p += 2; - src++; - } - for (; src != lastChar; src++) { - switch (*src) { - case ']': - case '[': - case '$': - case ';': - case ' ': - case '\\': - case '"': - *p = '\\'; - p++; - break; - case '{': - case '}': - /* - * It may not seem necessary to backslash braces, but it is. - * The reason for this is that the resulting list element may - * actually be an element of a sub-list enclosed in braces - * (e.g. if Tcl_DStringStartSublist has been invoked), so - * there may be a brace mismatch if the braces aren't - * backslashed. - */ + return p - dst; + } + /* conversion == CONVERT_ESCAPE or CONVERT_MASK */ - if (flags & TCL_DONT_USE_BRACES) { - *p = '\\'; - p++; - } - break; - case '\f': - *p = '\\'; - p++; - *p = 'f'; - p++; - continue; - case '\n': - *p = '\\'; - p++; - *p = 'n'; - p++; - continue; - case '\r': - *p = '\\'; - p++; - *p = 'r'; - p++; - continue; - case '\t': - *p = '\\'; - p++; - *p = 't'; - p++; - continue; - case '\v': + for ( ; length; src++, length -= (length > 0)) { + switch (*src) { + case ']': + case '[': + case '$': + case ';': + case ' ': + case '\\': + case '"': + *p = '\\'; + p++; + break; + case '{': + case '}': +#if COMPAT + if (conversion == CONVERT_ESCAPE) { +#endif *p = '\\'; p++; - *p = 'v'; - p++; - continue; +#if COMPAT } - *p = *src; +#endif + break; + case '\f': + *p = '\\'; + p++; + *p = 'f'; + p++; + continue; + case '\n': + *p = '\\'; + p++; + *p = 'n'; + p++; + continue; + case '\r': + *p = '\\'; + p++; + *p = 'r'; + p++; + continue; + case '\t': + *p = '\\'; + p++; + *p = 't'; p++; + continue; + case '\v': + *p = '\\'; + p++; + *p = 'v'; + p++; + continue; + case '\0': + if (length == -1) { + return p - dst; + } + /* + * If we reach this point, there's an embedded NULL in the + * string range being processed, which should not happen when + * the encoding rules for Tcl strings are properly followed. + * If the day ever comes when we stop tolerating such things, + * this is where to put the Tcl_Panic(). + */ + break; } + *p = *src; + p++; } - *p = '\0'; - return p-dst; + return p - dst; } /* @@ -949,11 +1055,20 @@ Tcl_Merge( CONST char * CONST *argv) /* Array of string values. */ { # define LOCAL_SIZE 20 - int localFlags[LOCAL_SIZE], *flagPtr; - int numChars; - char *result; - char *dst; - int i; + int localFlags[LOCAL_SIZE], *flagPtr = NULL; + int i, bytesNeeded = 0; + char *result, *dst; + const int maxFlags = UINT_MAX / sizeof(int); + + if (argc == 0) { + /* + * Handle empty list case first, so logic of the general case + * can be simpler. + */ + result = ckalloc(1); + result[0] = '\0'; + return result; + } /* * Pass 1: estimate space, gather flags. @@ -961,32 +1076,48 @@ Tcl_Merge( if (argc <= LOCAL_SIZE) { flagPtr = localFlags; + } else if (argc > maxFlags) { + /* + * We cannot allocate a large enough flag array to format this + * list in one pass. We could imagine converting this routine + * to a multi-pass implementation, but for sizeof(int) == 4, + * the limit is a max of 2^30 list elements and since each element + * is at least one byte formatted, and requires one byte space + * between it and the next one, that a minimum space requirement + * of 2^31 bytes, which is already INT_MAX. If we tried to format + * a list of > maxFlags elements, we're just going to overflow + * the size limits on the formatted string anyway, so just issue + * that same panic early. + */ + Tcl_Panic("Tcl_Merge: size requirement exceeds limits"); } else { flagPtr = (int *) ckalloc((unsigned) argc*sizeof(int)); } - numChars = 1; for (i = 0; i < argc; i++) { - numChars += Tcl_ScanElement(argv[i], &flagPtr[i]) + 1; + flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); + bytesNeeded += TclScanElement(argv[i], -1, &flagPtr[i]); + if (bytesNeeded < 0) { + Tcl_Panic("Tcl_Merge: size requirement exceeds limits"); + } } + if (bytesNeeded > INT_MAX - argc + 1) { + Tcl_Panic("Tcl_Merge: size requirement exceeds limits"); + } + bytesNeeded += argc; /* * Pass two: copy into the result area. */ - result = (char *) ckalloc((unsigned) numChars); + result = ckalloc((unsigned) bytesNeeded); dst = result; for (i = 0; i < argc; i++) { - numChars = Tcl_ConvertElement(argv[i], dst, - flagPtr[i] | (i==0 ? 0 : TCL_DONT_QUOTE_HASH)); - dst += numChars; + flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 ); + dst += TclConvertElement(argv[i], -1, dst, flagPtr[i]); *dst = ' '; dst++; } - if (dst == result) { - *dst = 0; - } else { - dst[-1] = 0; - } + dst[-1] = 0; if (flagPtr != localFlags) { ckfree((char *) flagPtr); @@ -1877,12 +2008,11 @@ Tcl_DStringAppendElement( CONST char *element) /* String to append. Must be * null-terminated. */ { - int newSize, flags, strSize; - char *dst; - - strSize = ((element== NULL) ? 0 : strlen(element)); - newSize = Tcl_ScanCountedElement(element, strSize, &flags) - + dsPtr->length + 1; + char *dst = dsPtr->string + dsPtr->length; + int needSpace = TclNeedSpace(dsPtr->string, dst); + int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0; + int newSize = dsPtr->length + needSpace + + TclScanElement(element, -1, &flags); /* * Allocate a larger buffer for the string if the current one isn't large @@ -1903,6 +2033,7 @@ Tcl_DStringAppendElement( dsPtr->string = (char *) ckrealloc((void *) dsPtr->string, (size_t) dsPtr->spaceAvl); } + dst = dsPtr->string + dsPtr->length; } /* @@ -1910,8 +2041,7 @@ Tcl_DStringAppendElement( * the end, with a space, if needed. */ - dst = dsPtr->string + dsPtr->length; - if (TclNeedSpace(dsPtr->string, dst)) { + if (needSpace) { *dst = ' '; dst++; dsPtr->length++; @@ -1924,7 +2054,8 @@ Tcl_DStringAppendElement( flags |= TCL_DONT_QUOTE_HASH; } - dsPtr->length += Tcl_ConvertCountedElement(element, strSize, dst, flags); + dsPtr->length += TclConvertElement(element, -1, dst, flags); + dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; } |