diff options
Diffstat (limited to 'generic/tclUtil.c')
| -rw-r--r-- | generic/tclUtil.c | 2760 |
1 files changed, 1047 insertions, 1713 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 2219c84..bc1490e 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -4,18 +4,16 @@ * This file contains utility functions that are used by many Tcl * commands. * - * Copyright © 1987-1993 The Regents of the University of California. - * Copyright © 1994-1998 Sun Microsystems, Inc. - * Copyright © 2001 Kevin B. Kenny. All rights reserved. + * Copyright (c) 1987-1993 The Regents of the University of California. + * Copyright (c) 1994-1998 Sun Microsystems, Inc. + * Copyright (c) 2001 by Kevin B. Kenny. All rights reserved. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclParse.h" -#include "tclStringTrim.h" -#include "tclTomMath.h" +#include <float.h> #include <math.h> /* @@ -28,9 +26,9 @@ static ProcessGlobalValue executableName = { }; /* - * The following values are used in the flags arguments of Tcl*Scan*Element - * and Tcl*Convert*Element. The values TCL_DONT_USE_BRACES and - * TCL_DONT_QUOTE_HASH are defined in tcl.h, like so: + * The following values are used in the flags arguments of Tcl*Scan*Element and + * Tcl*Convert*Element. The values TCL_DONT_USE_BRACES and TCL_DONT_QUOTE_HASH + * are defined in tcl.h, like so: * #define TCL_DONT_USE_BRACES 1 #define TCL_DONT_QUOTE_HASH 8 @@ -42,11 +40,11 @@ static ProcessGlobalValue executableName = { * quoting not be used when converting the list * element. * TCL_DONT_QUOTE_HASH - 1 means the caller insists that a leading hash - * character ('#') should *not* be quoted. This - * is appropriate when the caller can guarantee - * the element is not the first element of a - * list, so [eval] cannot mis-parse the element - * as a comment. + * character ('#') should *not* be quoted. This + * is appropriate when the caller can guarantee + * the element is not the first element of a + * list, so [eval] cannot mis-parse the element + * as a comment. * * The remaining values which can be carried by the flags of these routines * are for internal use only. Make sure they do not overlap with the public @@ -56,8 +54,8 @@ static ProcessGlobalValue executableName = { * conversion is most appropriate for Tcl*Convert*Element() to perform, and * sets two bits of the flags value to indicate the mode selected. * - * CONVERT_NONE The element needs no quoting. Its literal string is - * suitable as is. + * CONVERT_NONE The element needs no quoting. Its literal string + * is suitable as is. * CONVERT_BRACE The conversion should be enclosing the literal string * in braces. * CONVERT_ESCAPE The conversion should be using backslashes to escape @@ -66,18 +64,18 @@ static ProcessGlobalValue executableName = { * the flags argument. * Also indicates a strange conversion mode where all * special characters are escaped with backslashes - * *except for braces*. This is a strange and unnecessary + * *except for braces*. This is a strange and unnecessary * case, but it's part of the historical way in which - * lists have been formatted in Tcl. To experiment with + * lists have been formatted in Tcl. To experiment with * removing this case, set the value of COMPAT to 0. * - * One last flag value is used only by callers of TclScanElement(). The flag + * One last flag value is used only by callers of TclScanElement(). The flag * value produced by a call to Tcl*Scan*Element() will never leave this bit * set. * - * CONVERT_ANY The caller of TclScanElement() declares it can make no - * promise about what public flags will be passed to the - * matching call of TclConvertElement(). As such, + * CONVERT_ANY The caller of TclScanElement() declares it can make + * no promise about what public flags will be passed to + * the matching call of TclConvertElement(). As such, * TclScanElement() has to determine the worst case * destination buffer length over all possibilities, and * in other cases this means an overestimate of the @@ -106,51 +104,42 @@ static Tcl_ThreadDataKey precisionKey; */ static void ClearHash(Tcl_HashTable *tablePtr); -static void FreeProcessGlobalValue(void *clientData); -static void FreeThreadHash(void *clientData); -static int GetEndOffsetFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, - Tcl_WideInt endValue, Tcl_WideInt *indexPtr); +static void FreeProcessGlobalValue(ClientData clientData); +static void FreeThreadHash(ClientData clientData); static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr); -static int GetWideForIndex(Tcl_Interp *interp, Tcl_Obj *objPtr, - Tcl_WideInt endValue, Tcl_WideInt *widePtr); -static int FindElement(Tcl_Interp *interp, const char *string, - Tcl_Size stringLength, const char *typeStr, - const char *typeCode, const char **elementPtr, - const char **nextPtr, Tcl_Size *sizePtr, - int *literalPtr); +static int SetEndOffsetFromAny(Tcl_Interp* interp, + Tcl_Obj* objPtr); +static void UpdateStringOfEndOffset(Tcl_Obj* objPtr); + /* * The following is the Tcl object type definition for an object that * represents a list index in the form, "end-offset". It is used as a - * performance optimization in Tcl_GetIntForIndex. The internal rep is - * stored directly in the wideValue, so no memory management is required - * for it. This is a caching internalrep, keeping the result of a parse - * around. This type is only created from a pre-existing string, so an - * updateStringProc will never be called and need not exist. The type - * is unregistered, so has no need of a setFromAnyProc either. + * performance optimization in TclGetIntForIndex. The internal rep is an + * integer, so no memory management is required for it. */ -static const Tcl_ObjType endOffsetType = { +Tcl_ObjType tclEndOffsetType = { "end-offset", /* name */ NULL, /* freeIntRepProc */ NULL, /* dupIntRepProc */ - NULL, /* updateStringProc */ - NULL /* setFromAnyProc */ + UpdateStringOfEndOffset, /* updateStringProc */ + SetEndOffsetFromAny }; /* * * STRING REPRESENTATION OF LISTS * * * * - * The next several routines implement the conversions of strings to and from - * Tcl lists. To understand their operation, the rules of parsing and - * generating the string representation of lists must be known. Here we - * describe them in one place. + * The next several routines implement the conversions of strings to and + * from Tcl lists. To understand their operation, the rules of parsing + * and generating the string representation of lists must be known. Here + * we describe them in one place. * - * A list is made up of zero or more elements. Any string is a list if it is - * made up of alternating substrings of element-separating ASCII whitespace - * and properly formatted elements. + * A list is made up of zero or more elements. Any string is a list if + * it is made up of alternating substrings of element-separating ASCII + * whitespace and properly formatted elements. * - * The ASCII characters which can make up the whitespace between list elements - * are: + * The ASCII characters which can make up the whitespace between list + * elements are: * * \u0009 \t TAB * \u000A \n NEWLINE @@ -169,68 +158,69 @@ static const Tcl_ObjType endOffsetType = { * * Unlike command parsing, the BACKSLASH NEWLINE sequence is not * considered to be a whitespace character. * - * * Other Unicode whitespace characters (recognized by [string is space] - * or Tcl_UniCharIsSpace()) do not play any role as element separators - * in Tcl lists. + * * Other Unicode whitespace characters (recognized by + * [string is space] or Tcl_UniCharIsSpace()) do not play any role + * as element separators in Tcl lists. * * * The NUL byte ought not appear, as it is not in strings properly * encoded for Tcl, but if it is present, it is not treated as - * separating whitespace, or a string terminator. It is just another - * character in a list element. + * separating whitespace, or a string terminator. It is just + * another character in a list element. * - * The interpretation of a formatted substring as a list element follows rules - * similar to the parsing of the words of a command in a Tcl script. Backslash - * substitution plays a key role, and is defined exactly as it is in command - * parsing. The same routine, TclParseBackslash() is used in both command - * parsing and list parsing. + * The interpretation of a formatted substring as a list element follows + * rules similar to the parsing of the words of a command in a Tcl script. + * Backslash substitution plays a key role, and is defined exactly as it is + * in command parsing. The same routine, TclParseBackslash() is used in both + * command parsing and list parsing. * - * NOTE: This means that if and when backslash substitution rules ever change - * for command parsing, the interpretation of strings as lists also changes. + * NOTE: This means that if and when backslash substitution rules ever + * change for command parsing, the interpretation of strings as lists also + * changes. * * Backslash substitution replaces an "escape sequence" of one or more * characters starting with * \u005c \ BACKSLASH - * with a single character. The one character escape sequence case happens only - * when BACKSLASH is the last character in the string. In all other cases, the - * escape sequence is at least two characters long. + * with a single character. The one character escape sequence case happens + * only when BACKSLASH is the last character in the string. In all other + * cases, the escape sequence is at least two characters long. * - * The formatted substrings are interpreted as element values according to the - * following cases: + * The formatted substrings are interpreted as element values according to + * the following cases: * * * If the first character of a formatted substring is * \u007b { OPEN BRACE * then the end of the substring is the matching * \u007d } CLOSE BRACE - * character, where matching is determined by counting nesting levels, and - * not including any brace characters that are contained within a backslash - * escape sequence in the nesting count. Having found the matching brace, - * all characters between the braces are the string value of the element. - * If no matching close brace is found before the end of the string, the - * string is not a Tcl list. If the character following the close brace is - * not an element separating whitespace character, or the end of the string, - * then the string is not a Tcl list. - * - * NOTE: this differs from a brace-quoted word in the parsing of a Tcl - * command only in its treatment of the backslash-newline sequence. In a - * list element, the literal characters in the backslash-newline sequence - * become part of the element value. In a script word, conversion to a - * single SPACE character is done. + * character, where matching is determined by counting nesting levels, + * and not including any brace characters that are contained within a + * backslash escape sequence in the nesting count. Having found the + * matching brace, all characters between the braces are the string + * value of the element. If no matching close brace is found before the + * end of the string, the string is not a Tcl list. If the character + * following the close brace is not an element separating whitespace + * character, or the end of the string, then the string is not a Tcl list. + * + * NOTE: this differs from a brace-quoted word in the parsing of a + * Tcl command only in its treatment of the backslash-newline sequence. + * In a list element, the literal characters in the backslash-newline + * sequence become part of the element value. In a script word, + * conversion to a single SPACE character is done. * * NOTE: Most list element values can be represented by a formatted - * substring using brace quoting. The exceptions are any element value that - * includes an unbalanced brace not in a backslash escape sequence, and any - * value that ends with a backslash not itself in a backslash escape - * sequence. + * substring using brace quoting. The exceptions are any element value + * that includes an unbalanced brace not in a backslash escape sequence, + * and any value that ends with a backslash not itself in a backslash + * escape sequence. * * * If the first character of a formatted substring is * \u0022 " QUOTE * then the end of the substring is the next QUOTE character, not counting * any QUOTE characters that are contained within a backslash escape - * sequence. If no next QUOTE is found before the end of the string, the - * string is not a Tcl list. If the character following the closing QUOTE is - * not an element separating whitespace character, or the end of the string, - * then the string is not a Tcl list. Having found the limits of the - * substring, the element value is produced by performing backslash + * sequence. If no next QUOTE is found before the end of the string, the + * string is not a Tcl list. If the character following the closing QUOTE + * is not an element separating whitespace character, or the end of the + * string, then the string is not a Tcl list. Having found the limits + * of the substring, the element value is produced by performing backslash * substitution on the character sequence between the open and close QUOTEs. * * NOTE: Any element value can be represented by this style of formatting, @@ -241,20 +231,20 @@ static const Tcl_ObjType endOffsetType = { * of the substring, the element value is produced by performing backslash * substitution on it. * - * NOTE: Any element value can be represented by this style of formatting, + * NOTE: Any element value can be represented by this style of formatting, * given suitable choice of backslash escape sequences, with one exception. * The empty string cannot be represented as a list element without the use * of either braces or quotes to delimit it. * * This collection of parsing rules is implemented in the routine - * FindElement(). + * TclFindElement(). * - * In order to produce lists that can be parsed by these rules, we need the - * ability to distinguish between characters that are part of a list element - * value from characters providing syntax that define the structure of the - * list. This means that our code that generates lists must at a minimum be - * able to produce escape sequences for the 10 characters identified above - * that have significance to a list parser. + * In order to produce lists that can be parsed by these rules, we need + * the ability to distinguish between characters that are part of a list + * element value from characters providing syntax that define the structure + * of the list. This means that our code that generates lists must at a + * minimum be able to produce escape sequences for the 10 characters + * identified above that have significance to a list parser. * * * * CANONICAL LISTS * * * * * * @@ -263,18 +253,18 @@ static const Tcl_ObjType endOffsetType = { * generated by Tcl. Such list values are often said to be in "canonical * form": * - * * When any canonical list is evaluated as a Tcl script, it is a script of - * either zero commands (an empty list) or exactly one command. The command - * word is exactly the first element of the list, and each argument word is - * exactly one of the following elements of the list. This means that any - * characters that have special meaning during script evaluation need - * special treatment when canonical lists are produced: + * * When any canonical list is evaluated as a Tcl script, it is a script + * of either zero commands (an empty list) or exactly one command. The + * command word is exactly the first element of the list, and each argument + * word is exactly one of the following elements of the list. This means + * that any characters that have special meaning during script evaluation + * need special treatment when canonical lists are produced: * * * Whitespace between elements may not include NEWLINE. * * The command terminating character, * \u003b ; SEMICOLON - * must be BRACEd, QUOTEd, or escaped so that it does not terminate the - * command prematurely. + * must be BRACEd, QUOTEd, or escaped so that it does not terminate + * the command prematurely. * * Any of the characters that begin substitutions in scripts, * \u0024 $ DOLLAR * \u005b [ OPEN BRACKET @@ -284,10 +274,11 @@ static const Tcl_ObjType endOffsetType = { * \u0023 # HASH * that HASH character must be BRACEd, QUOTEd, or escaped so that it * does not convert the command into a comment. - * * Any list element that contains the character sequence BACKSLASH - * NEWLINE cannot be formatted with BRACEs. The BACKSLASH character - * must be represented by an escape sequence, and unless QUOTEs are - * used, the NEWLINE must be as well. + * * Any list element that contains the character sequence + * BACKSLASH NEWLINE cannot be formatted with BRACEs. The + * BACKSLASH character must be represented by an escape + * sequence, and unless QUOTEs are used, the NEWLINE must + * be as well. * * * It is also guaranteed that one can use a canonical list as a building * block of a larger script within command substitution, as in this example: @@ -298,10 +289,10 @@ static const Tcl_ObjType endOffsetType = { * * * Finally it is guaranteed that enclosing a canonical list in braces * produces a new value that is also a canonical list. This new list has - * length 1, and its only element is the original canonical list. This same - * guarantee also makes it possible to construct scripts where an argument - * word is given a list value by enclosing the canonical form of that list - * in braces: + * length 1, and its only element is the original canonical list. This + * same guarantee also makes it possible to construct scripts where an + * argument word is given a list value by enclosing the canonical form + * of that list in braces: * set script "puts {[list $one $two $three]}"; eval $script * This sort of coding was once fairly common, though it's become more * idiomatic to see the following instead: @@ -310,54 +301,54 @@ static const Tcl_ObjType endOffsetType = { * balance when counting those braces that are not in escape sequences. * * Within these constraints, the canonical list generation routines - * TclScanElement() and TclConvertElement() attempt to generate the string for - * any list that is easiest to read. When an element value is itself + * TclScanElement() and TclConvertElement() attempt to generate the string + * for any list that is easiest to read. When an element value is itself * acceptable as the formatted substring, it is usually used (CONVERT_NONE). - * When some quoting or escaping is required, use of BRACEs (CONVERT_BRACE) is - * usually preferred over the use of escape sequences (CONVERT_ESCAPE). There - * are some exceptions to both of these preferences for reasons of code - * simplicity, efficiency, and continuation of historical habits. Canonical - * lists never use the QUOTE formatting to delimit their elements because that - * form of quoting does not nest, which makes construction of nested lists far - * too much trouble. Canonical lists always use only a single SPACE character - * for element-separating whitespace. + * When some quoting or escaping is required, use of BRACEs (CONVERT_BRACE) + * is usually preferred over the use of escape sequences (CONVERT_ESCAPE). + * There are some exceptions to both of these preferences for reasons of + * code simplicity, efficiency, and continuation of historical habits. + * Canonical lists never use the QUOTE formatting to delimit their elements + * because that form of quoting does not nest, which makes construction of + * nested lists far too much trouble. Canonical lists always use only a + * single SPACE character for element-separating whitespace. * * * * FUTURE CONSIDERATIONS * * * * * When a list element requires quoting or escaping due to a CLOSE BRACKET * character or an internal QUOTE character, a strange formatting mode is - * recommended. For example, if the value "a{b]c}d" is converted by the usual - * modes: + * recommended. For example, if the value "a{b]c}d" is converted by the + * usual modes: * * CONVERT_BRACE: a{b]c}d => {a{b]c}d} * CONVERT_ESCAPE: a{b]c}d => a\{b\]c\}d * - * we get perfectly usable formatted list elements. However, this is not what - * Tcl releases have been producing. Instead, we have: + * we get perfectly usable formatted list elements. However, this is not + * what Tcl releases have been producing. Instead, we have: * * CONVERT_MASK: a{b]c}d => a{b\]c}d * - * where the CLOSE BRACKET is escaped, but the BRACEs are not. The same effect - * can be seen replacing ] with " in this example. There does not appear to be - * any functional or aesthetic purpose for this strange additional mode. The - * sole purpose I can see for preserving it is to keep generating the same - * formatted lists programmers have become accustomed to, and perhaps written - * tests to expect. That is, compatibility only. The additional code - * complexity required to support this mode is significant. The lines of code - * supporting it are delimited in the routines below with #if COMPAT - * directives. This makes it easy to experiment with eliminating this - * formatting mode simply with "#define COMPAT 0" above. I believe this is - * worth considering. - * - * Another consideration is the treatment of QUOTE characters in list - * elements. TclConvertElement() must have the ability to produce the escape - * sequence \" so that when a list element begins with a QUOTE we do not - * confuse that first character with a QUOTE used as list syntax to define - * list structure. However, that is the only place where QUOTE characters need - * quoting. In this way, handling QUOTE could really be much more like the way - * we handle HASH which also needs quoting and escaping only in particular - * situations. Following up this could increase the set of list elements that - * can use the CONVERT_NONE formatting mode. + * where the CLOSE BRACKET is escaped, but the BRACEs are not. The same + * effect can be seen replacing ] with " in this example. There does not + * appear to be any functional or aesthetic purpose for this strange + * additional mode. The sole purpose I can see for preserving it is to + * keep generating the same formatted lists programmers have become accustomed + * to, and perhaps written tests to expect. That is, compatibility only. + * The additional code complexity required to support this mode is significant. + * The lines of code supporting it are delimited in the routines below with + * #if COMPAT directives. This makes it easy to experiment with eliminating + * this formatting mode simply with "#define COMPAT 0" above. I believe + * this is worth considering. + * + * Another consideration is the treatment of QUOTE characters in list elements. + * TclConvertElement() must have the ability to produce the escape sequence + * \" so that when a list element begins with a QUOTE we do not confuse + * that first character with a QUOTE used as list syntax to define list + * structure. However, that is the only place where QUOTE characters need + * quoting. In this way, handling QUOTE could really be much more like + * the way we handle HASH which also needs quoting and escaping only in + * particular situations. Following up this could increase the set of + * list elements that can use the CONVERT_NONE formatting mode. * * More speculative is that the demands of canonical list form require brace * balance for the list as a whole, while the current implementation achieves @@ -375,15 +366,15 @@ static const Tcl_ObjType endOffsetType = { * * Given 'bytes' pointing to 'numBytes' bytes, scan through them and * count the number of whitespace runs that could be list element - * separators. If 'numBytes' is TCL_INDEX_NONE, scan to the terminating - * '\0'. Not a full list parser. Typically used to get a quick and dirty - * overestimate of length size in order to allocate space for an actual - * list parser to operate with. + * separators. If 'numBytes' is -1, scan to the terminating '\0'. + * Not a full list parser. Typically used to get a quick and dirty + * overestimate of length size in order to allocate space for an + * actual list parser to operate with. * * Results: - * Returns the largest number of list elements that could possibly be in - * this string, interpreted as a Tcl list. If 'endPtr' is not NULL, - * writes a pointer to the end of the string scanned there. + * Returns the largest number of list elements that could possibly + * be in this string, interpreted as a Tcl list. If 'endPtr' is not + * NULL, writes a pointer to the end of the string scanned there. * * Side effects: * None. @@ -391,62 +382,47 @@ static const Tcl_ObjType endOffsetType = { *---------------------------------------------------------------------- */ -Tcl_Size +int TclMaxListLength( - const char *bytes, - Tcl_Size numBytes, - const char **endPtr) + CONST char *bytes, + int numBytes, + CONST char **endPtr) { - Tcl_Size count = 0; + int count = 0; - if ((numBytes == 0) || ((numBytes == TCL_INDEX_NONE) && (*bytes == '\0'))) { + if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { /* Empty string case - quick exit */ goto done; } - /* - * No list element before leading white space. - */ - - count += 1 - TclIsSpaceProcM(*bytes); - - /* - * Count white space runs as potential element separators. - */ + /* No list element before leading white space */ + count += 1 - TclIsSpaceProc(*bytes); + /* Count white space runs as potential element separators */ while (numBytes) { - if ((numBytes == TCL_INDEX_NONE) && (*bytes == '\0')) { + if ((numBytes == -1) && (*bytes == '\0')) { break; } - if (TclIsSpaceProcM(*bytes)) { - /* - * Space run started; bump count. - */ - + if (TclIsSpaceProc(*bytes)) { + /* Space run started; bump count */ count++; do { bytes++; - numBytes -= (numBytes != TCL_INDEX_NONE); - } while (numBytes && TclIsSpaceProcM(*bytes)); - if ((numBytes == 0) || ((numBytes == TCL_INDEX_NONE) && (*bytes == '\0'))) { + numBytes -= (numBytes != -1); + } while (numBytes && TclIsSpaceProc(*bytes)); + if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { break; } - - /* - * (*bytes) is non-space; return to counting state. - */ + /* (*bytes) is non-space; return to counting state */ } bytes++; - numBytes -= (numBytes != TCL_INDEX_NONE); + numBytes -= (numBytes != -1); } - /* - * No list element following trailing white space. - */ - - count -= TclIsSpaceProcM(bytes[-1]); + /* No list element following trailing white space */ + count -= TclIsSpaceProc(bytes[-1]); - done: + done: if (endPtr) { *endPtr = bytes; } @@ -473,18 +449,18 @@ TclMaxListLength( * that's part of the element. If this is the last argument in the list, * then *nextPtr will point just after the last character in the list * (i.e., at the character at list+listLength). If sizePtr is non-NULL, - * *sizePtr is filled in with the number of bytes in the element. If the - * element is in braces, then *elementPtr will point to the character + * *sizePtr is filled in with the number of bytes in the element. If + * the element is in braces, then *elementPtr will point to the character * after the opening brace and *sizePtr will not include either of the * braces. If there isn't an element in the list, *sizePtr will be zero, * and both *elementPtr and *nextPtr will point just after the last * character in the list. If literalPtr is non-NULL, *literalPtr is set - * to a boolean value indicating whether the substring returned as the - * values of **elementPtr and *sizePtr is the literal value of a list - * element. If not, a call to TclCopyAndCollapse() is needed to produce - * the actual value of the list element. Note: this function does NOT - * collapse backslash sequences, but uses *literalPtr to tell callers - * when it is required for them to do so. + * to a boolean value indicating whether the substring returned as + * the values of **elementPtr and *sizePtr is the literal value of + * a list element. If not, a call to TclCopyAndCollapse() is needed + * to produce the actual value of the list element. Note: this function + * does NOT collapse backslash sequences, but uses *literalPtr to tell + * callers when it is required for them to do so. * * Side effects: * None. @@ -497,16 +473,16 @@ TclFindElement( Tcl_Interp *interp, /* Interpreter to use for error reporting. If * NULL, then no error message is left after * errors. */ - const char *list, /* Points to the first byte of a string + CONST char *list, /* Points to the first byte of a string * containing a Tcl list with zero or more * elements (possibly in braces). */ - Tcl_Size listLength, /* Number of bytes in the list's string. */ - const char **elementPtr, /* Where to put address of first significant + int listLength, /* Number of bytes in the list's string. */ + CONST char **elementPtr, /* Where to put address of first significant * character in first element of list. */ - const char **nextPtr, /* Fill in with location of character just + CONST char **nextPtr, /* Fill in with location of character just * after all white space following end of * argument (next arg or end of list). */ - Tcl_Size *sizePtr, /* If non-zero, fill in with size of + int *sizePtr, /* If non-zero, fill in with size of * element. */ int *literalPtr) /* If non-zero, fill in with non-zero/zero to * indicate that the substring of *sizePtr @@ -515,85 +491,24 @@ TclFindElement( * does not/does require a call to * TclCopyAndCollapse() by the caller. */ { - return FindElement(interp, list, listLength, "list", "LIST", elementPtr, - nextPtr, sizePtr, literalPtr); -} - -int -TclFindDictElement( - Tcl_Interp *interp, /* Interpreter to use for error reporting. If - * NULL, then no error message is left after - * errors. */ - const char *dict, /* Points to the first byte of a string - * containing a Tcl dictionary with zero or - * more keys and values (possibly in - * braces). */ - Tcl_Size dictLength, /* Number of bytes in the dict's string. */ - const char **elementPtr, /* Where to put address of first significant - * character in the first element (i.e., key - * or value) of dict. */ - const char **nextPtr, /* Fill in with location of character just - * after all white space following end of - * element (next arg or end of list). */ - Tcl_Size *sizePtr, /* If non-zero, fill in with size of - * element. */ - int *literalPtr) /* If non-zero, fill in with non-zero/zero to - * indicate that the substring of *sizePtr - * bytes starting at **elementPtr is/is not - * the literal key or value and therefore - * does not/does require a call to - * TclCopyAndCollapse() by the caller. */ -{ - return FindElement(interp, dict, dictLength, "dict", "DICTIONARY", - elementPtr, nextPtr, sizePtr, literalPtr); -} - -static int -FindElement( - Tcl_Interp *interp, /* Interpreter to use for error reporting. If - * NULL, then no error message is left after - * errors. */ - const char *string, /* Points to the first byte of a string - * containing a Tcl list or dictionary with - * zero or more elements (possibly in - * braces). */ - Tcl_Size stringLength, /* Number of bytes in the string. */ - const char *typeStr, /* The name of the type of thing we are - * parsing, for error messages. */ - const char *typeCode, /* The type code for thing we are parsing, for - * error messages. */ - const char **elementPtr, /* Where to put address of first significant - * character in first element. */ - const char **nextPtr, /* Fill in with location of character just - * after all white space following end of - * argument (next arg or end of list/dict). */ - Tcl_Size *sizePtr, /* If non-zero, fill in with size of - * element. */ - int *literalPtr) /* If non-zero, fill in with non-zero/zero to - * indicate that the substring of *sizePtr - * bytes starting at **elementPtr is/is not - * the literal list/dict element and therefore - * does not/does require a call to - * TclCopyAndCollapse() by the caller. */ -{ - const char *p = string; - const char *elemStart; /* Points to first byte of first element. */ - const char *limit; /* Points just after list/dict's last byte. */ - Tcl_Size openBraces = 0; /* Brace nesting level during parse. */ + CONST char *p = list; + CONST char *elemStart; /* Points to first byte of first element. */ + CONST char *limit; /* Points just after list's last byte. */ + int openBraces = 0; /* Brace nesting level during parse. */ int inQuotes = 0; - Tcl_Size size = 0; - Tcl_Size numChars; + int size = 0; /* lint. */ + int numChars; int literal = 1; - const char *p2; + CONST char *p2; /* * Skim off leading white space and check for an opening brace or quote. - * We treat embedded NULLs in the list/dict as bytes belonging to a list - * element (or dictionary key or value). + * We treat embedded NULLs in the list as bytes belonging to a list + * element. */ - limit = (string + stringLength); - while ((p < limit) && (TclIsSpaceProcM(*p))) { + limit = (list + listLength); + while ((p < limit) && (TclIsSpaceProc(*p))) { p++; } if (p == limit) { /* no element found */ @@ -638,7 +553,7 @@ FindElement( } else if (openBraces == 1) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProcM(*p)) { + if ((p >= limit) || TclIsSpaceProc(*p)) { goto done; } @@ -648,15 +563,13 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) + while ((p2 < limit) && (!TclIsSpaceProc(*p2)) && (p2 < p+20)) { p2++; } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "%s element in braces followed by \"%.*s\" " - "instead of space", typeStr, (int) (p2-p), p)); - Tcl_SetErrorCode(interp, "TCL", "VALUE", typeCode, "JUNK", - (void *)NULL); + "list element in braces followed by \"%.*s\" " + "instead of space", (int) (p2-p), p)); } return TCL_ERROR; } @@ -672,10 +585,9 @@ FindElement( /* * A backslash sequence not within a brace quoted element * means the value of the element is different from the - * substring we are parsing. A call to TclCopyAndCollapse() is - * needed to produce the element value. Inform the caller. + * substring we are parsing. A call to TclCopyAndCollapse() + * is needed to produce the element value. Inform the caller. */ - literal = 0; } TclParseBackslash(p, limit - p, &numChars, NULL); @@ -683,6 +595,23 @@ FindElement( break; /* + * Space: ignore if element is in braces or quotes; otherwise + * terminate element. + */ + + case ' ': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + if ((openBraces == 0) && !inQuotes) { + size = (p - elemStart); + goto done; + } + break; + + /* * Double-quote: if element is in quotes then terminate it. */ @@ -690,7 +619,7 @@ FindElement( if (inQuotes) { size = (p - elemStart); p++; - if ((p >= limit) || TclIsSpaceProcM(*p)) { + if ((p >= limit) || TclIsSpaceProc(*p)) { goto done; } @@ -700,56 +629,36 @@ FindElement( if (interp != NULL) { p2 = p; - while ((p2 < limit) && (!TclIsSpaceProcM(*p2)) + while ((p2 < limit) && (!TclIsSpaceProc(*p2)) && (p2 < p+20)) { p2++; } Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "%s element in quotes followed by \"%.*s\" " - "instead of space", typeStr, (int) (p2-p), p)); - Tcl_SetErrorCode(interp, "TCL", "VALUE", typeCode, "JUNK", - (void *)NULL); + "list element in quotes followed by \"%.*s\" " + "instead of space", (int) (p2-p), p)); } return TCL_ERROR; } break; - - default: - if (TclIsSpaceProcM(*p)) { - /* - * Space: ignore if element is in braces or quotes; - * otherwise terminate element. - */ - if ((openBraces == 0) && !inQuotes) { - size = (p - elemStart); - goto done; - } - } - break; - } p++; } /* - * End of list/dict: terminate element. + * End of list: terminate element. */ if (p == limit) { if (openBraces != 0) { if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "unmatched open brace in %s", typeStr)); - Tcl_SetErrorCode(interp, "TCL", "VALUE", typeCode, "BRACE", - (void *)NULL); + Tcl_SetResult(interp, "unmatched open brace in list", + TCL_STATIC); } return TCL_ERROR; } else if (inQuotes) { if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "unmatched open quote in %s", typeStr)); - Tcl_SetErrorCode(interp, "TCL", "VALUE", typeCode, "QUOTE", - (void *)NULL); + Tcl_SetResult(interp, "unmatched open quote in list", + TCL_STATIC); } return TCL_ERROR; } @@ -757,7 +666,7 @@ FindElement( } done: - while ((p < limit) && (TclIsSpaceProcM(*p))) { + while ((p < limit) && (TclIsSpaceProc(*p))) { p++; } *elementPtr = elemStart; @@ -780,9 +689,9 @@ FindElement( * * Results: * Count bytes get copied from src to dst. Along the way, backslash - * sequences are substituted in the copy. After scanning count bytes from - * src, a null character is placed at the end of dst. Returns the number - * of bytes that got written to dst. + * sequences are substituted in the copy. After scanning count bytes + * from src, a null character is placed at the end of dst. Returns + * the number of bytes that got written to dst. * * Side effects: * None. @@ -790,23 +699,20 @@ FindElement( *---------------------------------------------------------------------- */ -Tcl_Size +int TclCopyAndCollapse( - Tcl_Size count, /* Number of byte to copy from src. */ - const char *src, /* Copy from here... */ + int count, /* Number of byte to copy from src. */ + CONST char *src, /* Copy from here... */ char *dst) /* ... to here. */ { - Tcl_Size newCount = 0; + int newCount = 0; while (count > 0) { char c = *src; - if (c == '\\') { - char buf[4] = ""; - Tcl_Size numRead; - Tcl_Size backslashCount = TclParseBackslash(src, count, &numRead, buf); + int numRead; + int backslashCount = TclParseBackslash(src, count, &numRead, dst); - memcpy(dst, buf, backslashCount); dst += backslashCount; newCount += backslashCount; src += numRead; @@ -851,62 +757,60 @@ TclCopyAndCollapse( *---------------------------------------------------------------------- */ -#undef Tcl_SplitList int Tcl_SplitList( Tcl_Interp *interp, /* Interpreter to use for error reporting. If * NULL, no error message is left. */ - const char *list, /* Pointer to string with list structure. */ - Tcl_Size *argcPtr, /* Pointer to location to fill in with the + CONST char *list, /* Pointer to string with list structure. */ + int *argcPtr, /* Pointer to location to fill in with the * number of elements in the list. */ - const char ***argvPtr) /* Pointer to place to store pointer to array + CONST char ***argvPtr) /* Pointer to place to store pointer to array * of pointers to list elements. */ { - const char **argv, *end, *element; + CONST char **argv, *end, *element; char *p; - int result; - Tcl_Size length, size, i, elSize; + int length, size, i, result, elSize; /* - * Allocate enough space to work in. A (const char *) for each (possible) - * list element plus one more for terminating NULL, plus as many bytes as - * in the original string value, plus one more for a terminating '\0'. - * Space used to hold element separating white space in the original - * string gets re-purposed to hold '\0' characters in the argv array. + * Allocate enough space to work in. A (CONST char *) for each + * (possible) list element plus one more for terminating NULL, + * plus as many bytes as in the original string value, plus one + * more for a terminating '\0'. Space used to hold element separating + * white space in the original string gets re-purposed to hold '\0' + * characters in the argv array. */ - size = TclMaxListLength(list, TCL_INDEX_NONE, &end) + 1; + size = TclMaxListLength(list, -1, &end) + 1; length = end - list; - argv = (const char **)ckalloc((size * sizeof(char *)) + length + 1); + argv = (CONST char **) ckalloc((unsigned) + ((size * sizeof(char *)) + length + 1)); for (i = 0, p = ((char *) argv) + size*sizeof(char *); *list != 0; i++) { - const char *prevList = list; + CONST char *prevList = list; int literal; result = TclFindElement(interp, list, length, &element, &list, &elSize, &literal); length -= (list - prevList); if (result != TCL_OK) { - ckfree(argv); + ckfree((char *) argv); return result; } if (*element == 0) { break; } if (i >= size) { - ckfree(argv); + ckfree((char *) argv); if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "internal error in Tcl_SplitList", -1)); - Tcl_SetErrorCode(interp, "TCL", "INTERNAL", "Tcl_SplitList", - (void *)NULL); + Tcl_SetResult(interp, "internal error in Tcl_SplitList", + TCL_STATIC); } return TCL_ERROR; } argv[i] = p; if (literal) { - memcpy(p, element, elSize); + memcpy(p, element, (size_t) elSize); p += elSize; *p = 0; p++; @@ -931,9 +835,9 @@ Tcl_SplitList( * enclosing braces) to make the string into a valid Tcl list element. * * Results: - * The return value is an overestimate of the number of bytes that will - * be needed by Tcl_ConvertElement to produce a valid list element from - * src. The word at *flagPtr is filled in with a value needed by + * The return value is an overestimate of the number of bytes that + * will be needed by Tcl_ConvertElement to produce a valid list element + * from src. The word at *flagPtr is filled in with a value needed by * Tcl_ConvertElement when doing the actual conversion. * * Side effects: @@ -942,13 +846,13 @@ Tcl_SplitList( *---------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ScanElement( - const char *src, /* String to convert to list element. */ - int *flagPtr) /* Where to store information to guide - * Tcl_ConvertCountedElement. */ + register CONST char *src, /* String to convert to list element. */ + register int *flagPtr) /* Where to store information to guide + * Tcl_ConvertCountedElement. */ { - return Tcl_ScanCountedElement(src, TCL_INDEX_NONE, flagPtr); + return Tcl_ScanCountedElement(src, -1, flagPtr); } /* @@ -959,14 +863,14 @@ Tcl_ScanElement( * This function is a companion function to Tcl_ConvertCountedElement. It * scans a string to see what needs to be done to it (e.g. add * backslashes or enclosing braces) to make the string into a valid Tcl - * list element. If length is TCL_INDEX_NONE, then the string is scanned - * from src up to the first null byte. + * list element. If length is -1, then the string is scanned from src up + * to the first null byte. * * Results: - * The return value is an overestimate of the number of bytes that will - * be needed by Tcl_ConvertCountedElement to produce a valid list element - * from src. The word at *flagPtr is filled in with a value needed by - * Tcl_ConvertCountedElement when doing the actual conversion. + * The return value is an overestimate of the number of bytes that + * will be needed by Tcl_ConvertCountedElement to produce a valid list + * element from src. The word at *flagPtr is filled in with a value + * needed by Tcl_ConvertCountedElement when doing the actual conversion. * * Side effects: * None. @@ -974,15 +878,15 @@ Tcl_ScanElement( *---------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ScanCountedElement( - const char *src, /* String to convert to Tcl list element. */ - Tcl_Size length, /* Number of bytes in src, or TCL_INDEX_NONE. */ + CONST char *src, /* String to convert to Tcl list element. */ + int length, /* Number of bytes in src, or -1. */ int *flagPtr) /* Where to store information to guide * Tcl_ConvertElement. */ { - char flags = CONVERT_ANY; - Tcl_Size numBytes = TclScanElement(src, length, &flags); + int flags = CONVERT_ANY; + int numBytes = TclScanElement(src, length, &flags); *flagPtr = flags; return numBytes; @@ -993,24 +897,24 @@ Tcl_ScanCountedElement( * * TclScanElement -- * - * This function is a companion function to TclConvertElement. It scans a - * string to see what needs to be done to it (e.g. add backslashes or - * enclosing braces) to make the string into a valid Tcl list element. If - * length is TCL_INDEX_NONE, then the string is scanned from src up to the first null - * byte. A NULL value for src is treated as an empty string. The incoming - * value of *flagPtr is a report from the caller what additional flags it - * will pass to TclConvertElement(). + * This function is a companion function to TclConvertElement. It + * scans a string to see what needs to be done to it (e.g. add + * backslashes or enclosing braces) to make the string into a valid Tcl + * list element. If length is -1, then the string is scanned from src up + * to the first null byte. A NULL value for src is treated as an + * empty string. The incoming value of *flagPtr is a report from the + * caller what additional flags it will pass to TclConvertElement(). * * Results: - * The recommended formatting mode for the element is determined and a - * value is written to *flagPtr indicating that recommendation. This + * The recommended formatting mode for the element is determined and + * a value is written to *flagPtr indicating that recommendation. This * recommendation is combined with the incoming flag values in *flagPtr * set by the caller to determine how many bytes will be needed by * TclConvertElement() in which to write the formatted element following - * the recommendation modified by the flag values. This number of bytes - * is the return value of the routine. In some situations it may be an - * overestimate, but so long as the caller passes the same flags to - * TclConvertElement(), it will be large enough. + * the recommendation modified by the flag values. This number of bytes + * is the return value of the routine. In some situations it may be + * an overestimate, but so long as the caller passes the same flags + * to TclConvertElement(), it will be large enough. * * Side effects: * None. @@ -1018,93 +922,69 @@ Tcl_ScanCountedElement( *---------------------------------------------------------------------- */ -TCL_HASH_TYPE +int TclScanElement( - const char *src, /* String to convert to Tcl list element. */ - Tcl_Size length, /* Number of bytes in src, or TCL_INDEX_NONE. */ - char *flagPtr) /* Where to store information to guide + CONST char *src, /* String to convert to Tcl list element. */ + int length, /* Number of bytes in src, or -1. */ + int *flagPtr) /* Where to store information to guide * Tcl_ConvertElement. */ { - const char *p = src; - Tcl_Size nestingLevel = 0; /* Brace nesting count */ + CONST char *p = src; + int nestingLevel = 0; /* Brace nesting count */ int forbidNone = 0; /* Do not permit CONVERT_NONE mode. Something - * needs protection or escape. */ + needs protection or escape. */ int requireEscape = 0; /* Force use of CONVERT_ESCAPE mode. For some * reason bare or brace-quoted form fails. */ - Tcl_Size extra = 0; /* Count of number of extra bytes needed for + int extra = 0; /* Count of number of extra bytes needed for * formatted element, assuming we use escape * sequences in formatting. */ - TCL_HASH_TYPE bytesNeeded; /* Buffer length computed to complete the + int bytesNeeded; /* Buffer length computed to complete the * element formatting in the selected mode. */ #if COMPAT int preferEscape = 0; /* Use preferences to track whether to use */ int preferBrace = 0; /* CONVERT_MASK mode. */ int braceCount = 0; /* Count of all braces '{' '}' seen. */ -#endif /* COMPAT */ - - if ((p == NULL) || (length == 0) || ((*p == '\0') && (length == TCL_INDEX_NONE))) { - /* - * Empty string element must be brace quoted. - */ +#endif + if ((p == NULL) || (length == 0) || ((*p == '\0') && (length == -1))) { + /* Empty string element must be brace quoted. */ *flagPtr = CONVERT_BRACE; return 2; } -#if COMPAT - /* - * We have an established history in TclConvertElement() when quoting - * because of a leading hash character to force what would be the - * CONVERT_MASK mode into the CONVERT_BRACE mode. That is, we format - * the element #{a"b} like this: - * {#{a"b}} - * and not like this: - * \#{a\"b} - * This is inconsistent with [list x{a"b}], but we will not change that now. - * Set that preference here so that we compute a tight size requirement. - */ - if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { - preferBrace = 1; - } -#endif - if ((*p == '{') || (*p == '"')) { /* * Must escape or protect so leading character of value is not * misinterpreted as list element delimiting syntax. */ - forbidNone = 1; #if COMPAT preferBrace = 1; -#endif /* COMPAT */ +#endif } while (length) { - if (CHAR_TYPE(*p) != TYPE_NORMAL) { switch (*p) { - case '{': /* TYPE_BRACE */ + case '{': #if COMPAT braceCount++; -#endif /* COMPAT */ +#endif extra++; /* Escape '{' => '\{' */ nestingLevel++; break; - case '}': /* TYPE_BRACE */ + case '}': #if COMPAT braceCount++; -#endif /* COMPAT */ +#endif extra++; /* Escape '}' => '\}' */ - if (nestingLevel-- < 1) { - /* - * Unbalanced braces! Cannot format with brace quoting. - */ - + nestingLevel--; + if (nestingLevel < 0) { + /* Unbalanced braces! Cannot format with brace quoting. */ requireEscape = 1; } break; - case ']': /* TYPE_CLOSE_BRACK */ - case '"': /* TYPE_SPACE */ + case ']': + case '"': #if COMPAT forbidNone = 1; extra++; /* Escapes all just prepend a backslash */ @@ -1112,33 +992,32 @@ TclScanElement( break; #else /* FLOW THROUGH */ -#endif /* COMPAT */ - case '[': /* TYPE_SUBS */ - case '$': /* TYPE_SUBS */ - case ';': /* TYPE_COMMAND_END */ +#endif + case '[': + case '$': + case ';': + case ' ': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': forbidNone = 1; extra++; /* Escape sequences all one byte longer. */ #if COMPAT preferBrace = 1; -#endif /* COMPAT */ +#endif break; - case '\\': /* TYPE_SUBS */ + case '\\': extra++; /* Escape '\' => '\\' */ - if ((length == 1) || ((length == TCL_INDEX_NONE) && (p[1] == '\0'))) { - /* - * Final backslash. Cannot format with brace quoting. - */ - + if ((length == 1) || ((length == -1) && (p[1] == '\0'))) { + /* Final backslash. Cannot format with brace quoting. */ requireEscape = 1; break; } if (p[1] == '\n') { extra++; /* Escape newline => '\n', one byte longer */ - - /* - * Backslash newline sequence. Brace quoting not permitted. - */ - + /* Backslash newline sequence. Brace quoting not permitted. */ requireEscape = 1; length -= (length > 0); p++; @@ -1152,56 +1031,35 @@ TclScanElement( forbidNone = 1; #if COMPAT preferBrace = 1; -#endif /* COMPAT */ +#endif break; - case '\0': /* TYPE_SUBS */ - if (length == TCL_INDEX_NONE) { + case '\0': + if (length == -1) { goto endOfString; } /* TODO: Panic on improper encoding? */ break; - default: - if (TclIsSpaceProcM(*p)) { - forbidNone = 1; - extra++; /* Escape sequences all one byte longer. */ -#if COMPAT - preferBrace = 1; -#endif - } - break; } - } length -= (length > 0); p++; } - endOfString: + endOfString: if (nestingLevel != 0) { - /* - * Unbalanced braces! Cannot format with brace quoting. - */ - + /* Unbalanced braces! Cannot format with brace quoting. */ requireEscape = 1; } - /* - * We need at least as many bytes as are in the element value... - */ - + /* We need at least as many bytes as are in the element value... */ bytesNeeded = p - src; if (requireEscape) { /* - * We must use escape sequences. Add all the extra bytes needed to - * have room to create them. + * We must use escape sequences. Add all the extra bytes needed + * to have room to create them. */ - bytesNeeded += extra; - - /* - * Make room to escape leading #, if needed. - */ - + /* Make room to escape leading #, if needed. */ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { bytesNeeded++; } @@ -1211,13 +1069,12 @@ TclScanElement( if (*flagPtr & CONVERT_ANY) { /* * The caller has not let us know what flags it will pass to - * TclConvertElement() so compute the max size we might need for any - * possible choice. Normally the formatting using escape sequences is - * the longer one, and a minimum "extra" value of 2 makes sure we - * don't request too small a buffer in those edge cases where that's - * not true. + * TclConvertElement() so compute the max size we might need for + * any possible choice. Normally the formatting using escape + * sequences is the longer one, and a minimum "extra" value of 2 + * makes sure we don't request too small a buffer in those edge + * cases where that's not true. */ - if (extra < 2) { extra = 2; } @@ -1225,79 +1082,60 @@ TclScanElement( *flagPtr |= TCL_DONT_USE_BRACES; } if (forbidNone) { - /* - * We must request some form of quoting of escaping... - */ - + /* We must request some form of quoting of escaping... */ #if COMPAT if (preferEscape && !preferBrace) { /* - * If we are quoting solely due to ] or internal " characters use - * the CONVERT_MASK mode where we escape all special characters - * except for braces. "extra" counted space needed to escape - * braces too, so subtract "braceCount" to get our actual needs. + * If we are quoting solely due to ] or internal " characters + * use the CONVERT_MASK mode where we escape all special + * characters except for braces. "extra" counted space needed + * to escape braces too, so substract "braceCount" to get our + * actual needs. */ - bytesNeeded += (extra - braceCount); /* Make room to escape leading #, if needed. */ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { bytesNeeded++; } - /* * If the caller reports it will direct TclConvertElement() to * use full escapes on the element, add back the bytes needed to * escape the braces. */ - if (*flagPtr & TCL_DONT_USE_BRACES) { bytesNeeded += braceCount; } *flagPtr = CONVERT_MASK; goto overflowCheck; } -#endif /* COMPAT */ +#endif if (*flagPtr & TCL_DONT_USE_BRACES) { /* * If the caller reports it will direct TclConvertElement() to * use escapes, add the extra bytes needed to have room for them. */ - bytesNeeded += extra; - - /* - * Make room to escape leading #, if needed. - */ - + /* Make room to escape leading #, if needed. */ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { bytesNeeded++; } } else { - /* - * Add 2 bytes for room for the enclosing braces. - */ - + /* Add 2 bytes for room for the enclosing braces. */ bytesNeeded += 2; } *flagPtr = CONVERT_BRACE; goto overflowCheck; } - /* - * So far, no need to quote or escape anything. - */ - + /* So far, no need to quote or escape anything. */ if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { - /* - * If we need to quote a leading #, make room to enclose in braces. - */ - + /* If we need to quote a leading #, make room to enclose in braces. */ bytesNeeded += 2; } *flagPtr = CONVERT_NONE; - overflowCheck: - if (bytesNeeded > INT_MAX) { + overflowCheck: + if (bytesNeeded < 0) { Tcl_Panic("TclScanElement: string length overflow"); } return bytesNeeded; @@ -1324,13 +1162,13 @@ TclScanElement( *---------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ConvertElement( - const char *src, /* Source information for list element. */ - char *dst, /* Place to put list-ified element. */ - int flags) /* Flags produced by Tcl_ScanElement. */ + register CONST char *src, /* Source information for list element. */ + register char *dst, /* Place to put list-ified element. */ + register int flags) /* Flags produced by Tcl_ScanElement. */ { - return Tcl_ConvertCountedElement(src, TCL_INDEX_NONE, dst, flags); + return Tcl_ConvertCountedElement(src, -1, dst, flags); } /* @@ -1354,14 +1192,14 @@ Tcl_ConvertElement( *---------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ConvertCountedElement( - const char *src, /* Source information for list element. */ - Tcl_Size length, /* Number of bytes in src, or TCL_INDEX_NONE. */ + register CONST char *src, /* Source information for list element. */ + int length, /* Number of bytes in src, or -1. */ char *dst, /* Place to put list-ified element. */ int flags) /* Flags produced by Tcl_ScanElement. */ { - Tcl_Size numBytes = TclConvertElement(src, length, dst, flags); + int numBytes = TclConvertElement(src, length, dst, flags); dst[numBytes] = '\0'; return numBytes; } @@ -1371,9 +1209,9 @@ Tcl_ConvertCountedElement( * * TclConvertElement -- * - * This is a companion function to TclScanElement. Given the information - * produced by TclScanElement, this function converts a string to a list - * element equal to that string. + * This is a companion function to TclScanElement. Given the + * information produced by TclScanElement, this function converts + * a string to a list element equal to that string. * * Results: * Information is copied to *dst in the form of a list element identical @@ -1387,38 +1225,28 @@ Tcl_ConvertCountedElement( *---------------------------------------------------------------------- */ -Tcl_Size -TclConvertElement( - const char *src, /* Source information for list element. */ - Tcl_Size length, /* Number of bytes in src, or TCL_INDEX_NONE. */ +int TclConvertElement( + register CONST char *src, /* Source information for list element. */ + int length, /* Number of bytes in src, or -1. */ char *dst, /* Place to put list-ified element. */ int flags) /* Flags produced by Tcl_ScanElement. */ { int conversion = flags & CONVERT_MASK; char *p = dst; - /* - * Let the caller demand we use escape sequences rather than braces. - */ - + /* Let the caller demand we use escape sequences rather than braces. */ if ((flags & TCL_DONT_USE_BRACES) && (conversion & CONVERT_BRACE)) { conversion = CONVERT_ESCAPE; } - /* - * No matter what the caller demands, empty string must be braced! - */ - - if ((src == NULL) || (length == 0) || (*src == '\0' && length == TCL_INDEX_NONE)) { - p[0] = '{'; - p[1] = '}'; - return 2; + /* No matter what the caller demands, empty string must be braced! */ + if ((src == NULL) || (length == 0) || ((*src == '\0') && (length == -1))) { + src = tclEmptyStringRep; + length = 0; + conversion = CONVERT_BRACE; } - /* - * Escape leading hash as needed and requested. - */ - + /* Escape leading hash as needed and requested. */ if ((*src == '#') && !(flags & TCL_DONT_QUOTE_HASH)) { if (conversion == CONVERT_ESCAPE) { p[0] = '\\'; @@ -1431,12 +1259,9 @@ TclConvertElement( } } - /* - * No escape or quoting needed. Copy the literal string value. - */ - + /* No escape or quoting needed. Copy the literal string value. */ if (conversion == CONVERT_NONE) { - if (length == TCL_INDEX_NONE) { + if (length == -1) { /* TODO: INT_MAX overflow? */ while (*src) { *p++ = *src++; @@ -1448,14 +1273,11 @@ TclConvertElement( } } - /* - * Formatted string is original string enclosed in braces. - */ - + /* Formatted string is original string enclosed in braces. */ if (conversion == CONVERT_BRACE) { *p = '{'; p++; - if (length == TCL_INDEX_NONE) { + if (length == -1) { /* TODO: INT_MAX overflow? */ while (*src) { *p++ = *src++; @@ -1466,15 +1288,12 @@ TclConvertElement( } *p = '}'; p++; - return (p - dst); + return p - dst; } /* conversion == CONVERT_ESCAPE or CONVERT_MASK */ - /* - * Formatted string is original string converted to escape sequences. - */ - + /* Formatted string is original string converted to escape sequences. */ for ( ; length; src++, length -= (length > 0)) { switch (*src) { case ']': @@ -1490,12 +1309,13 @@ TclConvertElement( case '{': case '}': #if COMPAT - if (conversion == CONVERT_ESCAPE) -#endif /* COMPAT */ - { + if (conversion == CONVERT_ESCAPE) { +#endif *p = '\\'; p++; +#if COMPAT } +#endif break; case '\f': *p = '\\'; @@ -1528,24 +1348,22 @@ TclConvertElement( p++; continue; case '\0': - if (length == TCL_INDEX_NONE) { - return (p - dst); + if (length == -1) { + return p - dst; } - /* - * If we reach this point, there's an embedded NULL in the string - * range being processed, which should not happen when the - * encoding rules for Tcl strings are properly followed. If the - * day ever comes when we stop tolerating such things, this is - * where to put the Tcl_Panic(). + * If we reach this point, there's an embedded NULL in the + * string range being processed, which should not happen when + * the encoding rules for Tcl strings are properly followed. + * If the day ever comes when we stop tolerating such things, + * this is where to put the Tcl_Panic(). */ - break; } *p = *src; p++; } - return (p - dst); + return p - dst; } /* @@ -1570,25 +1388,21 @@ TclConvertElement( char * Tcl_Merge( - Tcl_Size argc, /* How many strings to merge. */ - const char *const *argv) /* Array of string values. */ + int argc, /* How many strings to merge. */ + CONST char * CONST *argv) /* Array of string values. */ { -#define LOCAL_SIZE 64 - char localFlags[LOCAL_SIZE], *flagPtr = NULL; - Tcl_Size i; - unsigned int bytesNeeded = 0; +# define LOCAL_SIZE 20 + int localFlags[LOCAL_SIZE], *flagPtr = NULL; + int i, bytesNeeded = 0; char *result, *dst; + const int maxFlags = UINT_MAX / sizeof(int); - /* - * Handle empty list case first, so logic of the general case can be - * simpler. - */ - - if (argc <= 0) { - if (argc < 0) { - Tcl_Panic("Tcl_Merge called with negative argc (%d)", argc); - } - result = (char *)ckalloc(1); + if (argc == 0) { + /* + * Handle empty list case first, so logic of the general case + * can be simpler. + */ + result = ckalloc(1); result[0] = '\0'; return result; } @@ -1599,17 +1413,31 @@ Tcl_Merge( if (argc <= LOCAL_SIZE) { flagPtr = localFlags; + } else if (argc > maxFlags) { + /* + * We cannot allocate a large enough flag array to format this + * list in one pass. We could imagine converting this routine + * to a multi-pass implementation, but for sizeof(int) == 4, + * the limit is a max of 2^30 list elements and since each element + * is at least one byte formatted, and requires one byte space + * between it and the next one, that a minimum space requirement + * of 2^31 bytes, which is already INT_MAX. If we tried to format + * a list of > maxFlags elements, we're just going to overflow + * the size limits on the formatted string anyway, so just issue + * that same panic early. + */ + Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } else { - flagPtr = (char *)ckalloc(argc); + flagPtr = (int *) ckalloc((unsigned) argc*sizeof(int)); } for (i = 0; i < argc; i++) { flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); - bytesNeeded += TclScanElement(argv[i], TCL_INDEX_NONE, &flagPtr[i]); - if (bytesNeeded > INT_MAX) { + bytesNeeded += TclScanElement(argv[i], -1, &flagPtr[i]); + if (bytesNeeded < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } } - if (bytesNeeded + argc > INT_MAX + 1U) { + if (bytesNeeded > INT_MAX - argc + 1) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } bytesNeeded += argc; @@ -1618,23 +1446,22 @@ Tcl_Merge( * Pass two: copy into the result area. */ - result = (char *)ckalloc(bytesNeeded); + result = ckalloc((unsigned) bytesNeeded); dst = result; for (i = 0; i < argc; i++) { flagPtr[i] |= ( i ? TCL_DONT_QUOTE_HASH : 0 ); - dst += TclConvertElement(argv[i], TCL_INDEX_NONE, dst, flagPtr[i]); + dst += TclConvertElement(argv[i], -1, dst, flagPtr[i]); *dst = ' '; dst++; } dst[-1] = 0; if (flagPtr != localFlags) { - ckfree(flagPtr); + ckfree((char *) flagPtr); } return result; } -#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 /* *---------------------------------------------------------------------- * @@ -1656,26 +1483,25 @@ Tcl_Merge( char Tcl_Backslash( - const char *src, /* Points to the backslash character of a + CONST char *src, /* Points to the backslash character of a * backslash sequence. */ int *readPtr) /* Fill in with number of characters read from * src, unless NULL. */ { - char buf[4] = ""; - Tcl_UniChar ch = 0; + char buf[TCL_UTF_MAX]; + Tcl_UniChar ch; Tcl_UtfBackslash(src, readPtr, buf); TclUtfToUniChar(buf, &ch); return (char) ch; } -#endif /* !TCL_NO_DEPRECATED */ /* *---------------------------------------------------------------------- * * TclTrimRight -- - * Takes two counted strings in the Tcl encoding. Conceptually - * finds the sub string (offset) to trim from the right side of the + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the right side of the * first string all characters found in the second string. * * Results: @@ -1687,62 +1513,52 @@ Tcl_Backslash( *---------------------------------------------------------------------- */ -Tcl_Size +int TclTrimRight( const char *bytes, /* String to be trimmed... */ - Tcl_Size numBytes, /* ...and its length in bytes */ - /* Calls to TclUtfToUniChar() in this routine - * rely on (bytes[numBytes] == '\0'). */ + int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ - Tcl_Size numTrim) /* ...and its length in bytes */ - /* Calls to TclUtfToUniChar() in this routine - * rely on (trim[numTrim] == '\0'). */ + int numTrim) /* ...and its length in bytes */ { - const char *pp, *p = bytes + numBytes; - int ch1, ch2; + const char *p = bytes + numBytes; + int pInc; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimRight works only on null-terminated strings"); + } /* Empty strings -> nothing to do */ if ((numBytes == 0) || (numTrim == 0)) { return 0; } - /* - * Outer loop: iterate over string to be trimmed. - */ - + /* Outer loop: iterate over string to be trimmed */ do { + Tcl_UniChar ch1; const char *q = trim; - Tcl_Size pInc = 0, bytesLeft = numTrim; + int bytesLeft = numTrim; - pp = Tcl_UtfPrev(p, bytes); - do { - pp += pInc; - pInc = TclUtfToUniChar(pp, &ch1); - } while (pp + pInc < p); - - /* - * Inner loop: scan trim string for match to current character. - */ + p = Tcl_UtfPrev(p, bytes); + pInc = TclUtfToUniChar(p, &ch1); + /* Inner loop: scan trim string for match to current character */ do { - pInc = TclUtfToUniChar(q, &ch2); + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { break; } - q += pInc; - bytesLeft -= pInc; + q += qInc; + bytesLeft -= qInc; } while (bytesLeft); if (bytesLeft == 0) { - /* - * No match; trim task done; *p is last non-trimmed char. - */ - + /* No match; trim task done; *p is last non-trimmed char */ + p += pInc; break; } - p = pp; } while (p > bytes); return numBytes - (p - bytes); @@ -1752,9 +1568,8 @@ TclTrimRight( *---------------------------------------------------------------------- * * TclTrimLeft -- - * - * Takes two counted strings in the Tcl encoding. Conceptually - * finds the sub string (offset) to trim from the left side of the + * Takes two counted strings in the Tcl encoding which must both be + * null terminated. Conceptually trims from the left side of the * first string all characters found in the second string. * * Results: @@ -1766,40 +1581,35 @@ TclTrimRight( *---------------------------------------------------------------------- */ -Tcl_Size +int TclTrimLeft( const char *bytes, /* String to be trimmed... */ - Tcl_Size numBytes, /* ...and its length in bytes */ - /* Calls to TclUtfToUniChar() in this routine - * rely on (bytes[numBytes] == '\0'). */ + int numBytes, /* ...and its length in bytes */ const char *trim, /* String of trim characters... */ - Tcl_Size numTrim) /* ...and its length in bytes */ - /* Calls to TclUtfToUniChar() in this routine - * rely on (trim[numTrim] == '\0'). */ + int numTrim) /* ...and its length in bytes */ { const char *p = bytes; - int ch1, ch2; + + if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { + Tcl_Panic("TclTrimLeft works only on null-terminated strings"); + } /* Empty strings -> nothing to do */ if ((numBytes == 0) || (numTrim == 0)) { return 0; } - /* - * Outer loop: iterate over string to be trimmed. - */ - + /* Outer loop: iterate over string to be trimmed */ do { - Tcl_Size pInc = TclUtfToUniChar(p, &ch1); + Tcl_UniChar ch1; + int pInc = TclUtfToUniChar(p, &ch1); const char *q = trim; - Tcl_Size bytesLeft = numTrim; - - /* - * Inner loop: scan trim string for match to current character. - */ + int bytesLeft = numTrim; + /* Inner loop: scan trim string for match to current character */ do { - Tcl_Size qInc = TclUtfToUniChar(q, &ch2); + Tcl_UniChar ch2; + int qInc = TclUtfToUniChar(q, &ch2); if (ch1 == ch2) { break; @@ -1810,16 +1620,13 @@ TclTrimLeft( } while (bytesLeft); if (bytesLeft == 0) { - /* - * No match; trim task done; *p is first non-trimmed char. - */ - + /* No match; trim task done; *p is first non-trimmed char */ break; } p += pInc; numBytes -= pInc; - } while (numBytes > 0); + } while (numBytes); return p - bytes; } @@ -1827,62 +1634,6 @@ TclTrimLeft( /* *---------------------------------------------------------------------- * - * TclTrim -- - * Finds the sub string (offset) to trim from both sides of the - * first string all characters found in the second string. - * - * Results: - * The number of bytes to be removed from the start of the string - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -Tcl_Size -TclTrim( - const char *bytes, /* String to be trimmed... */ - Tcl_Size numBytes, /* ...and its length in bytes */ - /* Calls in this routine - * rely on (bytes[numBytes] == '\0'). */ - const char *trim, /* String of trim characters... */ - Tcl_Size numTrim, /* ...and its length in bytes */ - /* Calls in this routine - * rely on (trim[numTrim] == '\0'). */ - Tcl_Size *trimRightPtr) /* Offset from the end of the string. */ -{ - Tcl_Size trimLeft = 0, trimRight = 0; - - /* Empty strings -> nothing to do */ - if ((numBytes > 0) && (numTrim > 0)) { - - /* When bytes is NUL-terminated, returns 0 <= trimLeft <= numBytes */ - trimLeft = TclTrimLeft(bytes, numBytes, trim, numTrim); - numBytes -= trimLeft; - - /* If we did not trim the whole string, it starts with a character - * that we will not trim. Skip over it. */ - if (numBytes > 0) { - int ch; - const char *first = bytes + trimLeft; - bytes += TclUtfToUniChar(first, &ch); - numBytes -= (bytes - first); - - if (numBytes > 0) { - /* When bytes is NUL-terminated, returns - * 0 <= trimRight <= numBytes */ - trimRight = TclTrimRight(bytes, numBytes, trim, numTrim); - } - } - } - *trimRightPtr = trimRight; - return trimLeft; -} - -/* - *---------------------------------------------------------------------- - * * Tcl_Concat -- * * Concatenate a set of strings into a single large string. @@ -1900,83 +1651,72 @@ TclTrim( */ /* The whitespace characters trimmed during [concat] operations */ -#define CONCAT_WS_SIZE (int) (sizeof(CONCAT_TRIM_SET "") - 1) +#define CONCAT_WS " \f\v\r\t\n" +#define CONCAT_WS_SIZE (int) (sizeof(CONCAT_WS "") - 1) char * Tcl_Concat( - Tcl_Size argc, /* Number of strings to concatenate. */ - const char *const *argv) /* Array of strings to concatenate. */ + int argc, /* Number of strings to concatenate. */ + CONST char * CONST *argv) /* Array of strings to concatenate. */ { - Tcl_Size i, needSpace = 0, bytesNeeded = 0; + int i, needSpace = 0, bytesNeeded = 0; char *result, *p; - /* - * Dispose of the empty result corner case first to simplify later code. - */ - + /* Dispose of the empty result corner case first to simplify later code */ if (argc == 0) { result = (char *) ckalloc(1); result[0] = '\0'; return result; } - /* - * First allocate the result buffer at the size required. - */ - + /* First allocate the result buffer at the size required */ for (i = 0; i < argc; i++) { bytesNeeded += strlen(argv[i]); if (bytesNeeded < 0) { Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); } } - - /* - * All element bytes + (argc - 1) spaces + 1 terminating NULL. - */ if (bytesNeeded + argc - 1 < 0) { /* - * Panic test could be tighter, but not going to bother for this - * legacy routine. + * Panic test could be tighter, but not going to bother for + * this legacy routine. */ - Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); } - - result = (char *)ckalloc(bytesNeeded + argc); + /* All element bytes + (argc - 1) spaces + 1 terminating NULL */ + result = (char *) ckalloc((unsigned) (bytesNeeded + argc)); for (p = result, i = 0; i < argc; i++) { - Tcl_Size triml, trimr, elemLength; + int trim, elemLength; const char *element; element = argv[i]; elemLength = strlen(argv[i]); - /* Trim away the leading/trailing whitespace. */ - triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE, &trimr); - element += triml; - elemLength -= triml + trimr; - - /* Do not permit trimming to expose a final backslash character. */ - elemLength += trimr && (element[elemLength - 1] == '\\'); + /* Trim away the leading whitespace */ + trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE); + element += trim; + elemLength -= trim; /* - * If we're left with empty element after trimming, do nothing. + * Trim away the trailing whitespace. Do not permit trimming + * to expose a final backslash character. */ + trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE); + trim -= trim && (element[elemLength - trim - 1] == '\\'); + elemLength -= trim; + + /* If we're left with empty element after trimming, do nothing */ if (elemLength == 0) { continue; } - /* - * Append to the result with space if needed. - */ - + /* Append to the result with space if needed */ if (needSpace) { *p++ = ' '; } - memcpy(p, element, elemLength); + memcpy(p, element, (size_t) elemLength); p += elemLength; needSpace = 1; } @@ -2004,11 +1744,10 @@ Tcl_Concat( Tcl_Obj * Tcl_ConcatObj( - Tcl_Size objc, /* Number of objects to concatenate. */ - Tcl_Obj *const objv[]) /* Array of objects to concatenate. */ + int objc, /* Number of objects to concatenate. */ + Tcl_Obj *CONST objv[]) /* Array of objects to concatenate. */ { - int needSpace = 0; - Tcl_Size i, bytesNeeded = 0, elemLength; + int i, elemLength, needSpace = 0, bytesNeeded = 0; const char *element; Tcl_Obj *objPtr, *resPtr; @@ -2019,55 +1758,63 @@ Tcl_ConcatObj( */ for (i = 0; i < objc; i++) { - Tcl_Size length; + int length; objPtr = objv[i]; if (TclListObjIsCanonical(objPtr)) { continue; } - TclGetStringFromObj(objPtr, &length); + Tcl_GetStringFromObj(objPtr, &length); if (length > 0) { break; } } if (i == objc) { + Tcl_Obj **listv; + int listc; + resPtr = NULL; for (i = 0; i < objc; i++) { + /* + * Tcl_ListObjAppendList could be used here, but this saves us a + * bit of type checking (since we've already done it). Use of + * INT_MAX tells us to always put the new stuff on the end. It + * will be set right in Tcl_ListObjReplace. + * Note that all objs at this point are either lists or have an + * empty string rep. + */ + objPtr = objv[i]; - if (!TclListObjIsCanonical(objPtr)) { + if (objPtr->bytes && objPtr->length == 0) { continue; } - if (resPtr) { - Tcl_Obj *elemPtr = NULL; - - Tcl_ListObjIndex(NULL, objPtr, 0, &elemPtr); - if (elemPtr == NULL) { - continue; - } - if (Tcl_GetString(elemPtr)[0] == '#' || TCL_OK - != Tcl_ListObjAppendList(NULL, resPtr, objPtr)) { - /* Abandon ship! */ - Tcl_DecrRefCount(resPtr); - goto slow; + TclListObjGetElements(NULL, objPtr, &listc, &listv); + if (listc) { + if (resPtr) { + if (TCL_OK != Tcl_ListObjReplace(NULL, resPtr, + INT_MAX, 0, listc, listv)) { + /* Abandon ship! */ + Tcl_DecrRefCount(resPtr); + goto slow; + } + } else { + resPtr = TclListObjCopy(NULL, objPtr); } - } else { - resPtr = TclListObjCopy(NULL, objPtr); } } if (!resPtr) { - TclNewObj(resPtr); + resPtr = Tcl_NewObj(); } return resPtr; } - slow: /* * Something cannot be determined to be safe, so build the concatenation * the slow way, using the string representations. - * - * First try to preallocate the size required. */ + slow: + /* First try to pre-allocate the size required */ for (i = 0; i < objc; i++) { element = TclGetStringFromObj(objv[i], &elemLength); bytesNeeded += elemLength; @@ -2075,43 +1822,40 @@ Tcl_ConcatObj( break; } } - /* - * Does not matter if this fails, will simply try later to build up the - * string with each Append reallocating as needed with the usual string - * append algorithm. When that fails it will report the error. + * Does not matter if this fails, will simply try later to build up + * the string with each Append reallocating as needed with the usual + * string append algorithm. When that fails it will report the error. */ - TclNewObj(resPtr); - (void) Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1); + Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1); Tcl_SetObjLength(resPtr, 0); for (i = 0; i < objc; i++) { - Tcl_Size triml, trimr; + int trim; element = TclGetStringFromObj(objv[i], &elemLength); - /* Trim away the leading/trailing whitespace. */ - triml = TclTrim(element, elemLength, CONCAT_TRIM_SET, - CONCAT_WS_SIZE, &trimr); - element += triml; - elemLength -= triml + trimr; - - /* Do not permit trimming to expose a final backslash character. */ - elemLength += trimr && (element[elemLength - 1] == '\\'); + /* Trim away the leading whitespace */ + trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE); + element += trim; + elemLength -= trim; /* - * If we're left with empty element after trimming, do nothing. + * Trim away the trailing whitespace. Do not permit trimming + * to expose a final backslash character. */ + trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE); + trim -= trim && (element[elemLength - trim - 1] == '\\'); + elemLength -= trim; + + /* If we're left with empty element after trimming, do nothing */ if (elemLength == 0) { continue; } - /* - * Append to the result with space if needed. - */ - + /* Append to the result with space if needed */ if (needSpace) { Tcl_AppendToObj(resPtr, " ", 1); } @@ -2121,7 +1865,6 @@ Tcl_ConcatObj( return resPtr; } -#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 /* *---------------------------------------------------------------------- * @@ -2140,16 +1883,15 @@ Tcl_ConcatObj( *---------------------------------------------------------------------- */ -#undef Tcl_StringMatch int Tcl_StringMatch( - const char *str, /* String. */ - const char *pattern) /* Pattern, which may contain special + CONST char *str, /* String. */ + CONST char *pattern) /* Pattern, which may contain special * characters. */ { return Tcl_StringCaseMatch(str, pattern, 0); } -#endif /* TCL_NO_DEPRECATED */ + /* *---------------------------------------------------------------------- * @@ -2171,13 +1913,14 @@ Tcl_StringMatch( int Tcl_StringCaseMatch( - const char *str, /* String. */ - const char *pattern, /* Pattern, which may contain special + CONST char *str, /* String. */ + CONST char *pattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { int p, charLen; - int ch1 = 0, ch2 = 0; + CONST char *pstart = pattern; + Tcl_UniChar ch1, ch2; while (1) { p = *pattern; @@ -2218,10 +1961,10 @@ Tcl_StringCaseMatch( */ if (UCHAR(*pattern) < 0x80) { - ch2 = (int) + ch2 = (Tcl_UniChar) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); } else { - TclUtfToUniChar(pattern, &ch2); + Tcl_UtfToUniChar(pattern, &ch2); if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } @@ -2287,15 +2030,15 @@ Tcl_StringCaseMatch( */ if (p == '[') { - int startChar = 0, endChar = 0; + Tcl_UniChar startChar, endChar; pattern++; if (UCHAR(*str) < 0x80) { - ch1 = (int) + ch1 = (Tcl_UniChar) (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); str++; } else { - str += TclUtfToUniChar(str, &ch1); + str += Tcl_UtfToUniChar(str, &ch1); if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } @@ -2305,11 +2048,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - startChar = (int) (nocase + startChar = (Tcl_UniChar) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += TclUtfToUniChar(pattern, &startChar); + pattern += Tcl_UtfToUniChar(pattern, &startChar); if (nocase) { startChar = Tcl_UniCharToLower(startChar); } @@ -2320,11 +2063,11 @@ Tcl_StringCaseMatch( return 0; } if (UCHAR(*pattern) < 0x80) { - endChar = (int) (nocase + endChar = (Tcl_UniChar) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += TclUtfToUniChar(pattern, &endChar); + pattern += Tcl_UtfToUniChar(pattern, &endChar); if (nocase) { endChar = Tcl_UniCharToLower(endChar); } @@ -2341,13 +2084,10 @@ Tcl_StringCaseMatch( break; } } - /* If we reach here, we matched. Need to move past closing ] */ while (*pattern != ']') { if (*pattern == '\0') { - /* We ran out of pattern after matching something in - * (unclosed!) brackets. So long as we ran out of string - * at the same time, we have a match. Otherwise, not. */ - return (*str == '\0'); + pattern = Tcl_UtfPrev(pattern, pstart); + break; } pattern++; } @@ -2406,13 +2146,12 @@ Tcl_StringCaseMatch( int TclByteArrayMatch( - const unsigned char *string,/* String. */ - Tcl_Size strLen, /* Length of String */ - const unsigned char *pattern, - /* Pattern, which may contain special - * characters. */ - Tcl_Size ptnLen, /* Length of Pattern */ - TCL_UNUSED(int) /*flags*/) + const unsigned char *string, /* String. */ + int strLen, /* Length of String */ + const unsigned char *pattern, /* Pattern, which may contain special + * characters. */ + int ptnLen, /* Length of Pattern */ + int flags) { const unsigned char *stringEnd, *patternEnd; unsigned char p; @@ -2519,7 +2258,6 @@ TclByteArrayMatch( /* * Matches ranges of form [a-z] or [z-a]. */ - break; } } else if (startChar == ch1) { @@ -2566,9 +2304,9 @@ TclByteArrayMatch( * * TclStringMatchObj -- * - * See if a particular string matches a particular pattern. Allows case - * insensitivity. This is the generic multi-type handler for the various - * matching algorithms. + * See if a particular string matches a particular pattern. + * Allows case insensitivity. This is the generic multi-type handler + * for the various matching algorithms. * * Results: * The return value is 1 if string matches pattern, and 0 otherwise. The @@ -2583,13 +2321,11 @@ TclByteArrayMatch( int TclStringMatchObj( - Tcl_Obj *strObj, /* string object. */ - Tcl_Obj *ptnObj, /* pattern object. */ - int flags) /* Only TCL_MATCH_NOCASE should be passed, or - * 0. */ + Tcl_Obj *strObj, /* string object. */ + Tcl_Obj *ptnObj, /* pattern object. */ + int flags) /* Only TCL_MATCH_NOCASE should be passed or 0. */ { - int match; - Tcl_Size length, plen; + int match, length, plen; /* * Promote based on the type of incoming object. @@ -2598,14 +2334,13 @@ TclStringMatchObj( trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj)); */ - if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) { + if (strObj->typePtr == &tclStringType) { Tcl_UniChar *udata, *uptn; - udata = TclGetUnicodeFromObj(strObj, &length); - uptn = TclGetUnicodeFromObj(ptnObj, &plen); + udata = Tcl_GetUnicodeFromObj(strObj, &length); + uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); - } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj) - && !flags) { + } else if (TclIsPureByteArray(strObj) && !flags) { unsigned char *data, *ptn; data = Tcl_GetByteArrayFromObj(strObj, &length); @@ -2667,13 +2402,15 @@ Tcl_DStringInit( char * Tcl_DStringAppend( Tcl_DString *dsPtr, /* Structure describing dynamic string. */ - const char *bytes, /* String to append. If length is - * < 0 then this must be null-terminated. */ - Tcl_Size length) /* Number of bytes from "bytes" to append. If + CONST char *bytes, /* String to append. If length is -1 then this + * must be null-terminated. */ + int length) /* Number of bytes from "bytes" to append. If * < 0, then append all of bytes, up to null * at end. */ { - Tcl_Size newSize; + int newSize; + char *dst; + CONST char *end; if (length < 0) { length = strlen(bytes); @@ -2689,24 +2426,13 @@ Tcl_DStringAppend( if (newSize >= dsPtr->spaceAvl) { dsPtr->spaceAvl = newSize * 2; if (dsPtr->string == dsPtr->staticSpace) { - char *newString = (char *)ckalloc(dsPtr->spaceAvl); + char *newString = ckalloc((unsigned) dsPtr->spaceAvl); - memcpy(newString, dsPtr->string, dsPtr->length); + memcpy(newString, dsPtr->string, (size_t) dsPtr->length); dsPtr->string = newString; } else { - Tcl_Size offset = -1; - - /* See [16896d49fd] */ - if (bytes >= dsPtr->string - && bytes <= dsPtr->string + dsPtr->length) { - /* Source string is within this DString. Note offset */ - offset = bytes - dsPtr->string; - } - dsPtr->string = - (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); - if (offset >= 0) { - bytes = dsPtr->string + offset; - } + dsPtr->string = ckrealloc((void *) dsPtr->string, + (size_t) dsPtr->spaceAvl); } } @@ -2714,46 +2440,18 @@ Tcl_DStringAppend( * Copy the new string into the buffer at the end of the old one. */ - memcpy(dsPtr->string + dsPtr->length, bytes, length); + for (dst = dsPtr->string + dsPtr->length, end = bytes+length; + bytes < end; bytes++, dst++) { + *dst = *bytes; + } + *dst = '\0'; dsPtr->length += length; - dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; } /* *---------------------------------------------------------------------- * - * TclDStringAppendObj, TclDStringAppendDString -- - * - * Simple wrappers round Tcl_DStringAppend that make it easier to append - * from particular sources of strings. - * - *---------------------------------------------------------------------- - */ - -char * -TclDStringAppendObj( - Tcl_DString *dsPtr, - Tcl_Obj *objPtr) -{ - Tcl_Size length; - const char *bytes = TclGetStringFromObj(objPtr, &length); - - return Tcl_DStringAppend(dsPtr, bytes, length); -} - -char * -TclDStringAppendDString( - Tcl_DString *dsPtr, - Tcl_DString *toAppendPtr) -{ - return Tcl_DStringAppend(dsPtr, Tcl_DStringValue(toAppendPtr), - Tcl_DStringLength(toAppendPtr)); -} - -/* - *---------------------------------------------------------------------- - * * Tcl_DStringAppendElement -- * * Append a list element to the current value of a dynamic string. @@ -2772,43 +2470,14 @@ TclDStringAppendDString( char * Tcl_DStringAppendElement( Tcl_DString *dsPtr, /* Structure describing dynamic string. */ - const char *element) /* String to append. Must be + CONST char *element) /* String to append. Must be * null-terminated. */ { char *dst = dsPtr->string + dsPtr->length; int needSpace = TclNeedSpace(dsPtr->string, dst); - char flags = 0; - int quoteHash = 1; - Tcl_Size newSize; - - if (needSpace) { - /* - * If we need a space to separate the new element from something - * already ending the string, we're not appending the first element - * of any list, so we need not quote any leading hash character. - */ - quoteHash = 0; - } else { - /* - * We don't need a space, maybe because there's some already there. - * Checking whether we might be appending a first element is a bit - * more involved. - * - * Backtrack over all whitespace. - */ - while ((--dst >= dsPtr->string) && TclIsSpaceProcM(*dst)) { - } - - /* Call again without whitespace to confound things. */ - quoteHash = !TclNeedSpace(dsPtr->string, dst+1); - } - if (!quoteHash) { - flags |= TCL_DONT_QUOTE_HASH; - } - newSize = dsPtr->length + needSpace + TclScanElement(element, TCL_INDEX_NONE, &flags); - if (!quoteHash) { - flags |= TCL_DONT_QUOTE_HASH; - } + int flags = needSpace ? TCL_DONT_QUOTE_HASH : 0; + int newSize = dsPtr->length + needSpace + + TclScanElement(element, -1, &flags); /* * Allocate a larger buffer for the string if the current one isn't large @@ -2821,27 +2490,16 @@ Tcl_DStringAppendElement( if (newSize >= dsPtr->spaceAvl) { dsPtr->spaceAvl = newSize * 2; if (dsPtr->string == dsPtr->staticSpace) { - char *newString = (char *)ckalloc(dsPtr->spaceAvl); + char *newString = ckalloc((unsigned) dsPtr->spaceAvl); - memcpy(newString, dsPtr->string, dsPtr->length); + memcpy(newString, dsPtr->string, (size_t) dsPtr->length); dsPtr->string = newString; } else { - int offset = -1; - - /* See [16896d49fd] */ - if (element >= dsPtr->string - && element <= dsPtr->string + dsPtr->length) { - /* Source string is within this DString. Note offset */ - offset = element - dsPtr->string; - } - dsPtr->string = - (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); - if (offset >= 0) { - element = dsPtr->string + offset; - } + dsPtr->string = (char *) ckrealloc((void *) dsPtr->string, + (size_t) dsPtr->spaceAvl); } + dst = dsPtr->string + dsPtr->length; } - dst = dsPtr->string + dsPtr->length; /* * Convert the new string to a list element and copy it into the buffer at @@ -2852,9 +2510,16 @@ Tcl_DStringAppendElement( *dst = ' '; dst++; dsPtr->length++; - } - dsPtr->length += TclConvertElement(element, TCL_INDEX_NONE, dst, flags); + /* + * If we need a space to separate this element from preceding stuff, + * then this element will not lead a list, and need not have it's + * leading '#' quoted. + */ + + flags |= TCL_DONT_QUOTE_HASH; + } + dsPtr->length += TclConvertElement(element, -1, dst, flags); dsPtr->string[dsPtr->length] = '\0'; return dsPtr->string; } @@ -2881,9 +2546,9 @@ Tcl_DStringAppendElement( void Tcl_DStringSetLength( Tcl_DString *dsPtr, /* Structure describing dynamic string. */ - Tcl_Size length) /* New length for dynamic string. */ + int length) /* New length for dynamic string. */ { - Tcl_Size newsize; + int newsize; if (length < 0) { length = 0; @@ -2908,12 +2573,13 @@ Tcl_DStringSetLength( dsPtr->spaceAvl = length + 1; } if (dsPtr->string == dsPtr->staticSpace) { - char *newString = (char *)ckalloc(dsPtr->spaceAvl); + char *newString = ckalloc((unsigned) dsPtr->spaceAvl); - memcpy(newString, dsPtr->string, dsPtr->length); + memcpy(newString, dsPtr->string, (size_t) dsPtr->length); dsPtr->string = newString; } else { - dsPtr->string = (char *)ckrealloc(dsPtr->string, dsPtr->spaceAvl); + dsPtr->string = (char *) ckrealloc((void *) dsPtr->string, + (size_t) dsPtr->spaceAvl); } } dsPtr->length = length; @@ -2976,7 +2642,22 @@ Tcl_DStringResult( Tcl_DString *dsPtr) /* Dynamic string that is to become the * result of interp. */ { - Tcl_SetObjResult(interp, Tcl_DStringToObj(dsPtr)); + Tcl_ResetResult(interp); + + if (dsPtr->string != dsPtr->staticSpace) { + interp->result = dsPtr->string; + interp->freeProc = TCL_DYNAMIC; + } else if (dsPtr->length < TCL_RESULT_SIZE) { + interp->result = ((Interp *) interp)->resultSpace; + strcpy(interp->result, dsPtr->string); + } else { + Tcl_SetResult(interp, dsPtr->string, TCL_VOLATILE); + } + + dsPtr->string = dsPtr->staticSpace; + dsPtr->length = 0; + dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; + dsPtr->staticSpace[0] = '\0'; } /* @@ -3005,14 +2686,6 @@ Tcl_DStringGetResult( Tcl_DString *dsPtr) /* Dynamic string that is to become the result * of interp. */ { -#if defined(TCL_NO_DEPRECATED) || TCL_MAJOR_VERSION > 8 - Tcl_Obj *obj = Tcl_GetObjResult(interp); - const char *bytes = TclGetString(obj); - - Tcl_DStringFree(dsPtr); - Tcl_DStringAppend(dsPtr, bytes, obj->length); - Tcl_ResetResult(interp); -#else Interp *iPtr = (Interp *) interp; if (dsPtr->string != dsPtr->staticSpace) { @@ -3020,39 +2693,6 @@ Tcl_DStringGetResult( } /* - * Do more efficient transfer when we know the result is a Tcl_Obj. When - * there's no string result, we only have to deal with two cases: - * - * 1. When the string rep is the empty string, when we don't copy but - * instead use the staticSpace in the DString to hold an empty string. - - * 2. When the string rep is not there or there's a real string rep, when - * we use Tcl_GetString to fetch (or generate) the string rep - which - * we know to have been allocated with ckalloc() - and use it to - * populate the DString space. Then, we free the internal rep. and set - * the object's string representation back to the canonical empty - * string. - */ - - if (!iPtr->result[0] && iPtr->objResultPtr - && !Tcl_IsShared(iPtr->objResultPtr)) { - if (iPtr->objResultPtr->bytes == &tclEmptyString) { - dsPtr->string = dsPtr->staticSpace; - dsPtr->string[0] = 0; - dsPtr->length = 0; - dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; - } else { - dsPtr->string = TclGetString(iPtr->objResultPtr); - dsPtr->length = iPtr->objResultPtr->length; - dsPtr->spaceAvl = dsPtr->length + 1; - TclFreeInternalRep(iPtr->objResultPtr); - iPtr->objResultPtr->bytes = &tclEmptyString; - iPtr->objResultPtr->length = 0; - } - return; - } - - /* * If the string result is empty, move the object result to the string * result, then reset the object result. */ @@ -3065,9 +2705,9 @@ Tcl_DStringGetResult( dsPtr->string = iPtr->result; dsPtr->spaceAvl = dsPtr->length+1; } else { - dsPtr->string = (char *)ckalloc(dsPtr->length+1); - memcpy(dsPtr->string, iPtr->result, dsPtr->length+1); - iPtr->freeProc(iPtr->result); + dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length+1)); + memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1); + (*iPtr->freeProc)(iPtr->result); } dsPtr->spaceAvl = dsPtr->length+1; iPtr->freeProc = NULL; @@ -3076,75 +2716,14 @@ Tcl_DStringGetResult( dsPtr->string = dsPtr->staticSpace; dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; } else { - dsPtr->string = (char *)ckalloc(dsPtr->length+1); + dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length + 1)); dsPtr->spaceAvl = dsPtr->length + 1; } - memcpy(dsPtr->string, iPtr->result, dsPtr->length+1); + memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1); } iPtr->result = iPtr->resultSpace; iPtr->resultSpace[0] = 0; -#endif /* !TCL_NO_DEPRECATED */ -} - -/* - *---------------------------------------------------------------------- - * - * Tcl_DStringToObj -- - * - * This function moves a dynamic string's contents to a new Tcl_Obj. Be - * aware that this function does *not* check that the encoding of the - * contents of the dynamic string is correct; this is the caller's - * responsibility to enforce. - * - * Results: - * The newly-allocated untyped (i.e., typePtr==NULL) Tcl_Obj with a - * reference count of zero. - * - * Side effects: - * The string is "moved" to the object. dsPtr is reinitialized to an - * empty string; it does not need to be Tcl_DStringFree'd after this if - * not used further. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj * -Tcl_DStringToObj( - Tcl_DString *dsPtr) -{ - Tcl_Obj *result; - - if (dsPtr->string == dsPtr->staticSpace) { - if (dsPtr->length == 0) { - TclNewObj(result); - } else { - /* - * Static buffer, so must copy. - */ - - TclNewStringObj(result, dsPtr->string, dsPtr->length); - } - } else { - /* - * Dynamic buffer, so transfer ownership and reset. - */ - - TclNewObj(result); - result->bytes = dsPtr->string; - result->length = dsPtr->length; - } - - /* - * Re-establish the DString as empty with no buffer allocated. - */ - - dsPtr->string = dsPtr->staticSpace; - dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; - dsPtr->length = 0; - dsPtr->staticSpace[0] = '\0'; - - return result; } /* @@ -3170,9 +2749,9 @@ Tcl_DStringStartSublist( Tcl_DString *dsPtr) /* Dynamic string. */ { if (TclNeedSpace(dsPtr->string, dsPtr->string + dsPtr->length)) { - TclDStringAppendLiteral(dsPtr, " {"); + Tcl_DStringAppend(dsPtr, " {", -1); } else { - TclDStringAppendLiteral(dsPtr, "{"); + Tcl_DStringAppend(dsPtr, "{", -1); } } @@ -3198,7 +2777,7 @@ void Tcl_DStringEndSublist( Tcl_DString *dsPtr) /* Dynamic string. */ { - TclDStringAppendLiteral(dsPtr, "}"); + Tcl_DStringAppend(dsPtr, "}", -1); } /* @@ -3223,7 +2802,9 @@ Tcl_DStringEndSublist( void Tcl_PrintDouble( - TCL_UNUSED(Tcl_Interp *), + Tcl_Interp *interp, /* Interpreter whose tcl_precision variable + * used to be used to control printing. It's + * ignored now. */ double value, /* Value to print as string. */ char *dst) /* Where to store converted value; must have * at least TCL_DOUBLE_SPACE characters. */ @@ -3231,90 +2812,91 @@ Tcl_PrintDouble( char *p, c; int exponent; int signum; - char *digits; - char *end; - int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int)); + char* digits; + char* end; + + int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int)sizeof(int)); /* - * Handle NaN. - */ + * Handle NaN. + */ - if (isnan(value)) { - TclFormatNaN(value, dst); - return; - } + if (TclIsNaN(value)) { + TclFormatNaN(value, dst); + return; + } - /* - * Handle infinities. - */ + /* + * Handle infinities. + */ - if (isinf(value)) { + if (TclIsInfinite(value)) { /* * Remember to copy the terminating NUL too. */ - if (value < 0) { + if (value < 0) { memcpy(dst, "-Inf", 5); - } else { + } else { memcpy(dst, "Inf", 4); + } + return; } - return; - } - /* - * Ordinary (normal and denormal) values. - */ + /* + * Ordinary (normal and denormal) values. + */ if (*precisionPtr == 0) { - digits = TclDoubleDigits(value, TCL_INDEX_NONE, TCL_DD_SHORTEST, - &exponent, &signum, &end); + digits = TclDoubleDigits(value, -1, TCL_DD_SHORTEST, + &exponent, &signum, &end); } else { /* * There are at least two possible interpretations for tcl_precision. * * The first is, "choose the decimal representation having - * $tcl_precision digits of significance that is nearest to the given - * number, breaking ties by rounding to even, and then trimming - * trailing zeros." This gives the greatest possible precision in the - * decimal string, but offers the anomaly that [expr 0.1] will be - * "0.10000000000000001". + * $tcl_precision digits of significance that is nearest to the + * given number, breaking ties by rounding to even, and then + * trimming trailing zeros." This gives the greatest possible + * precision in the decimal string, but offers the anomaly that + * [expr 0.1] will be "0.10000000000000001". * - * The second is "choose the decimal representation having at most - * $tcl_precision digits of significance that is nearest to the given - * number. If no such representation converts exactly to the given - * number, choose the one that is closest, breaking ties by rounding - * to even. If more than one such representation converts exactly to - * the given number, choose the shortest, breaking ties in favour of - * the nearest, breaking remaining ties in favour of the one ending in - * an even digit." + * The second is "choose the decimal representation having at + * most $tcl_precision digits of significance that is nearest + * to the given number. If no such representation converts + * exactly to the given number, choose the one that is closest, + * breaking ties by rounding to even. If more than one such + * representation converts exactly to the given number, choose + * the shortest, breaking ties in favour of the nearest, breaking + * remaining ties in favour of the one ending in an even digit." * * Tcl 8.4 implements the first of these, which gives rise to * anomalies in formatting: * - * % expr 0.1 - * 0.10000000000000001 - * % expr 0.01 - * 0.01 - * % expr 1e-7 - * 9.9999999999999995e-08 + * % expr 0.1 + * 0.10000000000000001 + * % expr 0.01 + * 0.01 + * % expr 1e-7 + * 9.9999999999999995e-08 * * For human readability, it appears better to choose the second rule, - * and let [expr 0.1] return 0.1. But for 8.4 compatibility, we prefer - * the first (the recommended zero value for tcl_precision avoids the - * problem entirely). + * and let [expr 0.1] return 0.1. But for 8.4 compatibility, we + * prefer the first (the recommended zero value for tcl_precision + * avoids the problem entirely). * - * Uncomment TCL_DD_SHORTEST in the next call to prefer the method - * that allows floating point values to be shortened if it can be done - * without loss of precision. + * Uncomment TCL_DD_SHORTEN_FLAG in the next call to prefer the + * method that allows floating point values to be shortened if + * it can be done without loss of precision. */ digits = TclDoubleDigits(value, *precisionPtr, - TCL_DD_E_FORMAT /* | TCL_DD_SHORTEST */, - &exponent, &signum, &end); - } - if (signum) { - *dst++ = '-'; + TCL_DD_E_FORMAT /* | TCL_DD_SHORTEN_FLAG */, + &exponent, &signum, &end); } + if (signum) { + *dst++ = '-'; + } p = digits; if (exponent < -4 || exponent > 16) { /* @@ -3330,16 +2912,14 @@ Tcl_PrintDouble( c = *++p; } } - /* - * Tcl 8.4 appears to format with at least a two-digit exponent; + * Tcl 8.4 appears to format with at least a two-digit exponent; \ * preserve that behaviour when tcl_precision != 0 */ - if (*precisionPtr == 0) { - snprintf(dst, TCL_DOUBLE_SPACE, "e%+d", exponent); + sprintf(dst, "e%+d", exponent); } else { - snprintf(dst, TCL_DOUBLE_SPACE, "e%+03d", exponent); + sprintf(dst, "e%+03d", exponent); } } else { /* @@ -3395,18 +2975,18 @@ Tcl_PrintDouble( *---------------------------------------------------------------------- */ -#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 + /* ARGSUSED */ char * TclPrecTraceProc( - void *clientData, + ClientData clientData, /* Not used. */ Tcl_Interp *interp, /* Interpreter containing variable. */ - const char *name1, /* Name of variable. */ - const char *name2, /* Second part of variable name. */ + CONST char *name1, /* Name of variable. */ + CONST char *name2, /* Second part of variable name. */ int flags) /* Information about what happened. */ { - Tcl_Obj *value; - Tcl_WideInt prec; - int *precisionPtr = (int *)Tcl_GetThreadData(&precisionKey, sizeof(int)); + Tcl_Obj* value; + int prec; + int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int)); /* * If the variable is unset, then recreate the trace. @@ -3429,7 +3009,7 @@ TclPrecTraceProc( if (flags & TCL_TRACE_READS) { - Tcl_SetVar2Ex(interp, name1, name2, Tcl_NewWideIntObj(*precisionPtr), + Tcl_SetVar2Ex(interp, name1, name2, Tcl_NewIntObj(*precisionPtr), flags & TCL_GLOBAL_ONLY); return NULL; } @@ -3441,18 +3021,17 @@ TclPrecTraceProc( */ if (Tcl_IsSafe(interp)) { - return (char *) "can't modify precision from a safe interpreter"; + return "can't modify precision from a safe interpreter"; } value = Tcl_GetVar2Ex(interp, name1, name2, flags & TCL_GLOBAL_ONLY); if (value == NULL - || Tcl_GetWideIntFromObj(NULL, value, &prec) != TCL_OK + || Tcl_GetIntFromObj((Tcl_Interp*) NULL, value, &prec) != TCL_OK || prec < 0 || prec > TCL_MAX_PREC) { - return (char *) "improper value for precision"; + return "improper value for precision"; } - *precisionPtr = (int)prec; + *precisionPtr = prec; return NULL; } -#endif /* !TCL_NO_DEPRECATED)*/ /* *---------------------------------------------------------------------- @@ -3473,76 +3052,69 @@ TclPrecTraceProc( int TclNeedSpace( - const char *start, /* First character in string. */ - const char *end) /* End of string (place where space will be + CONST char *start, /* First character in string. */ + CONST char *end) /* End of string (place where space will be * added, if appropriate). */ { /* * A space is needed unless either: * (a) we're at the start of the string, or - * - * (NOTE: This check is now absorbed into the loop below.) - * + */ if (end == start) { return 0; } - * - */ - /* * (b) we're at the start of a nested list-element, quoted with an open * curly brace; we can be nested arbitrarily deep, so long as the * first curly brace starts an element, so backtrack over open curly * braces that are trailing characters of the string; and - * - * (NOTE: Every character our parser is looking for is a proper - * single-byte encoding of an ASCII value. It does not accept - * overlong encodings. Given that, there's no benefit using - * Tcl_UtfPrev. If it would find what we seek, so would byte-by-byte - * backward scan. Save routine call overhead and risk of wrong - * results should the behavior of Tcl_UtfPrev change in unexpected ways. - * Reconsider this if we ever start treating non-ASCII Unicode - * characters as meaningful list syntax, expanded Unicode spaces as - * element separators, for example.) - * + */ end = Tcl_UtfPrev(end, start); while (*end == '{') { - if (end == start) { - return 0; - } - end = Tcl_UtfPrev(end, start); - } - - * - */ - - while ((--end >= start) && (*end == '{')) { - } - if (end < start) { - return 0; + if (end == start) { + return 0; + } + end = Tcl_UtfPrev(end, start); } /* * (c) the trailing character of the string is already a list-element - * separator, Use the same testing routine as TclFindElement to - * enforce consistency. + * separator (according to TclFindElement); that is, one of these + * characters: + * \u0009 \t TAB + * \u000A \n NEWLINE + * \u000B \v VERTICAL TAB + * \u000C \f FORM FEED + * \u000D \r CARRIAGE RETURN + * \u0020 SPACE + * with the condition that the penultimate character is not a + * backslash. */ - if (TclIsSpaceProcM(*end)) { - int result = 0; - + if (*end > 0x20) { /* - * Trailing whitespace might be part of a backslash escape - * sequence. Handle that possibility. + * Performance tweak. All ASCII spaces are <= 0x20. So get a quick + * answer for most characters before comparing against all spaces in + * the switch below. + * + * NOTE: Remove this if other Unicode spaces ever get accepted as + * list-element separators. */ - - while ((--end >= start) && (*end == '\\')) { - result = !result; + return 1; + } + switch (*end) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\v': + case '\f': + if ((end == start) || (end[-1] != '\\')) { + return 0; } - return result; } return 1; } @@ -3555,47 +3127,72 @@ TclNeedSpace( * This procedure formats an integer into a sequence of decimal digit * characters in a buffer. If the integer is negative, a minus sign is * inserted at the start of the buffer. A null character is inserted at - * the end of the formatted characters. It is the caller's responsibility - * to ensure that enough storage is available. This procedure has the - * effect of sprintf(buffer, "%ld", n) but is faster as proven in - * benchmarks. This is key to UpdateStringOfInt, which is a common path - * for a lot of code (e.g. int-indexed arrays). + * the end of the formatted characters. It is the caller's + * responsibility to ensure that enough storage is available. This + * procedure has the effect of sprintf(buffer, "%ld", n) but is faster + * as proven in benchmarks. This is key to UpdateStringOfInt, which + * is a common path for a lot of code (e.g. int-indexed arrays). * * Results: * An integer representing the number of characters formatted, not * including the terminating \0. * * Side effects: - * The formatted characters are written into the storage pointer to by - * the "buffer" argument. + * The formatted characters are written into the storage pointer to + * by the "buffer" argument. * *---------------------------------------------------------------------- */ -Tcl_Size -TclFormatInt( - char *buffer, /* Points to the storage into which the +int +TclFormatInt(buffer, n) + char *buffer; /* Points to the storage into which the * formatted characters are written. */ - Tcl_WideInt n) /* The integer to format. */ + long n; /* The integer to format. */ { - Tcl_WideUInt intVal; - int i = 0, numFormatted, j; - static const char digits[] = "0123456789"; + long intVal; + int i; + int numFormatted, j; + char *digits = "0123456789"; + + /* + * Check first whether "n" is zero. + */ + + if (n == 0) { + buffer[0] = '0'; + buffer[1] = 0; + return 1; + } + + /* + * Check whether "n" is the maximum negative value. This is + * -2^(m-1) for an m-bit word, and has no positive equivalent; + * negating it produces the same value. + */ + + intVal = -n; /* [Bug 3390638] Workaround for*/ + if (n == -n || intVal == n) { /* broken compiler optimizers. */ + return sprintf(buffer, "%ld", n); + } /* * Generate the characters of the result backwards in the buffer. */ - intVal = (n < 0 ? -(Tcl_WideUInt)n : (Tcl_WideUInt)n); + intVal = (n < 0? -n : n); + i = 0; + buffer[0] = '\0'; do { - buffer[i++] = digits[intVal % 10]; - intVal = intVal / 10; + i++; + buffer[i] = digits[intVal % 10]; + intVal = intVal/10; } while (intVal > 0); if (n < 0) { - buffer[i++] = '-'; + i++; + buffer[i] = '-'; } - buffer[i] = '\0'; - numFormatted = i--; + numFormatted = i; /* * Now reverse the characters. @@ -3603,7 +3200,6 @@ TclFormatInt( for (j = 0; j < i; j++, i--) { char tmp = buffer[i]; - buffer[i] = buffer[j]; buffer[j] = tmp; } @@ -3613,391 +3209,116 @@ TclFormatInt( /* *---------------------------------------------------------------------- * - * GetWideForIndex -- + * TclGetIntForIndex -- * - * This function produces a wide integer value corresponding to the - * index value held in *objPtr. The parsing supports all values - * recognized as any size of integer, and the syntaxes end[-+]$integer - * and $integer[-+]$integer. The argument endValue is used to give - * the meaning of the literal index value "end". Index arithmetic - * on arguments outside the wide integer range are only accepted - * when interp is a working interpreter, not NULL. + * This function returns an integer corresponding to the list index held + * in a Tcl object. The Tcl object's value is expected to be in the + * format integer([+-]integer)? or the format end([+-]integer)?. * * Results: - * When parsing of *objPtr successfully recognizes an index value, - * TCL_OK is returned, and the wide integer value corresponding to - * the recognized index value is written to *widePtr. When parsing - * fails, TCL_ERROR is returned and error information is written to - * interp, if non-NULL. + * The return value is normally TCL_OK, which means that the index was + * successfully stored into the location referenced by "indexPtr". If the + * Tcl object referenced by "objPtr" has the value "end", the value + * stored is "endValue". If "objPtr"s values is not of one of the + * expected formats, TCL_ERROR is returned and, if "interp" is non-NULL, + * an error message is left in the interpreter's result object. * * Side effects: - * The type of *objPtr may change. - * - *---------------------------------------------------------------------- - */ - -static int -GetWideForIndex( - Tcl_Interp *interp, /* Interpreter to use for error reporting. If - * NULL, then no error message is left after - * errors. */ - Tcl_Obj *objPtr, /* Points to the value to be parsed */ - Tcl_WideInt endValue, /* The value to be stored at *widePtr if - * objPtr holds "end". - * NOTE: this value may be TCL_INDEX_NONE. */ - Tcl_WideInt *widePtr) /* Location filled in with a wide integer - * representing an index. */ -{ - int numType; - void *cd; - int code = Tcl_GetNumberFromObj(NULL, objPtr, &cd, &numType); - - if (code == TCL_OK) { - if (numType == TCL_NUMBER_INT) { - /* objPtr holds an integer in the signed wide range */ - *widePtr = *(Tcl_WideInt *)cd; - if ((*widePtr < 0)) { - *widePtr = (endValue == -1) ? WIDE_MIN : -1; - } - return TCL_OK; - } - if (numType == TCL_NUMBER_BIG) { - /* objPtr holds an integer outside the signed wide range */ - /* Truncate to the signed wide range. */ - *widePtr = ((mp_isneg((mp_int *)cd)) ? ((endValue == -1) ? WIDE_MIN : -1) : WIDE_MAX); - return TCL_OK; - } - } - - /* objPtr does not hold a number, check the end+/- format... */ - return GetEndOffsetFromObj(interp, objPtr, endValue, widePtr); -} - -/* - *---------------------------------------------------------------------- - * - * Tcl_GetIntForIndex -- - * - * Provides an integer corresponding to the list index held in a Tcl - * object. The string value 'objPtr' is expected have the format - * integer([+-]integer)? or end([+-]integer)?. - * - * If the computed index lies within the valid range of Tcl indices - * (0..TCL_SIZE_MAX) it is returned. Higher values are returned as - * TCL_SIZE_MAX. Negative values are returned as TCL_INDEX_NONE (-1). - * - * - * Results: - * TCL_OK - * - * The index is stored at the address given by by 'indexPtr'. If - * 'objPtr' has the value "end", the value stored is 'endValue'. - * - * TCL_ERROR - * - * The value of 'objPtr' does not have one of the expected formats. If - * 'interp' is non-NULL, an error message is left in the interpreter's - * result object. - * - * Effect - * - * The object referenced by 'objPtr' is converted, as needed, to an - * integer, wide integer, or end-based-index object. + * The object referenced by "objPtr" might be converted to an integer, + * wide integer, or end-based-index object. * *---------------------------------------------------------------------- */ int -Tcl_GetIntForIndex( +TclGetIntForIndex( Tcl_Interp *interp, /* Interpreter to use for error reporting. If * NULL, then no error message is left after * errors. */ Tcl_Obj *objPtr, /* Points to an object containing either "end" * or an integer. */ - Tcl_Size endValue, /* The value corresponding to the "end" index */ - Tcl_Size *indexPtr) /* Location filled in with an integer - * representing an index. May be NULL.*/ + int endValue, /* The value to be stored at "indexPtr" if + * "objPtr" holds "end". */ + int *indexPtr) /* Location filled in with an integer + * representing an index. */ { - Tcl_WideInt wide; + int length; + char *opPtr, *bytes; - if (GetWideForIndex(interp, objPtr, endValue, &wide) == TCL_ERROR) { - return TCL_ERROR; - } - if (indexPtr != NULL) { - if ((wide < 0) && (endValue >= 0)) { - *indexPtr = TCL_INDEX_NONE; - } else if (wide > INT_MAX) { - *indexPtr = INT_MAX; - } else if (wide < INT_MIN) { - *indexPtr = INT_MIN; - } else { - *indexPtr = (int) wide; - } + if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) { + return TCL_OK; } - return TCL_OK; -} - -/* - *---------------------------------------------------------------------- - * - * GetEndOffsetFromObj -- - * - * Look for a string of the form "end[+-]offset" or "offset[+-]offset" and - * convert it to an internal representation. - * - * The internal representation (wideValue) uses the following encoding: - * - * WIDE_MIN: Index value TCL_INDEX_NONE (or -1) - * WIDE_MIN+1: Index value n, for any n < -1 (usually same effect as -1) - * -$n: Index "end-[expr {$n-1}]" - * -2: Index "end-1" - * -1: Index "end" - * 0: Index "0" - * WIDE_MAX-1: Index "end+n", for any n > 1. Distinguish from end+1 for - * commands like lset. - * WIDE_MAX: Index "end+1" - * - * Results: - * Tcl return code. - * - * Side effects: - * May store a Tcl_ObjType. - * - *---------------------------------------------------------------------- - */ - -static int -GetEndOffsetFromObj( - Tcl_Interp *interp, - Tcl_Obj *objPtr, /* Pointer to the object to parse */ - Tcl_WideInt endValue, /* The value to be stored at "widePtr" if - * "objPtr" holds "end". */ - Tcl_WideInt *widePtr) /* Location filled in with an integer - * representing an index. */ -{ - Tcl_ObjInternalRep *irPtr; - Tcl_WideInt offset = -1; /* Offset in the "end-offset" expression - 1 */ - void *cd; - while ((irPtr = TclFetchInternalRep(objPtr, &endOffsetType)) == NULL) { - Tcl_ObjInternalRep ir; - Tcl_Size length; - const char *bytes = TclGetStringFromObj(objPtr, &length); - - if (*bytes != 'e') { - int numType; - const char *opPtr; - int t1 = 0, t2 = 0; - - /* Value doesn't start with "e" */ - - /* If we reach here, the string rep of objPtr exists. */ - - /* - * The valid index syntax does not include any value that is - * a list of more than one element. This is necessary so that - * lists of index values can be reliably distinguished from any - * single index value. - */ - - /* - * Quick scan to see if multi-value list is even possible. - * This relies on TclGetString() returning a NUL-terminated string. - */ - if ((TclMaxListLength(bytes, TCL_INDEX_NONE, NULL) > 1) - - /* If it's possible, do the full list parse. */ - && (TCL_OK == TclListObjLength(NULL, objPtr, &length)) - && (length > 1)) { - goto parseError; - } - - /* Passed the list screen, so parse for index arithmetic expression */ - if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, NULL, TCL_INDEX_NONE, &opPtr, - TCL_PARSE_INTEGER_ONLY)) { - Tcl_WideInt w1=0, w2=0; - - /* value starts with valid integer... */ - - if ((*opPtr == '-') || (*opPtr == '+')) { - /* ... value continues with [-+] ... */ + if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) { + /* + * If the object is already an offset from the end of the list, or can + * be converted to one, use it. + */ - /* Save first integer as wide if possible */ - Tcl_GetNumberFromObj(NULL, objPtr, &cd, &t1); - if (t1 == TCL_NUMBER_INT) { - w1 = (*(Tcl_WideInt *)cd); - } + *indexPtr = endValue + objPtr->internalRep.longValue; + return TCL_OK; + } - if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, opPtr + 1, - TCL_INDEX_NONE, NULL, TCL_PARSE_INTEGER_ONLY)) { - /* ... value concludes with second valid integer */ + bytes = TclGetStringFromObj(objPtr, &length); - /* Save second integer as wide if possible */ - Tcl_GetNumberFromObj(NULL, objPtr, &cd, &t2); - if (t2 == TCL_NUMBER_INT) { - w2 = (*(Tcl_WideInt *)cd); - } - } - } - /* Clear invalid internalreps left by TclParseNumber */ - TclFreeInternalRep(objPtr); - - if (t1 && t2) { - /* We have both integer values */ - if ((t1 == TCL_NUMBER_INT) && (t2 == TCL_NUMBER_INT)) { - /* Both are wide, do wide-integer math */ - if (*opPtr == '-') { - if (w2 == WIDE_MIN) { - goto extreme; - } - w2 = -w2; - } + /* + * Leading whitespace is acceptable in an index. + */ - if ((w1 ^ w2) < 0) { - /* Different signs, sum cannot overflow */ - offset = w1 + w2; - } else if (w1 >= 0) { - if (w1 < WIDE_MAX - w2) { - offset = w1 + w2; - } else { - offset = WIDE_MAX; - } - } else { - if (w1 > WIDE_MIN - w2) { - offset = w1 + w2; - } else { - offset = WIDE_MIN; - } - } - } else { - /* - * At least one is big, do bignum math. Little reason to - * value performance here. Re-use code. Parse has verified - * objPtr is an expression. Compute it. - */ + while (length && TclIsSpaceProc(*bytes)) { + bytes++; + length--; + } - Tcl_Obj *sum; + if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr, + TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) { + int code, first, second; + char savedOp = *opPtr; - extreme: - if (interp) { - Tcl_ExprObj(interp, objPtr, &sum); - } else { - Tcl_Interp *compute = Tcl_CreateInterp(); - Tcl_ExprObj(compute, objPtr, &sum); - Tcl_DeleteInterp(compute); - } - Tcl_GetNumberFromObj(NULL, sum, &cd, &numType); - - if (numType == TCL_NUMBER_INT) { - /* sum holds an integer in the signed wide range */ - offset = *(Tcl_WideInt *)cd; - } else { - /* sum holds an integer outside the signed wide range */ - /* Truncate to the signed wide range. */ - if (mp_isneg((mp_int *)cd)) { - offset = WIDE_MIN; - } else { - offset = WIDE_MAX; - } - } - Tcl_DecrRefCount(sum); - } - if (offset < 0) { - offset = (offset == -1) ? WIDE_MIN : WIDE_MIN+1; - } - goto parseOK; - } - } + if ((savedOp != '+') && (savedOp != '-')) { goto parseError; } - - if ((length < 3) || (length == 4) || (strncmp(bytes, "end", 3) != 0)) { - /* Doesn't start with "end" */ + if (TclIsSpaceProc(opPtr[1])) { goto parseError; } - if (length > 4) { - int t; - - /* Parse for the "end-..." or "end+..." formats */ - - if ((bytes[3] != '-') && (bytes[3] != '+')) { - /* No operator where we need one */ - goto parseError; - } - if (TclIsSpaceProc(bytes[4])) { - /* Space after + or - not permitted. */ - goto parseError; - } - - /* Parse the integer offset */ - if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, - bytes+4, length-4, NULL, TCL_PARSE_INTEGER_ONLY)) { - /* Not a recognized integer format */ - goto parseError; - } - - /* Got an integer offset; pull it from where parser left it. */ - Tcl_GetNumberFromObj(NULL, objPtr, &cd, &t); - - if (t == TCL_NUMBER_BIG) { - /* Truncate to the signed wide range. */ - if (mp_isneg((mp_int *)cd)) { - offset = (bytes[3] == '-') ? WIDE_MAX : WIDE_MIN; - } else { - offset = (bytes[3] == '-') ? WIDE_MIN : WIDE_MAX; - } - } else { - /* assert (t == TCL_NUMBER_INT); */ - offset = (*(Tcl_WideInt *)cd); - if (bytes[3] == '-') { - offset = (offset == WIDE_MIN) ? WIDE_MAX : -offset; - } - if (offset == 1) { - offset = WIDE_MAX; /* "end+1" */ - } else if (offset > 1) { - offset = WIDE_MAX - 1; /* "end+n", out of range */ - } else if (offset != WIDE_MIN) { - offset--; - } - } + *opPtr = '\0'; + code = Tcl_GetInt(interp, bytes, &first); + *opPtr = savedOp; + if (code == TCL_ERROR) { + goto parseError; } - - parseOK: - /* Success. Store the new internal rep. */ - ir.wideValue = offset; - Tcl_StoreInternalRep(objPtr, &endOffsetType, &ir); + if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) { + goto parseError; + } + if (savedOp == '+') { + *indexPtr = first + second; + } else { + *indexPtr = first - second; + } + return TCL_OK; } - offset = irPtr->wideValue; - - if (offset == WIDE_MAX) { - *widePtr = (endValue == -1) ? WIDE_MAX : endValue + 1; - } else if (offset == WIDE_MIN) { - *widePtr = -1; - } else if (endValue == -1) { - *widePtr = offset; - } else if (offset < 0) { - /* Different signs, sum cannot overflow */ - *widePtr = endValue + offset + 1; - } else if (offset < WIDE_MAX) { - *widePtr = offset; - } else { - *widePtr = WIDE_MAX; - } - return TCL_OK; + /* + * Report a parse error. + */ - /* Report a parse error. */ parseError: if (interp != NULL) { - char * bytes = TclGetString(objPtr); - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "bad index \"%s\": must be integer?[+-]integer? or" - " end?[+-]integer?", bytes)); - if (!strncmp(bytes, "end-", 4)) { - bytes += 4; - } - TclCheckBadOctal(interp, bytes); - Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", (void *)NULL); + char *bytes = Tcl_GetString(objPtr); + + /* + * The result might not be empty; this resets it which should be both + * a cheap operation, and of little problem because this is an + * error-generation path anyway. + */ + + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, "bad index \"", bytes, + "\": must be integer?[+-]integer? or end?[+-]integer?", NULL); + if (!strncmp(bytes, "end-", 4)) { + bytes += 4; + } + TclCheckBadOctal(interp, bytes); } return TCL_ERROR; @@ -4006,123 +3327,135 @@ GetEndOffsetFromObj( /* *---------------------------------------------------------------------- * - * TclIndexEncode -- - * - * Parse objPtr to determine if it is an index value. Two cases - * are possible. The value objPtr might be parsed as an absolute - * index value in the C signed int range. Note that this includes - * index values that are integers as presented and it includes index - * arithmetic expressions. The absolute index values that can be - * directly meaningful as an index into either a list or a string are - * those integer values >= TCL_INDEX_START (0) - * and < INT_MAX. - * The largest string supported in Tcl 8 has bytelength INT_MAX. - * This means the largest supported character length is also INT_MAX, - * and the index of the last character in a string of length INT_MAX - * is INT_MAX-1. - * - * Any absolute index value parsed outside that range is encoded - * using the before and after values passed in by the - * caller as the encoding to use for indices that are either - * less than or greater than the usable index range. TCL_INDEX_NONE - * is available as a good choice for most callers to use for - * after. Likewise, the value TCL_INDEX_NONE is good for - * most callers to use for before. Other values are possible - * when the caller knows it is helpful in producing its own behavior - * for indices before and after the indexed item. - * - * A token can also be parsed as an end-relative index expression. - * All end-relative expressions that indicate an index larger - * than end (end+2, end--5) point beyond the end of the indexed - * collection, and can be encoded as after. The end-relative - * expressions that indicate an index less than or equal to end - * are encoded relative to the value TCL_INDEX_END (-2). The - * index "end" is encoded as -2, down to the index "end-0x7FFFFFFE" - * which is encoded as INT_MIN. Since the largest index into a - * string possible in Tcl 8 is 0x7FFFFFFE, the interpretation of - * "end-0x7FFFFFFE" for that largest string would be 0. Thus, - * if the tokens "end-0x7FFFFFFF" or "end+-0x80000000" are parsed, - * they can be encoded with the before value. - * - * Returns: - * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. + * UpdateStringOfEndOffset -- + * + * Update the string rep of a Tcl object holding an "end-offset" + * expression. + * + * Results: + * None. * * Side effects: - * When TCL_OK is returned, the encoded index value is written - * to *indexPtr. + * Stores a valid string in the object's string rep. + * + * This function does NOT free any earlier string rep. If it is called on an + * object that already has a valid string rep, it will leak memory. * *---------------------------------------------------------------------- */ -int -TclIndexEncode( - Tcl_Interp *interp, /* For error reporting, may be NULL */ - Tcl_Obj *objPtr, /* Index value to parse */ - Tcl_Size before, /* Value to return for index before beginning */ - Tcl_Size after, /* Value to return for index after end */ - int *indexPtr) /* Where to write the encoded answer, not NULL */ +static void +UpdateStringOfEndOffset( + register Tcl_Obj* objPtr) { - Tcl_WideInt wide; - int idx; - - if (TCL_OK == GetWideForIndex(interp, objPtr, (unsigned)TCL_INDEX_END , &wide)) { - const Tcl_ObjInternalRep *irPtr = TclFetchInternalRep(objPtr, &endOffsetType); - if (irPtr && irPtr->wideValue >= 0) { - /* "int[+-]int" syntax, works the same here as "int" */ - irPtr = NULL; - } - /* - * We parsed an end+offset index value. - * wide holds the offset value in the range WIDE_MIN...WIDE_MAX. - */ - if (wide > (unsigned)(irPtr ? TCL_INDEX_END : INT_MAX)) { - /* - * All end+postive or end-negative expressions - * always indicate "after the end". - */ - idx = after; - } else if (wide <= (irPtr ? INT_MAX : TCL_INDEX_NONE)) { - /* These indices always indicate "before the beginning" */ - idx = before; - } else { - /* Encoded end-positive (or end+negative) are offset */ - idx = (int)wide; - } - } else { - return TCL_ERROR; + char buffer[TCL_INTEGER_SPACE + sizeof("end") + 1]; + register int len; + + strcpy(buffer, "end"); + len = sizeof("end") - 1; + if (objPtr->internalRep.longValue != 0) { + buffer[len++] = '-'; + len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue)); } - *indexPtr = idx; - return TCL_OK; + objPtr->bytes = ckalloc((unsigned) len+1); + memcpy(objPtr->bytes, buffer, (unsigned) len+1); + objPtr->length = len; } /* *---------------------------------------------------------------------- * - * TclIndexDecode -- + * SetEndOffsetFromAny -- * - * Decodes a value previously encoded by TclIndexEncode. The argument - * endValue indicates what value of "end" should be used in the - * decoding. + * Look for a string of the form "end[+-]offset" and convert it to an + * internal representation holding the offset. * * Results: - * The decoded index value. + * Returns TCL_OK if ok, TCL_ERROR if the string was badly formed. + * + * Side effects: + * If interp is not NULL, stores an error message in the interpreter + * result. * *---------------------------------------------------------------------- */ -Tcl_Size -TclIndexDecode( - int encoded, /* Value to decode */ - Tcl_Size endValue) /* Meaning of "end" to use, > TCL_INDEX_END */ +static int +SetEndOffsetFromAny( + Tcl_Interp *interp, /* Tcl interpreter or NULL */ + Tcl_Obj *objPtr) /* Pointer to the object to parse */ { - if (encoded > TCL_INDEX_END) { - return encoded; + int offset; /* Offset in the "end-offset" expression */ + register char* bytes; /* String rep of the object */ + int length; /* Length of the object's string rep */ + + /* + * If it's already the right type, we're fine. + */ + + if (objPtr->typePtr == &tclEndOffsetType) { + return TCL_OK; + } + + /* + * Check for a string rep of the right form. + */ + + bytes = TclGetStringFromObj(objPtr, &length); + if ((*bytes != 'e') || (strncmp(bytes, "end", + (size_t)((length > 3) ? 3 : length)) != 0)) { + if (interp != NULL) { + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, "bad index \"", bytes, + "\": must be end?[+-]integer?", NULL); + } + return TCL_ERROR; } - endValue += encoded - TCL_INDEX_END; - if (endValue >= 0) { - return endValue; + + /* + * Convert the string rep. + */ + + if (length <= 3) { + offset = 0; + } else if ((length > 4) && ((bytes[3] == '-') || (bytes[3] == '+'))) { + /* + * This is our limited string expression evaluator. Pass everything + * after "end-" to Tcl_GetInt, then reverse for offset. + */ + + if (TclIsSpaceProc(bytes[4])) { + return TCL_ERROR; + } + if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) { + return TCL_ERROR; + } + if (bytes[3] == '-') { + offset = -offset; + } + } else { + /* + * Conversion failed. Report the error. + */ + + if (interp != NULL) { + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, "bad index \"", bytes, + "\": must be end?[+-]integer?", NULL); + } + return TCL_ERROR; } - return TCL_INDEX_NONE; + + /* + * The conversion succeeded. Free the old internal rep and set the new + * one. + */ + + TclFreeIntRep(objPtr); + objPtr->internalRep.longValue = offset; + objPtr->typePtr = &tclEndOffsetType; + + return TCL_OK; } /* @@ -4147,16 +3480,16 @@ TclCheckBadOctal( Tcl_Interp *interp, /* Interpreter to use for error reporting. If * NULL, then no error message is left after * errors. */ - const char *value) /* String to check. */ + CONST char *value) /* String to check. */ { - const char *p = value; + register CONST char *p = value; /* * A frequent mistake is invalid octal values due to an unwanted leading * zero. Try to generate a meaningful error message. */ - while (TclIsSpaceProcM(*p)) { + while (TclIsSpaceProc(*p)) { p++; } if (*p == '+' || *p == '-') { @@ -4164,12 +3497,12 @@ TclCheckBadOctal( } if (*p == '0') { if ((p[1] == 'o') || p[1] == 'O') { - p += 2; + p+=2; } while (isdigit(UCHAR(*p))) { /* INTL: digit. */ p++; } - while (TclIsSpaceProcM(*p)) { + while (TclIsSpaceProc(*p)) { p++; } if (*p == '\0') { @@ -4183,8 +3516,8 @@ TclCheckBadOctal( * be added to an existing error message as extra info. */ - Tcl_AppendToObj(Tcl_GetObjResult(interp), - " (looks like invalid octal number)", TCL_INDEX_NONE); + Tcl_AppendResult(interp, " (looks like invalid octal number)", + NULL); } return 1; } @@ -4211,8 +3544,7 @@ ClearHash( for (hPtr = Tcl_FirstHashEntry(tablePtr, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { - Tcl_Obj *objPtr = (Tcl_Obj *)Tcl_GetHashValue(hPtr); - + Tcl_Obj *objPtr = (Tcl_Obj *) Tcl_GetHashValue(hPtr); Tcl_DecrRefCount(objPtr); Tcl_DeleteHashEntry(hPtr); } @@ -4240,12 +3572,12 @@ static Tcl_HashTable * GetThreadHash( Tcl_ThreadDataKey *keyPtr) { - Tcl_HashTable **tablePtrPtr = - (Tcl_HashTable **)Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *)); + Tcl_HashTable **tablePtrPtr = (Tcl_HashTable **) + Tcl_GetThreadData(keyPtr, (int) sizeof(Tcl_HashTable *)); if (NULL == *tablePtrPtr) { *tablePtrPtr = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable)); - Tcl_CreateThreadExitHandler(FreeThreadHash, *tablePtrPtr); + Tcl_CreateThreadExitHandler(FreeThreadHash, (ClientData)*tablePtrPtr); Tcl_InitHashTable(*tablePtrPtr, TCL_ONE_WORD_KEYS); } return *tablePtrPtr; @@ -4267,13 +3599,13 @@ GetThreadHash( static void FreeThreadHash( - void *clientData) + ClientData clientData) { - Tcl_HashTable *tablePtr = (Tcl_HashTable *)clientData; + Tcl_HashTable *tablePtr = (Tcl_HashTable *) clientData; ClearHash(tablePtr); Tcl_DeleteHashTable(tablePtr); - ckfree(tablePtr); + ckfree((char *) tablePtr); } /* @@ -4289,9 +3621,9 @@ FreeThreadHash( static void FreeProcessGlobalValue( - void *clientData) + ClientData clientData) { - ProcessGlobalValue *pgvPtr = (ProcessGlobalValue *)clientData; + ProcessGlobalValue *pgvPtr = (ProcessGlobalValue *) clientData; pgvPtr->epoch++; pgvPtr->numBytes = 0; @@ -4321,7 +3653,7 @@ TclSetProcessGlobalValue( Tcl_Obj *newValue, Tcl_Encoding encoding) { - const char *bytes; + CONST char *bytes; Tcl_HashTable *cacheMap; Tcl_HashEntry *hPtr; int dummy; @@ -4336,12 +3668,11 @@ TclSetProcessGlobalValue( if (NULL != pgvPtr->value) { ckfree(pgvPtr->value); } else { - Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr); + Tcl_CreateExitHandler(FreeProcessGlobalValue, (ClientData) pgvPtr); } - bytes = TclGetString(newValue); - pgvPtr->numBytes = newValue->length; - pgvPtr->value = (char *)ckalloc(pgvPtr->numBytes + 1); - memcpy(pgvPtr->value, bytes, pgvPtr->numBytes + 1); + bytes = Tcl_GetStringFromObj(newValue, &pgvPtr->numBytes); + pgvPtr->value = ckalloc((unsigned) pgvPtr->numBytes + 1); + memcpy(pgvPtr->value, bytes, (unsigned) pgvPtr->numBytes + 1); if (pgvPtr->encoding) { Tcl_FreeEncoding(pgvPtr->encoding); } @@ -4349,15 +3680,16 @@ TclSetProcessGlobalValue( /* * Fill the local thread copy directly with the Tcl_Obj value to avoid - * loss of the internalrep. Increment newValue refCount early to handle case + * loss of the intrep. Increment newValue refCount early to handle case * where we set a PGV to itself. */ Tcl_IncrRefCount(newValue); cacheMap = GetThreadHash(&pgvPtr->key); ClearHash(cacheMap); - hPtr = Tcl_CreateHashEntry(cacheMap, INT2PTR(pgvPtr->epoch), &dummy); - Tcl_SetHashValue(hPtr, newValue); + hPtr = Tcl_CreateHashEntry(cacheMap, + (char *) INT2PTR(pgvPtr->epoch), &dummy); + Tcl_SetHashValue(hPtr, (ClientData) newValue); Tcl_MutexUnlock(&pgvPtr->mutex); } @@ -4382,31 +3714,33 @@ TclGetProcessGlobalValue( Tcl_Obj *value = NULL; Tcl_HashTable *cacheMap; Tcl_HashEntry *hPtr; - Tcl_Size epoch = pgvPtr->epoch; + int epoch = pgvPtr->epoch; if (pgvPtr->encoding) { Tcl_Encoding current = Tcl_GetEncoding(NULL, NULL); if (pgvPtr->encoding != current) { /* - * The system encoding has changed since the global string value - * was saved. Convert the global value to be based on the new + * The system encoding has changed since the master string value + * was saved. Convert the master value to be based on the new * system encoding. */ Tcl_DString native, newValue; Tcl_MutexLock(&pgvPtr->mutex); - epoch = ++pgvPtr->epoch; + pgvPtr->epoch++; + epoch = pgvPtr->epoch; Tcl_UtfToExternalDString(pgvPtr->encoding, pgvPtr->value, pgvPtr->numBytes, &native); Tcl_ExternalToUtfDString(current, Tcl_DStringValue(&native), Tcl_DStringLength(&native), &newValue); Tcl_DStringFree(&native); ckfree(pgvPtr->value); - pgvPtr->value = (char *)ckalloc(Tcl_DStringLength(&newValue) + 1); - memcpy(pgvPtr->value, Tcl_DStringValue(&newValue), + pgvPtr->value = ckalloc((unsigned int) Tcl_DStringLength(&newValue) + 1); + memcpy(pgvPtr->value, Tcl_DStringValue(&newValue), + (size_t) Tcl_DStringLength(&newValue) + 1); Tcl_DStringFree(&newValue); Tcl_FreeEncoding(pgvPtr->encoding); pgvPtr->encoding = current; @@ -4416,7 +3750,7 @@ TclGetProcessGlobalValue( } } cacheMap = GetThreadHash(&pgvPtr->key); - hPtr = Tcl_FindHashEntry(cacheMap, INT2PTR(epoch)); + hPtr = Tcl_FindHashEntry(cacheMap, (char *) INT2PTR(epoch)); if (NULL == hPtr) { int dummy; @@ -4436,11 +3770,12 @@ TclGetProcessGlobalValue( Tcl_MutexLock(&pgvPtr->mutex); if ((NULL == pgvPtr->value) && (pgvPtr->proc)) { pgvPtr->epoch++; - pgvPtr->proc(&pgvPtr->value,&pgvPtr->numBytes,&pgvPtr->encoding); + (*(pgvPtr->proc))(&pgvPtr->value, &pgvPtr->numBytes, + &pgvPtr->encoding); if (pgvPtr->value == NULL) { Tcl_Panic("PGV Initializer did not initialize"); } - Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr); + Tcl_CreateExitHandler(FreeProcessGlobalValue, (ClientData)pgvPtr); } /* @@ -4449,12 +3784,12 @@ TclGetProcessGlobalValue( value = Tcl_NewStringObj(pgvPtr->value, pgvPtr->numBytes); hPtr = Tcl_CreateHashEntry(cacheMap, - INT2PTR(pgvPtr->epoch), &dummy); + (char *) INT2PTR(pgvPtr->epoch), &dummy); Tcl_MutexUnlock(&pgvPtr->mutex); - Tcl_SetHashValue(hPtr, value); + Tcl_SetHashValue(hPtr, (ClientData) value); Tcl_IncrRefCount(value); } - return (Tcl_Obj *)Tcl_GetHashValue(hPtr); + return (Tcl_Obj *) Tcl_GetHashValue(hPtr); } /* @@ -4466,7 +3801,7 @@ TclGetProcessGlobalValue( * (normally as computed by TclpFindExecutable). * * Results: - * None. + * None. * * Side effects: * Stores the executable name. @@ -4497,7 +3832,7 @@ TclSetObjNameOfExecutable( * pathname of the application is unknown. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -4516,26 +3851,27 @@ TclGetObjNameOfExecutable(void) * This function retrieves the absolute pathname of the application in * which the Tcl library is running, and returns it in string form. * - * The returned string belongs to Tcl and should be copied if the caller - * plans to keep it, to guard against it becoming invalid. + * The returned string belongs to Tcl and should be copied if the caller + * plans to keep it, to guard against it becoming invalid. * * Results: * A pointer to the internal string or NULL if the internal full path * name has not been computed or unknown. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ -const char * +CONST char * Tcl_GetNameOfExecutable(void) { - Tcl_Obj *obj = TclGetObjNameOfExecutable(); - const char *bytes = TclGetString(obj); + int numBytes; + const char *bytes = + Tcl_GetStringFromObj(TclGetObjNameOfExecutable(), &numBytes); - if (obj->length == 0) { + if (numBytes == 0) { return NULL; } return bytes; @@ -4613,20 +3949,16 @@ int TclReToGlob( Tcl_Interp *interp, const char *reStr, - Tcl_Size reStrLen, + int reStrLen, Tcl_DString *dsPtr, - int *exactPtr, - int *quantifiersFoundPtr) + int *exactPtr) { int anchorLeft, anchorRight, lastIsStar, numStars; - char *dsStr, *dsStrStart; - const char *msg, *p, *strEnd, *code; + char *dsStr, *dsStrStart, *msg; + const char *p, *strEnd; strEnd = reStr + reStrLen; Tcl_DStringInit(dsPtr); - if (quantifiersFoundPtr != NULL) { - *quantifiersFoundPtr = 0; - } /* * "***=xxx" == "*xxx*", watch for glob-sensitive chars. @@ -4634,11 +3966,10 @@ TclReToGlob( if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) { /* - * At most, the glob pattern has length 2*reStrLen + 2 to backslash - * escape every character and have * at each end. + * At most, the glob pattern has length 2*reStrLen + 2 to + * backslash escape every character and have * at each end. */ - - Tcl_DStringSetLength(dsPtr, reStrLen + 2); + Tcl_DStringSetLength(dsPtr, 2*reStrLen + 2); dsStr = dsStrStart = Tcl_DStringValue(dsPtr); *dsStr++ = '*'; for (p = reStr + 4; p < strEnd; p++) { @@ -4661,8 +3992,8 @@ TclReToGlob( } /* - * At most, the glob pattern has length reStrLen + 2 to account for - * possible * at each end. + * At most, the glob pattern has length reStrLen + 2 to account + * for possible * at each end. */ Tcl_DStringSetLength(dsPtr, reStrLen + 2); @@ -4672,12 +4003,12 @@ TclReToGlob( * Check for anchored REs (ie ^foo$), so we can use string equal if * possible. Do not alter the start of str so we can free it correctly. * - * Keep track of the last char being an unescaped star to prevent multiple - * instances. Simpler than checking that the last star may be escaped. + * Keep track of the last char being an unescaped star to prevent + * multiple instances. Simpler than checking that the last star + * may be escaped. */ msg = NULL; - code = NULL; p = reStr; anchorRight = 0; lastIsStar = 0; @@ -4734,14 +4065,10 @@ TclReToGlob( break; default: msg = "invalid escape sequence"; - code = "BADESCAPE"; goto invalidGlob; } break; case '.': - if (quantifiersFoundPtr != NULL) { - *quantifiersFoundPtr = 1; - } anchorLeft = 0; /* prevent exact match */ if (p+1 < strEnd) { if (p[1] == '*') { @@ -4766,7 +4093,6 @@ TclReToGlob( case '$': if (p+1 != strEnd) { msg = "$ not anchor"; - code = "NONANCHOR"; goto invalidGlob; } anchorRight = 1; @@ -4774,8 +4100,8 @@ TclReToGlob( case '*': case '+': case '?': case '|': case '^': case '{': case '}': case '(': case ')': case '[': case ']': msg = "unhandled RE special char"; - code = "UNHANDLED"; goto invalidGlob; + break; default: *dsStr++ = *p; break; @@ -4787,9 +4113,7 @@ TclReToGlob( * Heuristic: if >1 non-anchoring *, the risk is large that glob * matching is slower than the RE engine, so report invalid. */ - msg = "excessive recursive glob backtrack potential"; - code = "OVERCOMPLEX"; goto invalidGlob; } @@ -4802,12 +4126,22 @@ TclReToGlob( *exactPtr = (anchorLeft && anchorRight); } +#if 0 + fprintf(stderr, "INPUT RE '%.*s' OUTPUT GLOB '%s' anchor %d:%d \n", + reStrLen, reStr, + Tcl_DStringValue(dsPtr), anchorLeft, anchorRight); + fflush(stderr); +#endif return TCL_OK; invalidGlob: +#if 0 + fprintf(stderr, "INPUT RE '%.*s' NO OUTPUT GLOB %s (%c)\n", + reStrLen, reStr, msg, *p); + fflush(stderr); +#endif if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); - Tcl_SetErrorCode(interp, "TCL", "RE2GLOB", code, (void *)NULL); + Tcl_AppendResult(interp, msg, NULL); } Tcl_DStringFree(dsPtr); return TCL_ERROR; |
