diff options
Diffstat (limited to 'generic/tclUtil.c')
| -rw-r--r-- | generic/tclUtil.c | 4299 | 
1 files changed, 3094 insertions, 1205 deletions
| diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 7cdded7..2d00adf 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1,120 +1,480 @@ -/*  +/*   * tclUtil.c --   * - *	This file contains utility procedures that are used by many Tcl + *	This file contains utility functions that are used by many Tcl   *	commands.   *   * Copyright (c) 1987-1993 The Regents of the University of California.   * Copyright (c) 1994-1998 Sun Microsystems, Inc. - * Copyright (c) 2001 by Kevin B. Kenny.  All rights reserved. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * Copyright (c) 2001 by Kevin B. Kenny. All rights reserved.   * - *  RCS: @(#) $Id: tclUtil.c,v 1.28 2002/01/25 20:40:56 dgp Exp $ + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES.   */  #include "tclInt.h" -#include "tclPort.h" +#include "tclParse.h" +#include "tclStringTrim.h" +#include <math.h>  /* - * The following variable holds the full path name of the binary - * from which this application was executed, or NULL if it isn't - * know.  The value of the variable is set by the procedure - * Tcl_FindExecutable.  The storage space is dynamically allocated. + * The absolute pathname of the executable in which this Tcl library is + * running.   */ -char *tclExecutableName = NULL; -char *tclNativeExecutableName = NULL; +static ProcessGlobalValue executableName = { +    0, 0, NULL, NULL, NULL, NULL, NULL +};  /* - * The following values are used in the flags returned by Tcl_ScanElement - * and used by Tcl_ConvertElement.  The value TCL_DONT_USE_BRACES is also - * defined in tcl.h;  make sure its value doesn't overlap with any of the - * values below. - * - * TCL_DONT_USE_BRACES -	1 means the string mustn't be enclosed in - *				braces (e.g. 
it contains unmatched braces, - *				or ends in a backslash character, or user - *				just doesn't want braces);  handle all - *				special characters by adding backslashes. - * USE_BRACES -			1 means the string contains a special - *				character that can be handled simply by - *				enclosing the entire argument in braces. - * BRACES_UNMATCHED -		1 means that braces aren't properly matched - *				in the argument. + * The following values are used in the flags arguments of Tcl*Scan*Element + * and Tcl*Convert*Element.  The values TCL_DONT_USE_BRACES and + * TCL_DONT_QUOTE_HASH are defined in tcl.h, like so: + * +#define TCL_DONT_USE_BRACES     1 +#define TCL_DONT_QUOTE_HASH     8 + * + * Those are public flag bits which callers of the public routines + * Tcl_Convert*Element() can use to indicate: + * + * TCL_DONT_USE_BRACES -	1 means the caller is insisting that brace + *				quoting not be used when converting the list + *				element. + * TCL_DONT_QUOTE_HASH -	1 means the caller insists that a leading hash + *				character ('#') should *not* be quoted. This + *				is appropriate when the caller can guarantee + *				the element is not the first element of a + *				list, so [eval] cannot mis-parse the element + *				as a comment. + * + * The remaining values which can be carried by the flags of these routines + * are for internal use only.  Make sure they do not overlap with the public + * values above. + * + * The Tcl*Scan*Element() routines make a determination which of 4 modes of + * conversion is most appropriate for Tcl*Convert*Element() to perform, and + * sets two bits of the flags value to indicate the mode selected. + * + * CONVERT_NONE		The element needs no quoting. Its literal string is + *			suitable as is. + * CONVERT_BRACE	The conversion should be enclosing the literal string + *			in braces. + * CONVERT_ESCAPE	The conversion should be using backslashes to escape + *			any characters in the string that require it. 
+ * CONVERT_MASK		A mask value used to extract the conversion mode from + *			the flags argument. + *			Also indicates a strange conversion mode where all + *			special characters are escaped with backslashes + *			*except for braces*. This is a strange and unnecessary + *			case, but it's part of the historical way in which + *			lists have been formatted in Tcl. To experiment with + *			removing this case, set the value of COMPAT to 0. + * + * One last flag value is used only by callers of TclScanElement(). The flag + * value produced by a call to Tcl*Scan*Element() will never leave this bit + * set. + * + * CONVERT_ANY		The caller of TclScanElement() declares it can make no + *			promise about what public flags will be passed to the + *			matching call of TclConvertElement(). As such, + *			TclScanElement() has to determine the worst case + *			destination buffer length over all possibilities, and + *			in other cases this means an overestimate of the + *			required size. + * + * For more details, see the comments on the Tcl*Scan*Element and  + * Tcl*Convert*Element routines.   */ -#define USE_BRACES		2 -#define BRACES_UNMATCHED	4 +#define COMPAT 1 +#define CONVERT_NONE	0 +#define CONVERT_BRACE	2 +#define CONVERT_ESCAPE	4 +#define CONVERT_MASK	(CONVERT_BRACE | CONVERT_ESCAPE) +#define CONVERT_ANY	16  /* - * The following values determine the precision used when converting - * floating-point values to strings.  This information is linked to all - * of the tcl_precision variables in all interpreters via the procedure - * TclPrecTraceProc. + * The following key is used by Tcl_PrintDouble and TclPrecTraceProc to + * access the precision to be used for double formatting.   */ -static char precisionString[10] = "12"; -				/* The string value of all the tcl_precision -				 * variables. */ -static char precisionFormat[10] = "%.12g"; -				/* The format string actually used in calls -				 * to sprintf. 
*/ -TCL_DECLARE_MUTEX(precisionMutex) +static Tcl_ThreadDataKey precisionKey;  /* - * Prototypes for procedures defined later in this file. + * Prototypes for functions defined later in this file.   */ -static void UpdateStringOfEndOffset _ANSI_ARGS_(( Tcl_Obj* objPtr )); -static int SetEndOffsetFromAny _ANSI_ARGS_(( Tcl_Interp* interp, -					     Tcl_Obj* objPtr )); +static void		ClearHash(Tcl_HashTable *tablePtr); +static void		FreeProcessGlobalValue(ClientData clientData); +static void		FreeThreadHash(ClientData clientData); +static Tcl_HashTable *	GetThreadHash(Tcl_ThreadDataKey *keyPtr); +static int		SetEndOffsetFromAny(Tcl_Interp *interp, +			    Tcl_Obj *objPtr); +static void		UpdateStringOfEndOffset(Tcl_Obj *objPtr);  /* - * The following is the Tcl object type definition for an object - * that represents a list index in the form, "end-offset".  It is - * used as a performance optimization in TclGetIntForIndex.  The - * internal rep is an integer, so no memory management is required - * for it. + * The following is the Tcl object type definition for an object that + * represents a list index in the form, "end-offset". It is used as a + * performance optimization in TclGetIntForIndex. The internal rep is an + * integer, so no memory management is required for it.   */ -Tcl_ObjType tclEndOffsetType = { +const Tcl_ObjType tclEndOffsetType = {      "end-offset",			/* name */ -    (Tcl_FreeInternalRepProc*) NULL,    /* freeIntRepProc */ -    (Tcl_DupInternalRepProc*) NULL,     /* dupIntRepProc */ +    NULL,				/* freeIntRepProc */ +    NULL,				/* dupIntRepProc */      UpdateStringOfEndOffset,		/* updateStringProc */ -    SetEndOffsetFromAny     +    SetEndOffsetFromAny  }; + +/* + *	*	STRING REPRESENTATION OF LISTS	*	*	* + * + * The next several routines implement the conversions of strings to and from + * Tcl lists. To understand their operation, the rules of parsing and + * generating the string representation of lists must be known.  
Here we + * describe them in one place. + * + * A list is made up of zero or more elements. Any string is a list if it is + * made up of alternating substrings of element-separating ASCII whitespace + * and properly formatted elements. + * + * The ASCII characters which can make up the whitespace between list elements + * are: + * + *	\u0009	\t	TAB + *	\u000A	\n	NEWLINE + *	\u000B	\v	VERTICAL TAB + *	\u000C	\f	FORM FEED + * 	\u000D	\r	CARRIAGE RETURN + *	\u0020		SPACE + * + * NOTE: differences between this and other places where Tcl defines a role + * for "whitespace". + * + *	* Unlike command parsing, here NEWLINE is just another whitespace + *	  character; its role as a command terminator in a script has no + *	  importance here. + * + *	* Unlike command parsing, the BACKSLASH NEWLINE sequence is not + *	  considered to be a whitespace character. + * + *	* Other Unicode whitespace characters (recognized by [string is space] + *	  or Tcl_UniCharIsSpace()) do not play any role as element separators + *	  in Tcl lists. + * + *	* The NUL byte ought not appear, as it is not in strings properly + *	  encoded for Tcl, but if it is present, it is not treated as + *	  separating whitespace, or a string terminator. It is just another + *	  character in a list element. + * + * The interpretation of a formatted substring as a list element follows rules + * similar to the parsing of the words of a command in a Tcl script. Backslash + * substitution plays a key role, and is defined exactly as it is in command + * parsing. The same routine, TclParseBackslash() is used in both command + * parsing and list parsing. + * + * NOTE: This means that if and when backslash substitution rules ever change + * for command parsing, the interpretation of strings as lists also changes. + *  + * Backslash substitution replaces an "escape sequence" of one or more + * characters starting with + *		\u005c	\	BACKSLASH + * with a single character. 
The one character escape sequence case happens only + * when BACKSLASH is the last character in the string. In all other cases, the + * escape sequence is at least two characters long. + * + * The formatted substrings are interpreted as element values according to the + * following cases: + * + * * If the first character of a formatted substring is + *		\u007b	{	OPEN BRACE + *   then the end of the substring is the matching  + *		\u007d	}	CLOSE BRACE + *   character, where matching is determined by counting nesting levels, and + *   not including any brace characters that are contained within a backslash + *   escape sequence in the nesting count. Having found the matching brace, + *   all characters between the braces are the string value of the element. + *   If no matching close brace is found before the end of the string, the + *   string is not a Tcl list. If the character following the close brace is + *   not an element separating whitespace character, or the end of the string, + *   then the string is not a Tcl list. + * + *   NOTE: this differs from a brace-quoted word in the parsing of a Tcl + *   command only in its treatment of the backslash-newline sequence. In a + *   list element, the literal characters in the backslash-newline sequence + *   become part of the element value. In a script word, conversion to a + *   single SPACE character is done. + * + *   NOTE: Most list element values can be represented by a formatted + *   substring using brace quoting. The exceptions are any element value that + *   includes an unbalanced brace not in a backslash escape sequence, and any + *   value that ends with a backslash not itself in a backslash escape + *   sequence. + *  + * * If the first character of a formatted substring is + *		\u0022	"	QUOTE + *   then the end of the substring is the next QUOTE character, not counting + *   any QUOTE characters that are contained within a backslash escape + *   sequence. 
If no next QUOTE is found before the end of the string, the + *   string is not a Tcl list. If the character following the closing QUOTE is + *   not an element separating whitespace character, or the end of the string, + *   then the string is not a Tcl list. Having found the limits of the + *   substring, the element value is produced by performing backslash + *   substitution on the character sequence between the open and close QUOTEs. + * + *   NOTE: Any element value can be represented by this style of formatting, + *   given suitable choice of backslash escape sequences. + * + * * All other formatted substrings are terminated by the next element + *   separating whitespace character in the string.  Having found the limits + *   of the substring, the element value is produced by performing backslash + *   substitution on it. + * + *   NOTE: Any element value can be represented by this style of formatting, + *   given suitable choice of backslash escape sequences, with one exception. + *   The empty string cannot be represented as a list element without the use + *   of either braces or quotes to delimit it. + * + * This collection of parsing rules is implemented in the routine + * TclFindElement(). + * + * In order to produce lists that can be parsed by these rules, we need the + * ability to distinguish between characters that are part of a list element + * value from characters providing syntax that define the structure of the + * list. This means that our code that generates lists must at a minimum be + * able to produce escape sequences for the 10 characters identified above + * that have significance to a list parser. + * + *	*	*	CANONICAL LISTS	*	*	*	*	* + * + * In addition to the basic rules for parsing strings into Tcl lists, there + * are additional properties to be met by the set of list values that are + * generated by Tcl.  
Such list values are often said to be in "canonical + * form": + * + * * When any canonical list is evaluated as a Tcl script, it is a script of + *   either zero commands (an empty list) or exactly one command. The command + *   word is exactly the first element of the list, and each argument word is + *   exactly one of the following elements of the list. This means that any + *   characters that have special meaning during script evaluation need + *   special treatment when canonical lists are produced: + * + *	* Whitespace between elements may not include NEWLINE. + *	* The command terminating character, + *		\u003b	;	SEMICOLON + *	  must be BRACEd, QUOTEd, or escaped so that it does not terminate the + * 	  command prematurely. + *	* Any of the characters that begin substitutions in scripts, + *		\u0024	$	DOLLAR + *		\u005b	[	OPEN BRACKET + *		\u005c	\	BACKSLASH + *	  need to be BRACEd or escaped. + *	* In any list where the first character of the first element is + *		\u0023	#	HASH + *	  that HASH character must be BRACEd, QUOTEd, or escaped so that it + *	  does not convert the command into a comment. + *	* Any list element that contains the character sequence BACKSLASH + *	  NEWLINE cannot be formatted with BRACEs. The BACKSLASH character + *	  must be represented by an escape sequence, and unless QUOTEs are + *	  used, the NEWLINE must be as well. + * + * * It is also guaranteed that one can use a canonical list as a building + *   block of a larger script within command substitution, as in this example: + *	set script "puts \[[list $cmd $arg]]"; eval $script + *   To support this usage, any appearance of the character + *		\u005d	]	CLOSE BRACKET + *   in a list element must be BRACEd, QUOTEd, or escaped. + * + * * Finally it is guaranteed that enclosing a canonical list in braces + *   produces a new value that is also a canonical list.  This new list has + *   length 1, and its only element is the original canonical list.  
This same + *   guarantee also makes it possible to construct scripts where an argument + *   word is given a list value by enclosing the canonical form of that list + *   in braces: + *	set script "puts {[list $one $two $three]}"; eval $script + *   This sort of coding was once fairly common, though it's become more + *   idiomatic to see the following instead: + *	set script [list puts [list $one $two $three]]; eval $script + *   In order to support this guarantee, every canonical list must have + *   balance when counting those braces that are not in escape sequences. + * + * Within these constraints, the canonical list generation routines + * TclScanElement() and TclConvertElement() attempt to generate the string for + * any list that is easiest to read. When an element value is itself + * acceptable as the formatted substring, it is usually used (CONVERT_NONE). + * When some quoting or escaping is required, use of BRACEs (CONVERT_BRACE) is + * usually preferred over the use of escape sequences (CONVERT_ESCAPE). There + * are some exceptions to both of these preferences for reasons of code + * simplicity, efficiency, and continuation of historical habits. Canonical + * lists never use the QUOTE formatting to delimit their elements because that + * form of quoting does not nest, which makes construction of nested lists far + * too much trouble.  Canonical lists always use only a single SPACE character + * for element-separating whitespace. + * + *	*	*	FUTURE CONSIDERATIONS	*	*	* + * + * When a list element requires quoting or escaping due to a CLOSE BRACKET + * character or an internal QUOTE character, a strange formatting mode is + * recommended. For example, if the value "a{b]c}d" is converted by the usual + * modes: + * + *	CONVERT_BRACE:	a{b]c}d		=> {a{b]c}d} + *	CONVERT_ESCAPE:	a{b]c}d		=> a\{b\]c\}d + * + * we get perfectly usable formatted list elements. However, this is not what + * Tcl releases have been producing. 
Instead, we have: + * + *	CONVERT_MASK:	a{b]c}d		=> a{b\]c}d + * + * where the CLOSE BRACKET is escaped, but the BRACEs are not. The same effect + * can be seen replacing ] with " in this example. There does not appear to be + * any functional or aesthetic purpose for this strange additional mode. The + * sole purpose I can see for preserving it is to keep generating the same + * formatted lists programmers have become accustomed to, and perhaps written + * tests to expect. That is, compatibility only. The additional code + * complexity required to support this mode is significant. The lines of code + * supporting it are delimited in the routines below with #if COMPAT + * directives. This makes it easy to experiment with eliminating this + * formatting mode simply with "#define COMPAT 0" above. I believe this is + * worth considering. + *  + * Another consideration is the treatment of QUOTE characters in list + * elements. TclConvertElement() must have the ability to produce the escape + * sequence \" so that when a list element begins with a QUOTE we do not + * confuse that first character with a QUOTE used as list syntax to define + * list structure. However, that is the only place where QUOTE characters need + * quoting. In this way, handling QUOTE could really be much more like the way + * we handle HASH which also needs quoting and escaping only in particular + * situations. Following up this could increase the set of list elements that + * can use the CONVERT_NONE formatting mode. + * + * More speculative is that the demands of canonical list form require brace + * balance for the list as a whole, while the current implementation achieves + * this by establishing brace balance for every element. + * + * Finally, a reminder that the rules for parsing and formatting lists are + * closely tied together with the rules for parsing and evaluating scripts, + * and will need to evolve in sync. 
+ */ + +/* + *---------------------------------------------------------------------- + * + * TclMaxListLength -- + * + *	Given 'bytes' pointing to 'numBytes' bytes, scan through them and + *	count the number of whitespace runs that could be list element + *	separators. If 'numBytes' is -1, scan to the terminating '\0'. Not a + *	full list parser. Typically used to get a quick and dirty overestimate + *	of the list length in order to allocate space for an actual list + *	parser to operate with. + * + * Results: + *	Returns the largest number of list elements that could possibly be in + *	this string, interpreted as a Tcl list. If 'endPtr' is not NULL, + *	a pointer to the end of the scanned string is written to *endPtr. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclMaxListLength( +    const char *bytes, +    int numBytes, +    const char **endPtr) +{ +    int count = 0; + +    if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { +	/* Empty string case - quick exit */ +	goto done; +    } + +    /* +     * No list element before leading white space. +     */ + +    count += 1 - TclIsSpaceProc(*bytes);  + +    /* +     * Count white space runs as potential element separators. +     */ + +    while (numBytes) { +	if ((numBytes == -1) && (*bytes == '\0')) { +	    break; +	} +	if (TclIsSpaceProc(*bytes)) { +	    /* +	     * Space run started; bump count. +	     */ + +	    count++; +	    do { +		bytes++; +		numBytes -= (numBytes != -1); +	    } while (numBytes && TclIsSpaceProc(*bytes)); +	    if ((numBytes == 0) || ((numBytes == -1) && (*bytes == '\0'))) { +		break; +	    } + +	    /* +	     * (*bytes) is non-space; return to counting state. +	     */ +	} +	bytes++; +	numBytes -= (numBytes != -1); +    } + +    /* +     * No list element following trailing white space. 
+     */ +    count -= TclIsSpaceProc(bytes[-1]);  + +  done: +    if (endPtr) { +	*endPtr = bytes; +    } +    return count; +}  /*   *----------------------------------------------------------------------   *   * TclFindElement --   * - *	Given a pointer into a Tcl list, locate the first (or next) - *	element in the list. + *	Given a pointer into a Tcl list, locate the first (or next) element in + *	the list.   *   * Results: - *	The return value is normally TCL_OK, which means that the - *	element was successfully located.  If TCL_ERROR is returned - *	it means that list didn't have proper list structure; - *	the interp's result contains a more detailed error message. + *	The return value is normally TCL_OK, which means that the element was + *	successfully located. If TCL_ERROR is returned it means that list + *	didn't have proper list structure; the interp's result contains a more + *	detailed error message.   *   *	If TCL_OK is returned, then *elementPtr will be set to point to the   *	first element of list, and *nextPtr will be set to point to the   *	character just after any white space following the last character - *	that's part of the element. If this is the last argument in the - *	list, then *nextPtr will point just after the last character in the - *	list (i.e., at the character at list+listLength). If sizePtr is - *	non-NULL, *sizePtr is filled in with the number of characters in the - *	element.  If the element is in braces, then *elementPtr will point - *	to the character after the opening brace and *sizePtr will not - *	include either of the braces. If there isn't an element in the list, - *	*sizePtr will be zero, and both *elementPtr and *termPtr will point - *	just after the last character in the list. Note: this procedure does - *	NOT collapse backslash sequences. + *	that's part of the element. 
If this is the last argument in the list, + *	then *nextPtr will point just after the last character in the list + *	(i.e., at the character at list+listLength). If sizePtr is non-NULL, + *	*sizePtr is filled in with the number of bytes in the element. If the + *	element is in braces, then *elementPtr will point to the character + *	after the opening brace and *sizePtr will not include either of the + *	braces. If there isn't an element in the list, *sizePtr will be zero, + *	and both *elementPtr and *nextPtr will point just after the last + *	character in the list. If literalPtr is non-NULL, *literalPtr is set + *	to a boolean value indicating whether the substring returned as the + *	values of **elementPtr and *sizePtr is the literal value of a list + *	element. If not, a call to TclCopyAndCollapse() is needed to produce + *	the actual value of the list element. Note: this function does NOT + *	collapse backslash sequences, but uses *literalPtr to tell callers + *	when it is required for them to do so.   *   * Side effects:   *	None. @@ -123,43 +483,46 @@ Tcl_ObjType tclEndOffsetType = {   */  int -TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr, -	       bracePtr) -    Tcl_Interp *interp;		/* Interpreter to use for error reporting.  -				 * If NULL, then no error message is left -				 * after errors. */ -    CONST char *list;		/* Points to the first byte of a string +TclFindElement( +    Tcl_Interp *interp,		/* Interpreter to use for error reporting. If +				 * NULL, then no error message is left after +				 * errors. */ +    const char *list,		/* Points to the first byte of a string  				 * containing a Tcl list with zero or more  				 * elements (possibly in braces). */ -    int listLength;		/* Number of bytes in the list's string. */ -    CONST char **elementPtr;	/* Where to put address of first significant +    int listLength,		/* Number of bytes in the list's string. 
*/ +    const char **elementPtr,	/* Where to put address of first significant  				 * character in first element of list. */ -    CONST char **nextPtr;	/* Fill in with location of character just +    const char **nextPtr,	/* Fill in with location of character just  				 * after all white space following end of  				 * argument (next arg or end of list). */ -    int *sizePtr;		/* If non-zero, fill in with size of +    int *sizePtr,		/* If non-zero, fill in with size of  				 * element. */ -    int *bracePtr;		/* If non-zero, fill in with non-zero/zero -				 * to indicate that arg was/wasn't -				 * in braces. */ +    int *literalPtr)		/* If non-zero, fill in with non-zero/zero to +				 * indicate that the substring of *sizePtr +				 * bytes starting at **elementPtr is/is not +				 * the literal list element and therefore +				 * does not/does require a call to  +				 * TclCopyAndCollapse() by the caller. */  { -    CONST char *p = list; -    CONST char *elemStart;	/* Points to first byte of first element. */ -    CONST char *limit;		/* Points just after list's last byte. */ +    const char *p = list; +    const char *elemStart;	/* Points to first byte of first element. */ +    const char *limit;		/* Points just after list's last byte. */      int openBraces = 0;		/* Brace nesting level during parse. */      int inQuotes = 0;      int size = 0;		/* lint. */      int numChars; -    CONST char *p2; -     +    int literal = 1; +    const char *p2; +      /* -     * Skim off leading white space and check for an opening brace or -     * quote. We treat embedded NULLs in the list as bytes belonging to -     * a list element. +     * Skim off leading white space and check for an opening brace or quote. +     * We treat embedded NULLs in the list as bytes belonging to a list +     * element.       */      limit = (list + listLength); -    while ((p < limit) && (isspace(UCHAR(*p)))) { /* INTL: ISO space. 
*/ +    while ((p < limit) && (TclIsSpaceProc(*p))) {  	p++;      }      if (p == limit) {		/* no element found */ @@ -175,9 +538,6 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,  	p++;      }      elemStart = p; -    if (bracePtr != 0) { -	*bracePtr = openBraces; -    }      /*       * Find element's end (a space, close brace, or the end of the string). @@ -185,123 +545,124 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,      while (p < limit) {  	switch (*p) { -  	    /*  	     * Open brace: don't treat specially unless the element is in  	     * braces. In this case, keep a nesting count.  	     */ -	    case '{': -		if (openBraces != 0) { -		    openBraces++; -		} -		break; +	case '{': +	    if (openBraces != 0) { +		openBraces++; +	    } +	    break;  	    /*  	     * Close brace: if element is in braces, keep nesting count and  	     * quit when the last close brace is seen.  	     */ -	    case '}': -		if (openBraces > 1) { -		    openBraces--; -		} else if (openBraces == 1) { -		    size = (p - elemStart); -		    p++; -		    if ((p >= limit) -			    || isspace(UCHAR(*p))) { /* INTL: ISO space. */ -			goto done; -		    } +	case '}': +	    if (openBraces > 1) { +		openBraces--; +	    } else if (openBraces == 1) { +		size = (p - elemStart); +		p++; +		if ((p >= limit) || TclIsSpaceProc(*p)) { +		    goto done; +		} -		    /* -		     * Garbage after the closing brace; return an error. -		     */ -		     -		    if (interp != NULL) { -			char buf[100]; -			 -			p2 = p; -			while ((p2 < limit) -				&& (!isspace(UCHAR(*p2))) /* INTL: ISO space. */ -			        && (p2 < p+20)) { -			    p2++; -			} -			sprintf(buf, -				"list element in braces followed by \"%.*s\" instead of space", -				(int) (p2-p), p); -			Tcl_SetResult(interp, buf, TCL_VOLATILE); +		/* +		 * Garbage after the closing brace; return an error. 
+		 */ + +		if (interp != NULL) { +		    p2 = p; +		    while ((p2 < limit) && (!TclIsSpaceProc(*p2)) +			    && (p2 < p+20)) { +			p2++;  		    } -		    return TCL_ERROR; +		    Tcl_SetObjResult(interp, Tcl_ObjPrintf( +			    "list element in braces followed by \"%.*s\" " +			    "instead of space", (int) (p2-p), p)); +		    Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "JUNK", +			    NULL);  		} -		break; +		return TCL_ERROR; +	    } +	    break;  	    /* -	     * Backslash:  skip over everything up to the end of the -	     * backslash sequence. +	     * Backslash: skip over everything up to the end of the backslash +	     * sequence.  	     */ -	    case '\\': { -		Tcl_UtfBackslash(p, &numChars, NULL); -		p += (numChars - 1); -		break; +	case '\\': +	    if (openBraces == 0) { +		/* +		 * A backslash sequence not within a brace quoted element +		 * means the value of the element is different from the +		 * substring we are parsing. A call to TclCopyAndCollapse() is +		 * needed to produce the element value. Inform the caller. +		 */ + +		literal = 0;  	    } +	    TclParseBackslash(p, limit - p, &numChars, NULL); +	    p += (numChars - 1); +	    break;  	    /*  	     * Space: ignore if element is in braces or quotes; otherwise  	     * terminate element.  	     */ -	    case ' ': -	    case '\f': -	    case '\n': -	    case '\r': -	    case '\t': -	    case '\v': -		if ((openBraces == 0) && !inQuotes) { -		    size = (p - elemStart); -		    goto done; -		} -		break; +	case ' ': +	case '\f': +	case '\n': +	case '\r': +	case '\t': +	case '\v': +	    if ((openBraces == 0) && !inQuotes) { +		size = (p - elemStart); +		goto done; +	    } +	    break;  	    /*  	     * Double-quote: if element is in quotes then terminate it.  	     
*/ -	    case '"': -		if (inQuotes) { -		    size = (p - elemStart); -		    p++; -		    if ((p >= limit) -			    || isspace(UCHAR(*p))) { /* INTL: ISO space */ -			goto done; -		    } +	case '"': +	    if (inQuotes) { +		size = (p - elemStart); +		p++; +		if ((p >= limit) || TclIsSpaceProc(*p)) { +		    goto done; +		} -		    /* -		     * Garbage after the closing quote; return an error. -		     */ -		     -		    if (interp != NULL) { -			char buf[100]; -			 -			p2 = p; -			while ((p2 < limit) -				&& (!isspace(UCHAR(*p2))) /* INTL: ISO space */ -				 && (p2 < p+20)) { -			    p2++; -			} -			sprintf(buf, -				"list element in quotes followed by \"%.*s\" %s", -				(int) (p2-p), p, "instead of space"); -			Tcl_SetResult(interp, buf, TCL_VOLATILE); +		/* +		 * Garbage after the closing quote; return an error. +		 */ + +		if (interp != NULL) { +		    p2 = p; +		    while ((p2 < limit) && (!TclIsSpaceProc(*p2)) +			    && (p2 < p+20)) { +			p2++;  		    } -		    return TCL_ERROR; +		    Tcl_SetObjResult(interp, Tcl_ObjPrintf( +			    "list element in quotes followed by \"%.*s\" " +			    "instead of space", (int) (p2-p), p)); +		    Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "JUNK", +			    NULL);  		} -		break; +		return TCL_ERROR; +	    } +	    break;  	}  	p++;      } -      /*       * End of list: terminate element.       
*/ @@ -309,22 +670,26 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,      if (p == limit) {  	if (openBraces != 0) {  	    if (interp != NULL) { -		Tcl_SetResult(interp, "unmatched open brace in list", -			TCL_STATIC); +		Tcl_SetObjResult(interp, Tcl_NewStringObj( +			"unmatched open brace in list", -1)); +		Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "BRACE", +			NULL);  	    }  	    return TCL_ERROR;  	} else if (inQuotes) {  	    if (interp != NULL) { -		Tcl_SetResult(interp, "unmatched open quote in list", -			TCL_STATIC); +		Tcl_SetObjResult(interp, Tcl_NewStringObj( +			"unmatched open quote in list", -1)); +		Tcl_SetErrorCode(interp, "TCL", "VALUE", "LIST", "QUOTE", +			NULL);  	    }  	    return TCL_ERROR;  	}  	size = (p - elemStart);      } -    done: -    while ((p < limit) && (isspace(UCHAR(*p)))) { /* INTL: ISO space. */ +  done: +    while ((p < limit) && (TclIsSpaceProc(*p))) {  	p++;      }      *elementPtr = elemStart; @@ -332,6 +697,9 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,      if (sizePtr != 0) {  	*sizePtr = size;      } +    if (literalPtr != 0) { +	*literalPtr = literal; +    }      return TCL_OK;  } @@ -340,14 +708,13 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,   *   * TclCopyAndCollapse --   * - *	Copy a string and eliminate any backslashes that aren't in braces. + *	Copy a string and substitute all backslash escape sequences   *   * Results: - *	Count characters get copied from src to	dst. Along the way, if - *	backslash sequences are found outside braces, the backslashes are - *	eliminated in the copy. After scanning count chars from source, a - *	null character is placed at the end of dst.  Returns the number - *	of characters that got copied. + *	Count bytes get copied from src to dst. Along the way, backslash + *	sequences are substituted in the copy. After scanning count bytes from + *	src, a null character is placed at the end of dst. 
Returns the number + *	of bytes that got written to dst.   *   * Side effects:   *	None. @@ -356,27 +723,30 @@ TclFindElement(interp, list, listLength, elementPtr, nextPtr, sizePtr,   */  int -TclCopyAndCollapse(count, src, dst) -    int count;			/* Number of characters to copy from src. */ -    CONST char *src;		/* Copy from here... */ -    char *dst;			/* ... to here. */ +TclCopyAndCollapse( +    int count,			/* Number of byte to copy from src. */ +    const char *src,		/* Copy from here... */ +    char *dst)			/* ... to here. */  { -    register char c; -    int numRead;      int newCount = 0; -    int backslashCount; -    for (c = *src;  count > 0;  src++, c = *src, count--) { +    while (count > 0) { +	char c = *src; +  	if (c == '\\') { -	    backslashCount = Tcl_UtfBackslash(src, &numRead, dst); +	    int numRead; +	    int backslashCount = TclParseBackslash(src, count, &numRead, dst); +  	    dst += backslashCount;  	    newCount += backslashCount; -	    src += numRead-1; -	    count -= numRead-1; +	    src += numRead; +	    count -= numRead;  	} else {  	    *dst = c;  	    dst++;  	    newCount++; +	    src++; +	    count--;  	}      }      *dst = 0; @@ -391,21 +761,19 @@ TclCopyAndCollapse(count, src, dst)   *	Splits a list up into its constituent fields.   *   * Results - *	The return value is normally TCL_OK, which means that - *	the list was successfully split up.  If TCL_ERROR is - *	returned, it means that "list" didn't have proper list - *	structure;  the interp's result will contain a more detailed - *	error message. - * - *	*argvPtr will be filled in with the address of an array - *	whose elements point to the elements of list, in order. - *	*argcPtr will get filled in with the number of valid elements - *	in the array.  A single block of memory is dynamically allocated - *	to hold both the argv array and a copy of the list (with - *	backslashes and braces removed in the standard way). 
- *	The caller must eventually free this memory by calling free() - *	on *argvPtr.  Note:  *argvPtr and *argcPtr are only modified - *	if the procedure returns normally. + *	The return value is normally TCL_OK, which means that the list was + *	successfully split up. If TCL_ERROR is returned, it means that "list" + *	didn't have proper list structure; the interp's result will contain a + *	more detailed error message. + * + *	*argvPtr will be filled in with the address of an array whose elements + *	point to the elements of list, in order. *argcPtr will get filled in + *	with the number of valid elements in the array. A single block of + *	memory is dynamically allocated to hold both the argv array and a copy + *	of the list (with backslashes and braces removed in the standard way). + *	The caller must eventually free this memory by calling free() on + *	*argvPtr. Note: *argvPtr and *argcPtr are only modified if the + *	function returns normally.   *   * Side effects:   *	Memory is allocated. @@ -414,68 +782,64 @@ TclCopyAndCollapse(count, src, dst)   */  int -Tcl_SplitList(interp, list, argcPtr, argvPtr) -    Tcl_Interp *interp;		/* Interpreter to use for error reporting.  -				 * If NULL, no error message is left. */ -    CONST char *list;		/* Pointer to string with list structure. */ -    int *argcPtr;		/* Pointer to location to fill in with -				 * the number of elements in the list. */ -    CONST char ***argvPtr;	/* Pointer to place to store pointer to -				 * array of pointers to list elements. */ +Tcl_SplitList( +    Tcl_Interp *interp,		/* Interpreter to use for error reporting. If +				 * NULL, no error message is left. */ +    const char *list,		/* Pointer to string with list structure. */ +    int *argcPtr,		/* Pointer to location to fill in with the +				 * number of elements in the list. */ +    const char ***argvPtr)	/* Pointer to place to store pointer to array +				 * of pointers to list elements. 
*/  { -    CONST char **argv; -    CONST char *l; +    const char **argv, *end, *element;      char *p; -    int length, size, i, result, elSize, brace; -    CONST char *element; +    int length, size, i, result, elSize;      /* -     * Figure out how much space to allocate.  There must be enough -     * space for both the array of pointers and also for a copy of -     * the list.  To estimate the number of pointers needed, count -     * the number of space characters in the list. +     * Allocate enough space to work in. A (const char *) for each (possible) +     * list element plus one more for terminating NULL, plus as many bytes as +     * in the original string value, plus one more for a terminating '\0'. +     * Space used to hold element separating white space in the original +     * string gets re-purposed to hold '\0' characters in the argv array.       */ -    for (size = 1, l = list; *l != 0; l++) { -	if (isspace(UCHAR(*l))) { /* INTL: ISO space. */ -	    size++; -	} -    } -    size++;			/* Leave space for final NULL pointer. 
*/ -    argv = (CONST char **) ckalloc((unsigned) -	    ((size * sizeof(char *)) + (l - list) + 1)); -    length = strlen(list); +    size = TclMaxListLength(list, -1, &end) + 1; +    length = end - list; +    argv = ckalloc((size * sizeof(char *)) + length + 1); +      for (i = 0, p = ((char *) argv) + size*sizeof(char *);  	    *list != 0;  i++) { -	CONST char *prevList = list; -	 -	result = TclFindElement(interp, list, length, &element, -				&list, &elSize, &brace); +	const char *prevList = list; +	int literal; + +	result = TclFindElement(interp, list, length, &element, &list, +		&elSize, &literal);  	length -= (list - prevList);  	if (result != TCL_OK) { -	    ckfree((char *) argv); +	    ckfree(argv);  	    return result;  	}  	if (*element == 0) {  	    break;  	}  	if (i >= size) { -	    ckfree((char *) argv); +	    ckfree(argv);  	    if (interp != NULL) { -		Tcl_SetResult(interp, "internal error in Tcl_SplitList", -			TCL_STATIC); +		Tcl_SetObjResult(interp, Tcl_NewStringObj( +			"internal error in Tcl_SplitList", -1)); +		Tcl_SetErrorCode(interp, "TCL", "INTERNAL", "Tcl_SplitList", +			NULL);  	    }  	    return TCL_ERROR;  	}  	argv[i] = p; -	if (brace) { -	    memcpy((VOID *) p, (VOID *) element, (size_t) elSize); +	if (literal) { +	    memcpy(p, element, (size_t) elSize);  	    p += elSize;  	    *p = 0;  	    p++;  	} else { -	    TclCopyAndCollapse(elSize, element, p); -	    p += elSize+1; +	    p += 1 + TclCopyAndCollapse(elSize, element, p);  	}      } @@ -490,17 +854,15 @@ Tcl_SplitList(interp, list, argcPtr, argvPtr)   *   * Tcl_ScanElement --   * - *	This procedure is a companion procedure to Tcl_ConvertElement. - *	It scans a string to see what needs to be done to it (e.g. add - *	backslashes or enclosing braces) to make the string into a - *	valid Tcl list element. + *	This function is a companion function to Tcl_ConvertElement. It scans + *	a string to see what needs to be done to it (e.g. 
add backslashes or + *	enclosing braces) to make the string into a valid Tcl list element.   *   * Results: - *	The return value is an overestimate of the number of characters - *	that will be needed by Tcl_ConvertElement to produce a valid - *	list element from string.  The word at *flagPtr is filled in - *	with a value needed by Tcl_ConvertElement when doing the actual - *	conversion. + *	The return value is an overestimate of the number of bytes that will + *	be needed by Tcl_ConvertElement to produce a valid list element from + *	src. The word at *flagPtr is filled in with a value needed by + *	Tcl_ConvertElement when doing the actual conversion.   *   * Side effects:   *	None. @@ -509,12 +871,12 @@ Tcl_SplitList(interp, list, argcPtr, argvPtr)   */  int -Tcl_ScanElement(string, flagPtr) -    register CONST char *string; /* String to convert to list element. */ -    register int *flagPtr;	 /* Where to store information to guide -				  * Tcl_ConvertCountedElement. */ +Tcl_ScanElement( +    register const char *src,	/* String to convert to list element. */ +    register int *flagPtr)	/* Where to store information to guide +				 * Tcl_ConvertCountedElement. */  { -    return Tcl_ScanCountedElement(string, -1, flagPtr); +    return Tcl_ScanCountedElement(src, -1, flagPtr);  }  /* @@ -522,19 +884,17 @@ Tcl_ScanElement(string, flagPtr)   *   * Tcl_ScanCountedElement --   * - *	This procedure is a companion procedure to - *	Tcl_ConvertCountedElement.  It scans a string to see what - *	needs to be done to it (e.g. add backslashes or enclosing - *	braces) to make the string into a valid Tcl list element. - *	If length is -1, then the string is scanned up to the first - *	null byte. + *	This function is a companion function to Tcl_ConvertCountedElement. It + *	scans a string to see what needs to be done to it (e.g. add + *	backslashes or enclosing braces) to make the string into a valid Tcl + *	list element. 
If length is -1, then the string is scanned from src up + *	to the first null byte.   *   * Results: - *	The return value is an overestimate of the number of characters - *	that will be needed by Tcl_ConvertCountedElement to produce a - *	valid list element from string.  The word at *flagPtr is - *	filled in with a value needed by Tcl_ConvertCountedElement - *	when doing the actual conversion. + *	The return value is an overestimate of the number of bytes that will + *	be needed by Tcl_ConvertCountedElement to produce a valid list element + *	from src. The word at *flagPtr is filled in with a value needed by + *	Tcl_ConvertCountedElement when doing the actual conversion.   *   * Side effects:   *	None. @@ -543,115 +903,313 @@ Tcl_ScanElement(string, flagPtr)   */  int -Tcl_ScanCountedElement(string, length, flagPtr) -    CONST char *string;		/* String to convert to Tcl list element. */ -    int length;			/* Number of bytes in string, or -1. */ -    int *flagPtr;		/* Where to store information to guide +Tcl_ScanCountedElement( +    const char *src,		/* String to convert to Tcl list element. */ +    int length,			/* Number of bytes in src, or -1. */ +    int *flagPtr)		/* Where to store information to guide  				 * Tcl_ConvertElement. */  { -    int flags, nestingLevel; -    register CONST char *p, *lastChar; +    int flags = CONVERT_ANY; +    int numBytes = TclScanElement(src, length, &flags); -    /* -     * This procedure and Tcl_ConvertElement together do two things: -     * -     * 1. They produce a proper list, one that will yield back the -     * argument strings when evaluated or when disassembled with -     * Tcl_SplitList.  This is the most important thing. -     *  -     * 2. They try to produce legible output, which means minimizing the -     * use of backslashes (using braces instead).  However, there are -     * some situations where backslashes must be used (e.g. an element -     * like "{abc": the leading brace will have to be backslashed. 
-     * For each element, one of three things must be done: -     * -     * (a) Use the element as-is (it doesn't contain any special -     * characters).  This is the most desirable option. -     * -     * (b) Enclose the element in braces, but leave the contents alone. -     * This happens if the element contains embedded space, or if it -     * contains characters with special interpretation ($, [, ;, or \), -     * or if it starts with a brace or double-quote, or if there are -     * no characters in the element. -     * -     * (c) Don't enclose the element in braces, but add backslashes to -     * prevent special interpretation of special characters.  This is a -     * last resort used when the argument would normally fall under case -     * (b) but contains unmatched braces.  It also occurs if the last -     * character of the argument is a backslash or if the element contains -     * a backslash followed by newline. -     * -     * The procedure figures out how many bytes will be needed to store -     * the result (actually, it overestimates). It also collects information -     * about the element in the form of a flags word. -     * -     * Note: list elements produced by this procedure and -     * Tcl_ConvertCountedElement must have the property that they can be -     * enclosing in curly braces to make sub-lists.  This means, for -     * example, that we must not leave unmatched curly braces in the -     * resulting list element.  This property is necessary in order for -     * procedures like Tcl_DStringStartSublist to work. -     */ +    *flagPtr = flags; +    return numBytes; +} + +/* + *---------------------------------------------------------------------- + * + * TclScanElement -- + * + *	This function is a companion function to TclConvertElement. It scans a + *	string to see what needs to be done to it (e.g. add backslashes or + *	enclosing braces) to make the string into a valid Tcl list element. 
If + *	length is -1, then the string is scanned from src up to the first null + *	byte. A NULL value for src is treated as an empty string. The incoming + *	value of *flagPtr is a report from the caller what additional flags it + *	will pass to TclConvertElement(). + * + * Results: + *	The recommended formatting mode for the element is determined and a + *	value is written to *flagPtr indicating that recommendation. This + *	recommendation is combined with the incoming flag values in *flagPtr + *	set by the caller to determine how many bytes will be needed by + *	TclConvertElement() in which to write the formatted element following + *	the recommendation modified by the flag values. This number of bytes + *	is the return value of the routine.  In some situations it may be an + *	overestimate, but so long as the caller passes the same flags to + *	TclConvertElement(), it will be large enough. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ -    nestingLevel = 0; -    flags = 0; -    if (string == NULL) { -	string = ""; -    } -    if (length == -1) { -	length = strlen(string); +int +TclScanElement( +    const char *src,		/* String to convert to Tcl list element. */ +    int length,			/* Number of bytes in src, or -1. */ +    int *flagPtr)		/* Where to store information to guide +				 * Tcl_ConvertElement. */ +{ +    const char *p = src; +    int nestingLevel = 0;	/* Brace nesting count */ +    int forbidNone = 0;		/* Do not permit CONVERT_NONE mode. Something +				 * needs protection or escape. */ +    int requireEscape = 0;	/* Force use of CONVERT_ESCAPE mode.  For some +				 * reason bare or brace-quoted form fails. */ +    int extra = 0;		/* Count of number of extra bytes needed for +				 * formatted element, assuming we use escape +				 * sequences in formatting. */ +    int bytesNeeded;		/* Buffer length computed to complete the +				 * element formatting in the selected mode. 
*/ +#if COMPAT +    int preferEscape = 0;	/* Use preferences to track whether to use */ +    int preferBrace = 0;	/* CONVERT_MASK mode. */ +    int braceCount = 0;		/* Count of all braces '{' '}' seen. */ +#endif /* COMPAT */ +     +    if ((p == NULL) || (length == 0) || ((*p == '\0') && (length == -1))) { +	/* +	 * Empty string element must be brace quoted. +	 */ + +	*flagPtr = CONVERT_BRACE; +	return 2;      } -    lastChar = string + length; -    p = string; -    if ((p == lastChar) || (*p == '{') || (*p == '"')) { -	flags |= USE_BRACES; + +    if ((*p == '{') || (*p == '"')) { +	/* +	 * Must escape or protect so leading character of value is not +	 * misinterpreted as list element delimiting syntax. +	 */ + +	forbidNone = 1; +#if COMPAT +	preferBrace = 1; +#endif /* COMPAT */      } -    for ( ; p < lastChar; p++) { + +    while (length) { +      if (CHAR_TYPE(*p) != TYPE_NORMAL) {  	switch (*p) { -	    case '{': -		nestingLevel++; -		break; -	    case '}': -		nestingLevel--; -		if (nestingLevel < 0) { -		    flags |= TCL_DONT_USE_BRACES|BRACES_UNMATCHED; -		} -		break; -	    case '[': -	    case '$': -	    case ';': -	    case ' ': -	    case '\f': -	    case '\n': -	    case '\r': -	    case '\t': -	    case '\v': -		flags |= USE_BRACES; +	case '{':	/* TYPE_BRACE */ +#if COMPAT +	    braceCount++; +#endif /* COMPAT */ +	    extra++;				/* Escape '{' => '\{' */ +	    nestingLevel++; +	    break; +	case '}':	/* TYPE_BRACE */ +#if COMPAT +	    braceCount++; +#endif /* COMPAT */ +	    extra++;				/* Escape '}' => '\}' */ +	    nestingLevel--; +	    if (nestingLevel < 0) { +		/* +		 * Unbalanced braces!  Cannot format with brace quoting. 
+		 */ + +		requireEscape = 1; +	    } +	    break; +	case ']':	/* TYPE_CLOSE_BRACK */ +	case '"':	/* TYPE_SPACE */ +#if COMPAT +	    forbidNone = 1; +	    extra++;		/* Escapes all just prepend a backslash */ +	    preferEscape = 1; +	    break; +#else +	    /* FLOW THROUGH */ +#endif /* COMPAT */ +	case '[':	/* TYPE_SUBS */ +	case '$':	/* TYPE_SUBS */ +	case ';':	/* TYPE_COMMAND_END */ +	case ' ':	/* TYPE_SPACE */ +	case '\f':	/* TYPE_SPACE */ +	case '\n':	/* TYPE_COMMAND_END */ +	case '\r':	/* TYPE_SPACE */ +	case '\t':	/* TYPE_SPACE */ +	case '\v':	/* TYPE_SPACE */ +	    forbidNone = 1; +	    extra++;		/* Escape sequences all one byte longer. */ +#if COMPAT +	    preferBrace = 1; +#endif /* COMPAT */ +	    break; +	case '\\':	/* TYPE_SUBS */ +	    extra++;				/* Escape '\' => '\\' */ +	    if ((length == 1) || ((length == -1) && (p[1] == '\0'))) { +		/* +		 * Final backslash. Cannot format with brace quoting. +		 */ + +		requireEscape = 1;		  		break; -	    case '\\': -		if ((p+1 == lastChar) || (p[1] == '\n')) { -		    flags = TCL_DONT_USE_BRACES | BRACES_UNMATCHED; -		} else { -		    int size; +	    } +	    if (p[1] == '\n') { +		extra++;	/* Escape newline => '\n', one byte longer */ -		    Tcl_UtfBackslash(p, &size, NULL); -		    p += size-1; -		    flags |= USE_BRACES; -		} +		/* +		 * Backslash newline sequence.  Brace quoting not permitted. +		 */ + +		requireEscape = 1; +		length -= (length > 0); +		p++;  		break; +	    } +	    if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) { +		extra++;	/* Escape sequences all one byte longer. */ +		length -= (length > 0); +		p++; +	    } +	    forbidNone = 1; +#if COMPAT +	    preferBrace = 1; +#endif /* COMPAT */ +	    break; +	case '\0':	/* TYPE_SUBS */ +	    if (length == -1) { +		goto endOfString; +	    } +	    /* TODO: Panic on improper encoding? 
*/ +	    break;  	} +      } +	length -= (length > 0); +	p++;      } + +  endOfString:      if (nestingLevel != 0) { -	flags = TCL_DONT_USE_BRACES | BRACES_UNMATCHED; +	/* +	 * Unbalanced braces!  Cannot format with brace quoting. +	 */ + +	requireEscape = 1; +    } + +    /* +     * We need at least as many bytes as are in the element value... +     */ + +    bytesNeeded = p - src; + +    if (requireEscape) { +	/* +	 * We must use escape sequences.  Add all the extra bytes needed to +	 * have room to create them. +	 */ + +	bytesNeeded += extra; + +	/* +	 * Make room to escape leading #, if needed. +	 */ + +	if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { +	    bytesNeeded++; +	} +	*flagPtr = CONVERT_ESCAPE; +	goto overflowCheck; +    } +    if (*flagPtr & CONVERT_ANY) { +	/* +	 * The caller has not let us know what flags it will pass to +	 * TclConvertElement() so compute the max size we might need for any +	 * possible choice.  Normally the formatting using escape sequences is +	 * the longer one, and a minimum "extra" value of 2 makes sure we +	 * don't request too small a buffer in those edge cases where that's +	 * not true. +	 */ + +	if (extra < 2) { +	    extra = 2; +	} +	*flagPtr &= ~CONVERT_ANY; +	*flagPtr |= TCL_DONT_USE_BRACES; +    } +    if (forbidNone) { +	/* +	 * We must request some form of quoting of escaping... +	 */ + +#if COMPAT +	if (preferEscape && !preferBrace) { +	    /* +	     * If we are quoting solely due to ] or internal " characters use +	     * the CONVERT_MASK mode where we escape all special characters +	     * except for braces. "extra" counted space needed to escape +	     * braces too, so substract "braceCount" to get our actual needs. +	     */ + +	    bytesNeeded += (extra - braceCount); +	    /* Make room to escape leading #, if needed. 
*/ +	    if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { +		bytesNeeded++; +	    } + +	    /* +	     * If the caller reports it will direct TclConvertElement() to +	     * use full escapes on the element, add back the bytes needed to +	     * escape the braces. +	     */ + +	    if (*flagPtr & TCL_DONT_USE_BRACES) { +		bytesNeeded += braceCount; +	    } +	    *flagPtr = CONVERT_MASK; +	    goto overflowCheck; +	} +#endif /* COMPAT */ +	if (*flagPtr & TCL_DONT_USE_BRACES) { +	    /* +	     * If the caller reports it will direct TclConvertElement() to +	     * use escapes, add the extra bytes needed to have room for them. +	     */ + +	    bytesNeeded += extra; + +	    /* +	     * Make room to escape leading #, if needed. +	     */ + +	    if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { +		bytesNeeded++; +	    } +	} else { +	    /* +	     * Add 2 bytes for room for the enclosing braces. +	     */ + +	    bytesNeeded += 2; +	} +	*flagPtr = CONVERT_BRACE; +	goto overflowCheck;      } -    *flagPtr = flags;      /* -     * Allow enough space to backslash every character plus leave -     * two spaces for braces. +     * So far, no need to quote or escape anything.       */ -    return 2*(p-string) + 2; +    if ((*src == '#') && !(*flagPtr & TCL_DONT_QUOTE_HASH)) { +	/* +	 * If we need to quote a leading #, make room to enclose in braces. +	 */ + +	bytesNeeded += 2; +    } +    *flagPtr = CONVERT_NONE; + +  overflowCheck: +    if (bytesNeeded < 0) { +	Tcl_Panic("TclScanElement: string length overflow"); +    } +    return bytesNeeded;  }  /* @@ -659,16 +1217,15 @@ Tcl_ScanCountedElement(string, length, flagPtr)   *   * Tcl_ConvertElement --   * - *	This is a companion procedure to Tcl_ScanElement.  Given - *	the information produced by Tcl_ScanElement, this procedure - *	converts a string to a list element equal to that string. + *	This is a companion function to Tcl_ScanElement. 
Given the information + *	produced by Tcl_ScanElement, this function converts a string to a list + *	element equal to that string.   *   * Results: - *	Information is copied to *dst in the form of a list element - *	identical to src (i.e. if Tcl_SplitList is applied to dst it - *	will produce a string identical to src).  The return value is - *	a count of the number of characters copied (not including the - *	terminating NULL character). + *	Information is copied to *dst in the form of a list element identical + *	to src (i.e. if Tcl_SplitList is applied to dst it will produce a + *	string identical to src). The return value is a count of the number of + *	characters copied (not including the terminating NULL character).   *   * Side effects:   *	None. @@ -677,10 +1234,10 @@ Tcl_ScanCountedElement(string, length, flagPtr)   */  int -Tcl_ConvertElement(src, dst, flags) -    register CONST char *src;	/* Source information for list element. */ -    register char *dst;		/* Place to put list-ified element. */ -    register int flags;		/* Flags produced by Tcl_ScanElement. */ +Tcl_ConvertElement( +    register const char *src,	/* Source information for list element. */ +    register char *dst,		/* Place to put list-ified element. */ +    register int flags)		/* Flags produced by Tcl_ScanElement. */  {      return Tcl_ConvertCountedElement(src, -1, dst, flags);  } @@ -690,17 +1247,15 @@ Tcl_ConvertElement(src, dst, flags)   *   * Tcl_ConvertCountedElement --   * - *	This is a companion procedure to Tcl_ScanCountedElement.  Given - *	the information produced by Tcl_ScanCountedElement, this - *	procedure converts a string to a list element equal to that - *	string. + *	This is a companion function to Tcl_ScanCountedElement. Given the + *	information produced by Tcl_ScanCountedElement, this function converts + *	a string to a list element equal to that string.   *   * Results: - *	Information is copied to *dst in the form of a list element - *	identical to src (i.e. 
if Tcl_SplitList is applied to dst it - *	will produce a string identical to src).  The return value is - *	a count of the number of characters copied (not including the - *	terminating NULL character). + *	Information is copied to *dst in the form of a list element identical + *	to src (i.e. if Tcl_SplitList is applied to dst it will produce a + *	string identical to src). The return value is a count of the number of + *	characters copied (not including the terminating NULL character).   *   * Side effects:   *	None. @@ -709,118 +1264,197 @@ Tcl_ConvertElement(src, dst, flags)   */  int -Tcl_ConvertCountedElement(src, length, dst, flags) -    register CONST char *src;	/* Source information for list element. */ -    int length;			/* Number of bytes in src, or -1. */ -    char *dst;			/* Place to put list-ified element. */ -    int flags;			/* Flags produced by Tcl_ScanElement. */ +Tcl_ConvertCountedElement( +    register const char *src,	/* Source information for list element. */ +    int length,			/* Number of bytes in src, or -1. */ +    char *dst,			/* Place to put list-ified element. */ +    int flags)			/* Flags produced by Tcl_ScanElement. */  { -    register char *p = dst; -    register CONST char *lastChar; +    int numBytes = TclConvertElement(src, length, dst, flags); +    dst[numBytes] = '\0'; +    return numBytes; +} + +/* + *---------------------------------------------------------------------- + * + * TclConvertElement -- + * + *	This is a companion function to TclScanElement. Given the information + *	produced by TclScanElement, this function converts a string to a list + *	element equal to that string. + * + * Results: + *	Information is copied to *dst in the form of a list element identical + *	to src (i.e. if Tcl_SplitList is applied to dst it will produce a + *	string identical to src). The return value is a count of the number of + *	characters copied (not including the terminating NULL character). + * + * Side effects: + *	None. 
+ * + *---------------------------------------------------------------------- + */ + +int +TclConvertElement( +    register const char *src,	/* Source information for list element. */ +    int length,			/* Number of bytes in src, or -1. */ +    char *dst,			/* Place to put list-ified element. */ +    int flags)			/* Flags produced by Tcl_ScanElement. */ +{ +    int conversion = flags & CONVERT_MASK; +    char *p = dst;      /* -     * See the comment block at the beginning of the Tcl_ScanElement -     * code for details of how this works. +     * Let the caller demand we use escape sequences rather than braces.       */ -    if (src && length == -1) { -	length = strlen(src); +    if ((flags & TCL_DONT_USE_BRACES) && (conversion & CONVERT_BRACE)) { +	conversion = CONVERT_ESCAPE;      } -    if ((src == NULL) || (length == 0)) { -	p[0] = '{'; -	p[1] = '}'; -	p[2] = 0; -	return 2; + +    /* +     * No matter what the caller demands, empty string must be braced! +     */ + +    if ((src == NULL) || (length == 0) || (*src == '\0' && length == -1)) { +	src = tclEmptyStringRep; +	length = 0; +	conversion = CONVERT_BRACE; +    } + +    /* +     * Escape leading hash as needed and requested. +     */ + +    if ((*src == '#') && !(flags & TCL_DONT_QUOTE_HASH)) { +	if (conversion == CONVERT_ESCAPE) { +	    p[0] = '\\'; +	    p[1] = '#'; +	    p += 2; +	    src++; +	    length -= (length > 0); +	} else { +	    conversion = CONVERT_BRACE; +	}      } -    lastChar = src + length; -    if ((flags & USE_BRACES) && !(flags & TCL_DONT_USE_BRACES)) { + +    /* +     * No escape or quoting needed.  Copy the literal string value. +     */ + +    if (conversion == CONVERT_NONE) { +	if (length == -1) { +	    /* TODO: INT_MAX overflow? */ +	    while (*src) { +		*p++ = *src++; +	    } +	    return p - dst; +	} else { +	    memcpy(dst, src, length); +	    return length; +	} +    } + +    /* +     * Formatted string is original string enclosed in braces. 
+     */ + +    if (conversion == CONVERT_BRACE) {  	*p = '{';  	p++; -	for ( ; src != lastChar; src++, p++) { -	    *p = *src; +	if (length == -1) { +	    /* TODO: INT_MAX overflow? */ +	    while (*src) { +		*p++ = *src++; +	    } +	} else { +	    memcpy(p, src, length); +	    p += length;  	}  	*p = '}';  	p++; -    } else { -	if (*src == '{') { -	    /* -	     * Can't have a leading brace unless the whole element is -	     * enclosed in braces.  Add a backslash before the brace. -	     * Furthermore, this may destroy the balance between open -	     * and close braces, so set BRACES_UNMATCHED. -	     */ +	return p - dst; +    } -	    p[0] = '\\'; -	    p[1] = '{'; -	    p += 2; -	    src++; -	    flags |= BRACES_UNMATCHED; -	} -	for (; src != lastChar; src++) { -	    switch (*src) { -		case ']': -		case '[': -		case '$': -		case ';': -		case ' ': -		case '\\': -		case '"': -		    *p = '\\'; -		    p++; -		    break; -		case '{': -		case '}': -		    /* -		     * It may not seem necessary to backslash braces, but -		     * it is.  The reason for this is that the resulting -		     * list element may actually be an element of a sub-list -		     * enclosed in braces (e.g. if Tcl_DStringStartSublist -		     * has been invoked), so there may be a brace mismatch -		     * if the braces aren't backslashed. 
-		     */ - -		    if (flags & BRACES_UNMATCHED) { -			*p = '\\'; -			p++; -		    } -		    break; -		case '\f': -		    *p = '\\'; -		    p++; -		    *p = 'f'; -		    p++; -		    continue; -		case '\n': -		    *p = '\\'; -		    p++; -		    *p = 'n'; -		    p++; -		    continue; -		case '\r': -		    *p = '\\'; -		    p++; -		    *p = 'r'; -		    p++; -		    continue; -		case '\t': -		    *p = '\\'; -		    p++; -		    *p = 't'; -		    p++; -		    continue; -		case '\v': -		    *p = '\\'; -		    p++; -		    *p = 'v'; -		    p++; -		    continue; +    /* conversion == CONVERT_ESCAPE or CONVERT_MASK */ + +    /* +     * Formatted string is original string converted to escape sequences. +     */ + +    for ( ; length; src++, length -= (length > 0)) { +	switch (*src) { +	case ']': +	case '[': +	case '$': +	case ';': +	case ' ': +	case '\\': +	case '"': +	    *p = '\\'; +	    p++; +	    break; +	case '{': +	case '}': +#if COMPAT +	    if (conversion == CONVERT_ESCAPE) +#endif /* COMPAT */ +	    { +		*p = '\\'; +		p++;  	    } -	    *p = *src; +	    break; +	case '\f': +	    *p = '\\'; +	    p++; +	    *p = 'f';  	    p++; +	    continue; +	case '\n': +	    *p = '\\'; +	    p++; +	    *p = 'n'; +	    p++; +	    continue; +	case '\r': +	    *p = '\\'; +	    p++; +	    *p = 'r'; +	    p++; +	    continue; +	case '\t': +	    *p = '\\'; +	    p++; +	    *p = 't'; +	    p++; +	    continue; +	case '\v': +	    *p = '\\'; +	    p++; +	    *p = 'v'; +	    p++; +	    continue; +	case '\0': +	    if (length == -1) { +		return p - dst; +	    } + +	    /*  +	     * If we reach this point, there's an embedded NULL in the string +	     * range being processed, which should not happen when the +	     * encoding rules for Tcl strings are properly followed.  If the +	     * day ever comes when we stop tolerating such things, this is +	     * where to put the Tcl_Panic(). 
+	     */ + +	    break;  	} +	*p = *src; +	p++;      } -    *p = '\0'; -    return p-dst; +    return p - dst;  }  /* @@ -828,15 +1462,14 @@ Tcl_ConvertCountedElement(src, length, dst, flags)   *   * Tcl_Merge --   * - *	Given a collection of strings, merge them together into a - *	single string that has proper Tcl list structured (i.e. - *	Tcl_SplitList may be used to retrieve strings equal to the - *	original elements, and Tcl_Eval will parse the string back - *	into its original elements). + *	Given a collection of strings, merge them together into a single + *	string that has proper Tcl list structured (i.e. Tcl_SplitList may be + *	used to retrieve strings equal to the original elements, and Tcl_Eval + *	will parse the string back into its original elements).   *   * Results: - *	The return value is the address of a dynamically-allocated - *	string containing the merged list. + *	The return value is the address of a dynamically-allocated string + *	containing the merged list.   *   * Side effects:   *	None. @@ -845,16 +1478,26 @@ Tcl_ConvertCountedElement(src, length, dst, flags)   */  char * -Tcl_Merge(argc, argv) -    int argc;			/* How many strings to merge. */ -    CONST char * CONST *argv;	/* Array of string values. */ +Tcl_Merge( +    int argc,			/* How many strings to merge. */ +    const char *const *argv)	/* Array of string values. */  { -#   define LOCAL_SIZE 20 -    int localFlags[LOCAL_SIZE], *flagPtr; -    int numChars; -    char *result; -    char *dst; -    int i; +#define LOCAL_SIZE 20 +    int localFlags[LOCAL_SIZE], *flagPtr = NULL; +    int i, bytesNeeded = 0; +    char *result, *dst; +    const int maxFlags = UINT_MAX / sizeof(int); + +    /* +     * Handle empty list case first, so logic of the general case can be +     * simpler. +     */ + +    if (argc == 0) { +	result = ckalloc(1); +	result[0] = '\0'; +	return result; +    }      /*       * Pass 1: estimate space, gather flags. 
@@ -862,34 +1505,51 @@ Tcl_Merge(argc, argv)      if (argc <= LOCAL_SIZE) {  	flagPtr = localFlags; +    } else if (argc > maxFlags) { +	/* +	 * We cannot allocate a large enough flag array to format this list in +	 * one pass.  We could imagine converting this routine to a multi-pass +	 * implementation, but for sizeof(int) == 4, the limit is a max of +	 * 2^30 list elements and since each element is at least one byte +	 * formatted, and requires one byte space between it and the next one, +	 * that a minimum space requirement of 2^31 bytes, which is already +	 * INT_MAX. If we tried to format a list of > maxFlags elements, we're +	 * just going to overflow the size limits on the formatted string +	 * anyway, so just issue that same panic early. +	 */ + +	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);      } else { -	flagPtr = (int *) ckalloc((unsigned) argc*sizeof(int)); +	flagPtr = ckalloc(argc * sizeof(int));      } -    numChars = 1;      for (i = 0; i < argc; i++) { -	numChars += Tcl_ScanElement(argv[i], &flagPtr[i]) + 1; +	flagPtr[i] = ( i ? TCL_DONT_QUOTE_HASH : 0 ); +	bytesNeeded += TclScanElement(argv[i], -1, &flagPtr[i]); +	if (bytesNeeded < 0) { +	    Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); +	} +    } +    if (bytesNeeded > INT_MAX - argc + 1) { +	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);      } +    bytesNeeded += argc;      /*       * Pass two: copy into the result area.       */ -    result = (char *) ckalloc((unsigned) numChars); +    result = ckalloc(bytesNeeded);      dst = result;      for (i = 0; i < argc; i++) { -	numChars = Tcl_ConvertElement(argv[i], dst, flagPtr[i]); -	dst += numChars; +	flagPtr[i] |= ( i ? 
TCL_DONT_QUOTE_HASH : 0 ); +	dst += TclConvertElement(argv[i], -1, dst, flagPtr[i]);  	*dst = ' ';  	dst++;      } -    if (dst == result) { -	*dst = 0; -    } else { -	dst[-1] = 0; -    } +    dst[-1] = 0;      if (flagPtr != localFlags) { -	ckfree((char *) flagPtr); +	ckfree(flagPtr);      }      return result;  } @@ -902,10 +1562,10 @@ Tcl_Merge(argc, argv)   *	Figure out how to handle a backslash sequence.   *   * Results: - *	The return value is the character that should be substituted - *	in place of the backslash sequence that starts at src.  If - *	readPtr isn't NULL then it is filled in with a count of the - *	number of characters in the backslash sequence. + *	The return value is the character that should be substituted in place + *	of the backslash sequence that starts at src. If readPtr isn't NULL + *	then it is filled in with a count of the number of characters in the + *	backslash sequence.   *   * Side effects:   *	None. @@ -914,90 +1574,292 @@ Tcl_Merge(argc, argv)   */  char -Tcl_Backslash(src, readPtr) -    CONST char *src;		/* Points to the backslash character of -				 * a backslash sequence. */ -    int *readPtr;		/* Fill in with number of characters read -				 * from src, unless NULL. */ +Tcl_Backslash( +    const char *src,		/* Points to the backslash character of a +				 * backslash sequence. */ +    int *readPtr)		/* Fill in with number of characters read from +				 * src, unless NULL. */  {      char buf[TCL_UTF_MAX];      Tcl_UniChar ch;      Tcl_UtfBackslash(src, readPtr, buf); -    Tcl_UtfToUniChar(buf, &ch); +    TclUtfToUniChar(buf, &ch);      return (char) ch;  }  /*   *----------------------------------------------------------------------   * + * TclTrimRight -- + * + *	Takes two counted strings in the Tcl encoding which must both be null + *	terminated. Conceptually trims from the right side of the first string + *	all characters found in the second string. 
+ * + * Results: + *	The number of bytes to be removed from the end of the string. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclTrimRight( +    const char *bytes,		/* String to be trimmed... */ +    int numBytes,		/* ...and its length in bytes */ +    const char *trim,		/* String of trim characters... */ +    int numTrim)		/* ...and its length in bytes */ +{ +    const char *p = bytes + numBytes; +    int pInc; + +    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { +	Tcl_Panic("TclTrimRight works only on null-terminated strings"); +    } + +    /* +     * Empty strings -> nothing to do. +     */ + +    if ((numBytes == 0) || (numTrim == 0)) { +	return 0; +    } + +    /* +     * Outer loop: iterate over string to be trimmed. +     */ + +    do { +	Tcl_UniChar ch1; +	const char *q = trim; +	int bytesLeft = numTrim; + +	p = Tcl_UtfPrev(p, bytes); + 	pInc = TclUtfToUniChar(p, &ch1); + +	/* +	 * Inner loop: scan trim string for match to current character. +	 */ + +	do { +	    Tcl_UniChar ch2; +	    int qInc = TclUtfToUniChar(q, &ch2); + +	    if (ch1 == ch2) { +		break; +	    } + +	    q += qInc; +	    bytesLeft -= qInc; +	} while (bytesLeft); + +	if (bytesLeft == 0) { +	    /* +	     * No match; trim task done; *p is last non-trimmed char. +	     */ + +	    p += pInc; +	    break; +	} +    } while (p > bytes); + +    return numBytes - (p - bytes); +} + +/* + *---------------------------------------------------------------------- + * + * TclTrimLeft -- + * + *	Takes two counted strings in the Tcl encoding which must both be null + *	terminated. Conceptually trims from the left side of the first string + *	all characters found in the second string. + * + * Results: + *	The number of bytes to be removed from the start of the string. + * + * Side effects: + *	None. 
+ * + *---------------------------------------------------------------------- + */ + +int +TclTrimLeft( +    const char *bytes,		/* String to be trimmed... */ +    int numBytes,		/* ...and its length in bytes */ +    const char *trim,		/* String of trim characters... */ +    int numTrim)		/* ...and its length in bytes */ +{ +    const char *p = bytes; + +    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) { +	Tcl_Panic("TclTrimLeft works only on null-terminated strings"); +    } + +    /* +     * Empty strings -> nothing to do. +     */ + +    if ((numBytes == 0) || (numTrim == 0)) { +	return 0; +    } + +    /* +     * Outer loop: iterate over string to be trimmed. +     */ + +    do { +	Tcl_UniChar ch1; +	int pInc = TclUtfToUniChar(p, &ch1); +	const char *q = trim; +	int bytesLeft = numTrim; + +	/* +	 * Inner loop: scan trim string for match to current character. +	 */ + +	do { +	    Tcl_UniChar ch2; +	    int qInc = TclUtfToUniChar(q, &ch2); + +	    if (ch1 == ch2) { +		break; +	    } + +	    q += qInc; +	    bytesLeft -= qInc; +	} while (bytesLeft); + +	if (bytesLeft == 0) { +	    /* +	     * No match; trim task done; *p is first non-trimmed char. +	     */ + +	    break; +	} + +	p += pInc; +	numBytes -= pInc; +    } while (numBytes); + +    return p - bytes; +} + +/* + *---------------------------------------------------------------------- + *   * Tcl_Concat --   *   *	Concatenate a set of strings into a single large string.   *   * Results: - *	The return value is dynamically-allocated string containing - *	a concatenation of all the strings in argv, with spaces between - *	the original argv elements. + *	The return value is dynamically-allocated string containing a + *	concatenation of all the strings in argv, with spaces between the + *	original argv elements.   *   * Side effects: - *	Memory is allocated for the result;  the caller is responsible - *	for freeing the memory. 
+ *	Memory is allocated for the result; the caller is responsible for + *	freeing the memory.   *   *----------------------------------------------------------------------   */ +/* The whitespace characters trimmed during [concat] operations */ +#define CONCAT_WS_SIZE (int) (sizeof(CONCAT_TRIM_SET "") - 1) +  char * -Tcl_Concat(argc, argv) -    int argc;			/* Number of strings to concatenate. */ -    CONST char * CONST *argv;	/* Array of strings to concatenate. */ +Tcl_Concat( +    int argc,			/* Number of strings to concatenate. */ +    const char *const *argv)	/* Array of strings to concatenate. */  { -    int totalSize, i; -    char *p; -    char *result; +    int i, needSpace = 0, bytesNeeded = 0; +    char *result, *p; + +    /* +     * Dispose of the empty result corner case first to simplify later code. +     */ -    for (totalSize = 1, i = 0; i < argc; i++) { -	totalSize += strlen(argv[i]) + 1; -    } -    result = (char *) ckalloc((unsigned) totalSize);      if (argc == 0) { -	*result = '\0'; +	result = (char *) ckalloc(1); +	result[0] = '\0';  	return result;      } -    for (p = result, i = 0; i < argc; i++) { -	CONST char *element; -	int length; +    /* +     * First allocate the result buffer at the size required. +     */ + +    for (i = 0;  i < argc;  i++) { +	bytesNeeded += strlen(argv[i]); +	if (bytesNeeded < 0) { +	    Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); +	} +    } +    if (bytesNeeded + argc - 1 < 0) {  	/* -	 * Clip white space off the front and back of the string -	 * to generate a neater result, and ignore any empty -	 * elements. +	 * Panic test could be tighter, but not going to bother for this +	 * legacy routine.  	 */ +	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded"); +    } + +    /* +     * All element bytes + (argc - 1) spaces + 1 terminating NULL. 
+     */ + +    result = ckalloc((unsigned) (bytesNeeded + argc)); + +    for (p = result, i = 0;  i < argc;  i++) { +	int trim, elemLength; +	const char *element; +	  	element = argv[i]; -	while (isspace(UCHAR(*element))) { /* INTL: ISO space. */ -	    element++; -	} -	for (length = strlen(element); -		(length > 0) -		&& (isspace(UCHAR(element[length-1]))) /* INTL: ISO space. */ -		&& ((length < 2) || (element[length-2] != '\\')); -	        length--) { -	    /* Null loop body. */ -	} -	if (length == 0) { +	elemLength = strlen(argv[i]); + +	/* +	 * Trim away the leading whitespace. +	 */ + +	trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, +		CONCAT_WS_SIZE); +	element += trim; +	elemLength -= trim; + +	/* +	 * Trim away the trailing whitespace. Do not permit trimming to expose +	 * a final backslash character. +	 */ + +	trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, +		CONCAT_WS_SIZE); +	trim -= trim && (element[elemLength - trim - 1] == '\\'); +	elemLength -= trim; + +	/* +	 * If we're left with empty element after trimming, do nothing. +	 */ + +	if (elemLength == 0) {  	    continue;  	} -	memcpy((VOID *) p, (VOID *) element, (size_t) length); -	p += length; -	*p = ' '; -	p++; -    } -    if (p != result) { -	p[-1] = 0; -    } else { -	*p = 0; + +	/* +	 * Append to the result with space if needed. +	 */ + +	if (needSpace) { +	    *p++ = ' '; +	} +	memcpy(p, element, (size_t) elemLength); +	p += elemLength; +	needSpace = 1;      } +    *p = '\0';      return result;  } @@ -1010,8 +1872,8 @@ Tcl_Concat(argc, argv)   *	object with spaces between the original strings.   *   * Results: - *	The return value is a new string object containing a concatenation - *	of the strings in objv. Its ref count is zero. + *	The return value is a new string object containing a concatenation of + *	the strings in objv. Its ref count is zero.   *   * Side effects:   *	A new object is created. 
@@ -1020,120 +1882,119 @@ Tcl_Concat(argc, argv)   */  Tcl_Obj * -Tcl_ConcatObj(objc, objv) -    int objc;			/* Number of objects to concatenate. */ -    Tcl_Obj *CONST objv[];	/* Array of objects to concatenate. */ +Tcl_ConcatObj( +    int objc,			/* Number of objects to concatenate. */ +    Tcl_Obj *const objv[])	/* Array of objects to concatenate. */  { -    int allocSize, finalSize, length, elemLength, i; -    char *p; -    char *element; -    char *concatStr; -    Tcl_Obj *objPtr; +    int i, elemLength, needSpace = 0, bytesNeeded = 0; +    const char *element; +    Tcl_Obj *objPtr, *resPtr;      /* -     * Check first to see if all the items are of list type.  If so, -     * we will concat them together as lists, and return a list object. -     * This is only valid when the lists have no current string -     * representation, since we don't know what the original type was. -     * An original string rep may have lost some whitespace info when -     * converted which could be important. +     * Check first to see if all the items are of list type or empty. If so, +     * we will concat them together as lists, and return a list object. This +     * is only valid when the lists are in canonical form.       */ +      for (i = 0;  i < objc;  i++) { +	int length; +  	objPtr = objv[i]; -	if ((objPtr->typePtr != &tclListType) || (objPtr->bytes != NULL)) { +	if (TclListObjIsCanonical(objPtr)) { +	    continue; +	} +	Tcl_GetStringFromObj(objPtr, &length); +	if (length > 0) {  	    break;  	}      }      if (i == objc) { -	Tcl_Obj **listv; -	int listc; - -	objPtr = Tcl_NewListObj(0, NULL); +	resPtr = NULL;  	for (i = 0;  i < objc;  i++) { -	    /* -	     * Tcl_ListObjAppendList could be used here, but this saves -	     * us a bit of type checking (since we've already done it) -	     * Use of INT_MAX tells us to always put the new stuff on -	     * the end.  It will be set right in Tcl_ListObjReplace. 
-	     */ -	    Tcl_ListObjGetElements(NULL, objv[i], &listc, &listv); -	    Tcl_ListObjReplace(NULL, objPtr, INT_MAX, 0, listc, listv); +	    objPtr = objv[i]; +	    if (objPtr->bytes && objPtr->length == 0) { +		continue; +	    } +	    if (resPtr) { +		Tcl_ListObjAppendList(NULL, resPtr, objPtr); +	    } else { +		resPtr = TclListObjCopy(NULL, objPtr); +	    } +	} +	if (!resPtr) { +	    resPtr = Tcl_NewObj();  	} -	return objPtr; +	return resPtr;      } -    allocSize = 0; +    /* +     * Something cannot be determined to be safe, so build the concatenation +     * the slow way, using the string representations. +     * +     * First try to pre-allocate the size required. +     */ +      for (i = 0;  i < objc;  i++) { -	objPtr = objv[i]; -	element = Tcl_GetStringFromObj(objPtr, &length); -	if ((element != NULL) && (length > 0)) { -	    allocSize += (length + 1); +	element = TclGetStringFromObj(objv[i], &elemLength); +	bytesNeeded += elemLength; +	if (bytesNeeded < 0) { +	    break;  	}      } -    if (allocSize == 0) { -	allocSize = 1;		/* enough for the NULL byte at end */ -    }      /* -     * Allocate storage for the concatenated result. Note that allocSize -     * is one more than the total number of characters, and so includes -     * room for the terminating NULL byte. +     * Does not matter if this fails, will simply try later to build up the +     * string with each Append reallocating as needed with the usual string +     * append algorithm.  When that fails it will report the error.       */ -     -    concatStr = (char *) ckalloc((unsigned) allocSize); -    /* -     * Now concatenate the elements. Clip white space off the front and back -     * to generate a neater result, and ignore any empty elements. Also put -     * a null byte at the end. 
-     */ +    TclNewObj(resPtr); +    Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1); +    Tcl_SetObjLength(resPtr, 0); -    finalSize = 0; -    if (objc == 0) { -	*concatStr = '\0'; -    } else { -	p = concatStr; -        for (i = 0;  i < objc;  i++) { -	    objPtr = objv[i]; -	    element = Tcl_GetStringFromObj(objPtr, &elemLength); -	    while ((elemLength > 0) && (UCHAR(*element) < 127) -		    && isspace(UCHAR(*element))) { /* INTL: ISO C space. */ -	         element++; -		 elemLength--; -	    } +    for (i = 0;  i < objc;  i++) { +	int trim; +	 +	element = TclGetStringFromObj(objv[i], &elemLength); -	    /* -	     * Trim trailing white space.  But, be careful not to trim -	     * a space character if it is preceded by a backslash: in -	     * this case it could be significant. -	     */ +	/* +	 * Trim away the leading whitespace. +	 */ -	    while ((elemLength > 0) && (UCHAR(element[elemLength-1]) < 127) -		    && isspace(UCHAR(element[elemLength-1])) /* INTL: ISO C space. */ -		    && ((elemLength < 2) || (element[elemLength-2] != '\\'))) { -		elemLength--; -	    } -	    if (elemLength == 0) { -	         continue;	/* nothing left of this element */ -	    } -	    memcpy((VOID *) p, (VOID *) element, (size_t) elemLength); -	    p += elemLength; -	    *p = ' '; -	    p++; -	    finalSize += (elemLength + 1); -        } -        if (p != concatStr) { -	    p[-1] = 0; -	    finalSize -= 1;	/* we overwrote the final ' ' */ -        } else { -	    *p = 0; -        } +	trim = TclTrimLeft(element, elemLength, CONCAT_TRIM_SET, +		CONCAT_WS_SIZE); +	element += trim; +	elemLength -= trim; + +	/* +	 * Trim away the trailing whitespace. Do not permit trimming to expose +	 * a final backslash character. +	 */ + +	trim = TclTrimRight(element, elemLength, CONCAT_TRIM_SET, +		CONCAT_WS_SIZE); +	trim -= trim && (element[elemLength - trim - 1] == '\\'); +	elemLength -= trim; + +	/* +	 * If we're left with empty element after trimming, do nothing. 
+	 */ + +	if (elemLength == 0) { +	    continue; +	} + +	/* +	 * Append to the result with space if needed. +	 */ + +	if (needSpace) { +	    Tcl_AppendToObj(resPtr, " ", 1); +	} +	Tcl_AppendToObj(resPtr, element, elemLength); +	needSpace = 1;      } -     -    TclNewObj(objPtr); -    objPtr->bytes  = concatStr; -    objPtr->length = finalSize; -    return objPtr; +    return resPtr;  }  /* @@ -1144,10 +2005,9 @@ Tcl_ConcatObj(objc, objv)   *	See if a particular string matches a particular pattern.   *   * Results: - *	The return value is 1 if string matches pattern, and - *	0 otherwise.  The matching operation permits the following - *	special characters in the pattern: *?\[] (see the manual - *	entry for details on what these mean). + *	The return value is 1 if string matches pattern, and 0 otherwise. The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean).   *   * Side effects:   *	None. @@ -1156,12 +2016,12 @@ Tcl_ConcatObj(objc, objv)   */  int -Tcl_StringMatch(string, pattern) -    CONST char *string;		/* String. */ -    CONST char *pattern;	/* Pattern, which may contain special +Tcl_StringMatch( +    const char *str,		/* String. */ +    const char *pattern)	/* Pattern, which may contain special  				 * characters. */  { -    return Tcl_StringCaseMatch(string, pattern, 0); +    return Tcl_StringCaseMatch(str, pattern, 0);  }  /* @@ -1169,14 +2029,13 @@ Tcl_StringMatch(string, pattern)   *   * Tcl_StringCaseMatch --   * - *	See if a particular string matches a particular pattern. - *	Allows case insensitivity. + *	See if a particular string matches a particular pattern. Allows case + *	insensitivity.   *   * Results: - *	The return value is 1 if string matches pattern, and - *	0 otherwise.  The matching operation permits the following - *	special characters in the pattern: *?\[] (see the manual - *	entry for details on what these mean). 
+ *	The return value is 1 if string matches pattern, and 0 otherwise. The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean).   *   * Side effects:   *	None. @@ -1185,133 +2044,165 @@ Tcl_StringMatch(string, pattern)   */  int -Tcl_StringCaseMatch(string, pattern, nocase) -    CONST char *string;		/* String. */ -    CONST char *pattern;	/* Pattern, which may contain special +Tcl_StringCaseMatch( +    const char *str,		/* String. */ +    const char *pattern,	/* Pattern, which may contain special  				 * characters. */ -    int nocase;			/* 0 for case sensitive, 1 for insensitive */ +    int nocase)			/* 0 for case sensitive, 1 for insensitive */  { -    int p; -    CONST char *pstart = pattern; +    int p, charLen; +    const char *pstart = pattern;      Tcl_UniChar ch1, ch2; -     +      while (1) {  	p = *pattern; -	 +  	/* -	 * See if we're at the end of both the pattern and the string.  If -	 * so, we succeeded.  If we're at the end of the pattern but not at -	 * the end of the string, we failed. +	 * See if we're at the end of both the pattern and the string. If so, +	 * we succeeded. If we're at the end of the pattern but not at the end +	 * of the string, we failed.  	 */ -	 +  	if (p == '\0') { -	    return (*string == '\0'); +	    return (*str == '\0');  	} -	if ((*string == '\0') && (p != '*')) { +	if ((*str == '\0') && (p != '*')) {  	    return 0;  	}  	/* -	 * Check for a "*" as the next pattern character.  It matches -	 * any substring.  We handle this by calling ourselves -	 * recursively for each postfix of string, until either we -	 * match or we reach the end of the string. +	 * Check for a "*" as the next pattern character. It matches any +	 * substring. We handle this by calling ourselves recursively for each +	 * postfix of string, until either we match or we reach the end of the +	 * string.  	 
*/ -	 +  	if (p == '*') {  	    /*  	     * Skip all successive *'s in the pattern  	     */ +  	    while (*(++pattern) == '*') {}  	    p = *pattern;  	    if (p == '\0') {  		return 1;  	    } -	    Tcl_UtfToUniChar(pattern, &ch2); -	    if (nocase) { -		ch2 = Tcl_UniCharToLower(ch2); + +	    /* +	     * This is a special case optimization for single-byte utf. +	     */ + +	    if (UCHAR(*pattern) < 0x80) { +		ch2 = (Tcl_UniChar) +			(nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); +	    } else { +		Tcl_UtfToUniChar(pattern, &ch2); +		if (nocase) { +		    ch2 = Tcl_UniCharToLower(ch2); +		}  	    } +  	    while (1) {  		/*  		 * Optimization for matching - cruise through the string  		 * quickly if the next char in the pattern isn't a special  		 * character  		 */ +  		if ((p != '[') && (p != '?') && (p != '\\')) {  		    if (nocase) { -			while (*string) { -			    int charLen = Tcl_UtfToUniChar(string, &ch1); +			while (*str) { +			    charLen = TclUtfToUniChar(str, &ch1);  			    if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {  				break;  			    } -			    string += charLen; +			    str += charLen;  			}  		    } else {  			/*  			 * There's no point in trying to make this code -			 * shorter, as the number of bytes you want to -			 * compare each time is non-constant. +			 * shorter, as the number of bytes you want to compare +			 * each time is non-constant.  			 */ -			while (*string) { -			    int charLen = Tcl_UtfToUniChar(string, &ch1); + +			while (*str) { +			    charLen = TclUtfToUniChar(str, &ch1);  			    if (ch2 == ch1) {  				break;  			    } -			    string += charLen; +			    str += charLen;  			}  		    }  		} -		if (Tcl_StringCaseMatch(string, pattern, nocase)) { +		if (Tcl_StringCaseMatch(str, pattern, nocase)) {  		    return 1;  		} -		if (*string == '\0') { +		if (*str == '\0') {  		    return 0;  		} -		string += Tcl_UtfToUniChar(string, &ch1); +		str += TclUtfToUniChar(str, &ch1);  	    }  	}  	/* -	 * Check for a "?" 
as the next pattern character.  It matches -	 * any single character. +	 * Check for a "?" as the next pattern character. It matches any +	 * single character.  	 */  	if (p == '?') {  	    pattern++; -	    string += Tcl_UtfToUniChar(string, &ch1); +	    str += TclUtfToUniChar(str, &ch1);  	    continue;  	}  	/* -	 * Check for a "[" as the next pattern character.  It is followed -	 * by a list of characters that are acceptable, or by a range -	 * (two characters separated by "-"). +	 * Check for a "[" as the next pattern character. It is followed by a +	 * list of characters that are acceptable, or by a range (two +	 * characters separated by "-").  	 */  	if (p == '[') {  	    Tcl_UniChar startChar, endChar;  	    pattern++; -	    string += Tcl_UtfToUniChar(string, &ch1); -	    if (nocase) { -		ch1 = Tcl_UniCharToLower(ch1); +	    if (UCHAR(*str) < 0x80) { +		ch1 = (Tcl_UniChar) +			(nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); +		str++; +	    } else { +		str += Tcl_UtfToUniChar(str, &ch1); +		if (nocase) { +		    ch1 = Tcl_UniCharToLower(ch1); +		}  	    }  	    while (1) {  		if ((*pattern == ']') || (*pattern == '\0')) {  		    return 0;  		} -		pattern += Tcl_UtfToUniChar(pattern, &startChar); -		if (nocase) { -		    startChar = Tcl_UniCharToLower(startChar); +		if (UCHAR(*pattern) < 0x80) { +		    startChar = (Tcl_UniChar) (nocase +			    ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); +		    pattern++; +		} else { +		    pattern += Tcl_UtfToUniChar(pattern, &startChar); +		    if (nocase) { +			startChar = Tcl_UniCharToLower(startChar); +		    }  		}  		if (*pattern == '-') {  		    pattern++;  		    if (*pattern == '\0') {  			return 0;  		    } -		    pattern += Tcl_UtfToUniChar(pattern, &endChar); -		    if (nocase) { -			endChar = Tcl_UniCharToLower(endChar); +		    if (UCHAR(*pattern) < 0x80) { +			endChar = (Tcl_UniChar) (nocase +				? 
tolower(UCHAR(*pattern)) : UCHAR(*pattern)); +			pattern++; +		    } else { +			pattern += Tcl_UtfToUniChar(pattern, &endChar); +			if (nocase) { +			    endChar = Tcl_UniCharToLower(endChar); +			}  		    }  		    if (((startChar <= ch1) && (ch1 <= endChar))  			    || ((endChar <= ch1) && (ch1 <= startChar))) { @@ -1335,10 +2226,10 @@ Tcl_StringCaseMatch(string, pattern, nocase)  	    pattern++;  	    continue;  	} -     +  	/* -	 * If the next pattern character is '\', just strip off the '\' -	 * so we do exact matching on the character that follows. +	 * If the next pattern character is '\', just strip off the '\' so we +	 * do exact matching on the character that follows.  	 */  	if (p == '\\') { @@ -1349,12 +2240,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)  	}  	/* -	 * There's no special character.  Just make sure that the next -	 * bytes of each string match. +	 * There's no special character. Just make sure that the next bytes of +	 * each string match.  	 */ -	string  += Tcl_UtfToUniChar(string, &ch1); -	pattern += Tcl_UtfToUniChar(pattern, &ch2); +	str += TclUtfToUniChar(str, &ch1); +	pattern += TclUtfToUniChar(pattern, &ch2);  	if (nocase) {  	    if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {  		return 0; @@ -1368,11 +2259,243 @@ Tcl_StringCaseMatch(string, pattern, nocase)  /*   *----------------------------------------------------------------------   * + * TclByteArrayMatch -- + * + *	See if a particular string matches a particular pattern.  Does not + *	allow for case insensitivity. + *	Parallels tclUtf.c:TclUniCharMatch, adjusted for char* and sans nocase. + * + * Results: + *	The return value is 1 if string matches pattern, and 0 otherwise. The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean). + * + * Side effects: + *	None. 
+ * + *---------------------------------------------------------------------- + */ + +int +TclByteArrayMatch( +    const unsigned char *string,/* String. */ +    int strLen,			/* Length of String */ +    const unsigned char *pattern, +				/* Pattern, which may contain special +				 * characters. */ +    int ptnLen,			/* Length of Pattern */ +    int flags) +{ +    const unsigned char *stringEnd, *patternEnd; +    unsigned char p; + +    stringEnd = string + strLen; +    patternEnd = pattern + ptnLen; + +    while (1) { +	/* +	 * See if we're at the end of both the pattern and the string. If so, +	 * we succeeded. If we're at the end of the pattern but not at the end +	 * of the string, we failed. +	 */ + +	if (pattern == patternEnd) { +	    return (string == stringEnd); +	} +	p = *pattern; +	if ((string == stringEnd) && (p != '*')) { +	    return 0; +	} + +	/* +	 * Check for a "*" as the next pattern character. It matches any +	 * substring. We handle this by skipping all the characters up to the +	 * next matching one in the pattern, and then calling ourselves +	 * recursively for each postfix of string, until either we match or we +	 * reach the end of the string. +	 */ + +	if (p == '*') { +	    /* +	     * Skip all successive *'s in the pattern. +	     */ + +	    while ((++pattern < patternEnd) && (*pattern == '*')) { +		/* empty body */ +	    } +	    if (pattern == patternEnd) { +		return 1; +	    } +	    p = *pattern; +	    while (1) { +		/* +		 * Optimization for matching - cruise through the string +		 * quickly if the next char in the pattern isn't a special +		 * character. +		 */ + +		if ((p != '[') && (p != '?') && (p != '\\')) { +		    while ((string < stringEnd) && (p != *string)) { +			string++; +		    } +		} +		if (TclByteArrayMatch(string, stringEnd - string, +				pattern, patternEnd - pattern, 0)) { +		    return 1; +		} +		if (string == stringEnd) { +		    return 0; +		} +		string++; +	    } +	} + +	/* +	 * Check for a "?" 
as the next pattern character. It matches any +	 * single character. +	 */ + +	if (p == '?') { +	    pattern++; +	    string++; +	    continue; +	} + +	/* +	 * Check for a "[" as the next pattern character. It is followed by a +	 * list of characters that are acceptable, or by a range (two +	 * characters separated by "-"). +	 */ + +	if (p == '[') { +	    unsigned char ch1, startChar, endChar; + +	    pattern++; +	    ch1 = *string; +	    string++; +	    while (1) { +		if ((*pattern == ']') || (pattern == patternEnd)) { +		    return 0; +		} +		startChar = *pattern; +		pattern++; +		if (*pattern == '-') { +		    pattern++; +		    if (pattern == patternEnd) { +			return 0; +		    } +		    endChar = *pattern; +		    pattern++; +		    if (((startChar <= ch1) && (ch1 <= endChar)) +			    || ((endChar <= ch1) && (ch1 <= startChar))) { +			/* +			 * Matches ranges of form [a-z] or [z-a]. +			 */ + +			break; +		    } +		} else if (startChar == ch1) { +		    break; +		} +	    } +	    while (*pattern != ']') { +		if (pattern == patternEnd) { +		    pattern--; +		    break; +		} +		pattern++; +	    } +	    pattern++; +	    continue; +	} + +	/* +	 * If the next pattern character is '\', just strip off the '\' so we +	 * do exact matching on the character that follows. +	 */ + +	if (p == '\\') { +	    if (++pattern == patternEnd) { +		return 0; +	    } +	} + +	/* +	 * There's no special character. Just make sure that the next bytes of +	 * each string match. +	 */ + +	if (*string != *pattern) { +	    return 0; +	} +	string++; +	pattern++; +    } +} + +/* + *---------------------------------------------------------------------- + * + * TclStringMatchObj -- + * + *	See if a particular string matches a particular pattern. Allows case + *	insensitivity. This is the generic multi-type handler for the various + *	matching algorithms. + * + * Results: + *	The return value is 1 if string matches pattern, and 0 otherwise. 
The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean). + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclStringMatchObj( +    Tcl_Obj *strObj,		/* string object. */ +    Tcl_Obj *ptnObj,		/* pattern object. */ +    int flags)			/* Only TCL_MATCH_NOCASE should be passed, or +				 * 0. */ +{ +    int match, length, plen; + +    /* +     * Promote based on the type of incoming object. +     * XXX: Currently doesn't take advantage of exact-ness that +     * XXX: TclReToGlob tells us about +    trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj)); +     */ + +    if ((strObj->typePtr == &tclStringType) || (strObj->typePtr == NULL)) { +	Tcl_UniChar *udata, *uptn; + +	udata = Tcl_GetUnicodeFromObj(strObj, &length); +	uptn  = Tcl_GetUnicodeFromObj(ptnObj, &plen); +	match = TclUniCharMatch(udata, length, uptn, plen, flags); +    } else if (TclIsPureByteArray(strObj) && !flags) { +	unsigned char *data, *ptn; + +	data = Tcl_GetByteArrayFromObj(strObj, &length); +	ptn  = Tcl_GetByteArrayFromObj(ptnObj, &plen); +	match = TclByteArrayMatch(data, length, ptn, plen, 0); +    } else { +	match = Tcl_StringCaseMatch(TclGetString(strObj), +		TclGetString(ptnObj), flags); +    } +    return match; +} + +/* + *---------------------------------------------------------------------- + *   * Tcl_DStringInit --   * - *	Initializes a dynamic string, discarding any previous contents - *	of the string (Tcl_DStringFree should have been called already - *	if the dynamic string was previously in use). + *	Initializes a dynamic string, discarding any previous contents of the + *	string (Tcl_DStringFree should have been called already if the dynamic + *	string was previously in use).   *   * Results:   *	None. 
@@ -1384,8 +2507,8 @@ Tcl_StringCaseMatch(string, pattern, nocase)   */  void -Tcl_DStringInit(dsPtr) -    Tcl_DString *dsPtr;		/* Pointer to structure for dynamic string. */ +Tcl_DStringInit( +    Tcl_DString *dsPtr)		/* Pointer to structure for dynamic string. */  {      dsPtr->string = dsPtr->staticSpace;      dsPtr->length = 0; @@ -1398,75 +2521,97 @@ Tcl_DStringInit(dsPtr)   *   * Tcl_DStringAppend --   * - *	Append more characters to the current value of a dynamic string. + *	Append more bytes to the current value of a dynamic string.   *   * Results:   *	The return value is a pointer to the dynamic string's new value.   *   * Side effects: - *	Length bytes from string (or all of string if length is less - *	than zero) are added to the current value of the string. Memory - *	gets reallocated if needed to accomodate the string's new size. + *	Length bytes from "bytes" (or all of "bytes" if length is less than + *	zero) are added to the current value of the string. Memory gets + *	reallocated if needed to accomodate the string's new size.   *   *----------------------------------------------------------------------   */ -CONST char * -Tcl_DStringAppend(dsPtr, string, length) -    Tcl_DString *dsPtr;		/* Structure describing dynamic string. */ -    CONST char *string;		/* String to append.  If length is -1 then -				 * this must be null-terminated. */ -    int length;			/* Number of characters from string to -				 * append.  If < 0, then append all of string, -				 * up to null at end. */ +char * +Tcl_DStringAppend( +    Tcl_DString *dsPtr,		/* Structure describing dynamic string. */ +    const char *bytes,		/* String to append. If length is -1 then this +				 * must be null-terminated. */ +    int length)			/* Number of bytes from "bytes" to append. If +				 * < 0, then append all of bytes, up to null +				 * at end. 
*/  {      int newSize; -    char *dst; -    CONST char *end;      if (length < 0) { -	length = strlen(string); +	length = strlen(bytes);      }      newSize = length + dsPtr->length;      /* -     * Allocate a larger buffer for the string if the current one isn't -     * large enough. Allocate extra space in the new buffer so that there -     * will be room to grow before we have to allocate again. +     * Allocate a larger buffer for the string if the current one isn't large +     * enough. Allocate extra space in the new buffer so that there will be +     * room to grow before we have to allocate again.       */      if (newSize >= dsPtr->spaceAvl) {  	dsPtr->spaceAvl = newSize * 2;  	if (dsPtr->string == dsPtr->staticSpace) { -	    char *newString; +	    char *newString = ckalloc(dsPtr->spaceAvl); -	    newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl); -	    memcpy((VOID *) newString, (VOID *) dsPtr->string, -		    (size_t) dsPtr->length); +	    memcpy(newString, dsPtr->string, (size_t) dsPtr->length);  	    dsPtr->string = newString;  	} else { -	    dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string, -		    (size_t) dsPtr->spaceAvl); +	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);  	}      }      /* -     * Copy the new string into the buffer at the end of the old -     * one. +     * Copy the new string into the buffer at the end of the old one.       */ -    for (dst = dsPtr->string + dsPtr->length, end = string+length; -	    string < end; string++, dst++) { -	*dst = *string; -    } -    *dst = '\0'; +    memcpy(dsPtr->string + dsPtr->length, bytes, length);      dsPtr->length += length; +    dsPtr->string[dsPtr->length] = '\0';      return dsPtr->string;  }  /*   *----------------------------------------------------------------------   * + * TclDStringAppendObj, TclDStringAppendDString -- + * + *	Simple wrappers round Tcl_DStringAppend that make it easier to append + *	from particular sources of strings. 
+ * + *---------------------------------------------------------------------- + */ + +char * +TclDStringAppendObj( +    Tcl_DString *dsPtr, +    Tcl_Obj *objPtr) +{ +    int length; +    char *bytes = Tcl_GetStringFromObj(objPtr, &length); + +    return Tcl_DStringAppend(dsPtr, bytes, length); +} + +char * +TclDStringAppendDString( +    Tcl_DString *dsPtr, +    Tcl_DString *toAppendPtr) +{ +    return Tcl_DStringAppend(dsPtr, Tcl_DStringValue(toAppendPtr), +	    Tcl_DStringLength(toAppendPtr)); +} + +/* + *---------------------------------------------------------------------- + *   * Tcl_DStringAppendElement --   *   *	Append a list element to the current value of a dynamic string. @@ -1475,62 +2620,66 @@ Tcl_DStringAppend(dsPtr, string, length)   *	The return value is a pointer to the dynamic string's new value.   *   * Side effects: - *	String is reformatted as a list element and added to the current - *	value of the string.  Memory gets reallocated if needed to - *	accomodate the string's new size. + *	String is reformatted as a list element and added to the current value + *	of the string. Memory gets reallocated if needed to accomodate the + *	string's new size.   *   *----------------------------------------------------------------------   */ -CONST char * -Tcl_DStringAppendElement(dsPtr, string) -    Tcl_DString *dsPtr;		/* Structure describing dynamic string. */ -    CONST char *string;		/* String to append.  Must be +char * +Tcl_DStringAppendElement( +    Tcl_DString *dsPtr,		/* Structure describing dynamic string. */ +    const char *element)	/* String to append. Must be  				 * null-terminated. */  { -    int newSize, flags, strSize; -    char *dst; - -    strSize = ((string == NULL) ? 0 : strlen(string)); -    newSize = Tcl_ScanCountedElement(string, strSize, &flags) -	+ dsPtr->length + 1; +    char *dst = dsPtr->string + dsPtr->length; +    int needSpace = TclNeedSpace(dsPtr->string, dst); +    int flags = needSpace ? 
TCL_DONT_QUOTE_HASH : 0; +    int newSize = dsPtr->length + needSpace +	    + TclScanElement(element, -1, &flags);      /* -     * Allocate a larger buffer for the string if the current one isn't -     * large enough.  Allocate extra space in the new buffer so that there -     * will be room to grow before we have to allocate again. -     * SPECIAL NOTE: must use memcpy, not strcpy, to copy the string -     * to a larger buffer, since there may be embedded NULLs in the -     * string in some cases. +     * Allocate a larger buffer for the string if the current one isn't large +     * enough. Allocate extra space in the new buffer so that there will be +     * room to grow before we have to allocate again. SPECIAL NOTE: must use +     * memcpy, not strcpy, to copy the string to a larger buffer, since there +     * may be embedded NULLs in the string in some cases.       */      if (newSize >= dsPtr->spaceAvl) {  	dsPtr->spaceAvl = newSize * 2;  	if (dsPtr->string == dsPtr->staticSpace) { -	    char *newString; +	    char *newString = ckalloc(dsPtr->spaceAvl); -	    newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl); -	    memcpy((VOID *) newString, (VOID *) dsPtr->string, -		    (size_t) dsPtr->length); +	    memcpy(newString, dsPtr->string, (size_t) dsPtr->length);  	    dsPtr->string = newString;  	} else { -	    dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string, -		    (size_t) dsPtr->spaceAvl); +	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);  	} +	dst = dsPtr->string + dsPtr->length;      }      /* -     * Convert the new string to a list element and copy it into the -     * buffer at the end, with a space, if needed. +     * Convert the new string to a list element and copy it into the buffer at +     * the end, with a space, if needed.       
*/ -    dst = dsPtr->string + dsPtr->length; -    if (TclNeedSpace(dsPtr->string, dst)) { +    if (needSpace) {  	*dst = ' ';  	dst++;  	dsPtr->length++; + +	/* +	 * If we need a space to separate this element from preceding stuff, +	 * then this element will not lead a list, and need not have its +	 * leading '#' quoted. +	 */ + +	flags |= TCL_DONT_QUOTE_HASH;      } -    dsPtr->length += Tcl_ConvertCountedElement(string, strSize, dst, flags); +    dsPtr->length += TclConvertElement(element, -1, dst, flags); +    dsPtr->string[dsPtr->length] = '\0';      return dsPtr->string;  } @@ -1539,25 +2688,24 @@ Tcl_DStringAppendElement(dsPtr, string)   *   * Tcl_DStringSetLength --   * - *	Change the length of a dynamic string.  This can cause the - *	string to either grow or shrink, depending on the value of - *	length. + *	Change the length of a dynamic string. This can cause the string to + *	either grow or shrink, depending on the value of length.   *   * Results:   *	None.   *   * Side effects: - *	The length of dsPtr is changed to length and a null byte is - *	stored at that position in the string.  If length is larger - *	than the space allocated for dsPtr, then a panic occurs. + *	The length of dsPtr is changed to length and a null byte is stored at + *	that position in the string. If length is larger than the space + *	allocated for dsPtr, then a panic occurs.   *   *----------------------------------------------------------------------   */  void -Tcl_DStringSetLength(dsPtr, length) -    Tcl_DString *dsPtr;		/* Structure describing dynamic string. */ -    int length;			/* New length for dynamic string. */ +Tcl_DStringSetLength( +    Tcl_DString *dsPtr,		/* Structure describing dynamic string. */ +    int length)			/* New length for dynamic string. */  {      int newsize; @@ -1566,15 +2714,15 @@ Tcl_DStringSetLength(dsPtr, length)      }      if (length >= dsPtr->spaceAvl) {  	/* -	 * There are two interesting cases here.  
In the first case, the user -	 * may be trying to allocate a large buffer of a specific size.  It +	 * There are two interesting cases here. In the first case, the user +	 * may be trying to allocate a large buffer of a specific size. It  	 * would be wasteful to overallocate that buffer, so we just allocate -	 * enough for the requested size plus the trailing null byte.  In the +	 * enough for the requested size plus the trailing null byte. In the  	 * second case, we are growing the buffer incrementally, so we need -	 * behavior similar to Tcl_DStringAppend.  The requested length will -	 * usually be a small delta above the current spaceAvl, so we'll end up -	 * doubling the old size.  This won't grow the buffer quite as quickly, -	 * but it should be close enough. +	 * behavior similar to Tcl_DStringAppend. The requested length will +	 * usually be a small delta above the current spaceAvl, so we'll end +	 * up doubling the old size. This won't grow the buffer quite as +	 * quickly, but it should be close enough.  	 */  	newsize = dsPtr->spaceAvl * 2; @@ -1584,15 +2732,12 @@ Tcl_DStringSetLength(dsPtr, length)  	    dsPtr->spaceAvl = length + 1;  	}  	if (dsPtr->string == dsPtr->staticSpace) { -	    char *newString; +	    char *newString = ckalloc(dsPtr->spaceAvl); -	    newString = (char *) ckalloc((unsigned) dsPtr->spaceAvl); -	    memcpy((VOID *) newString, (VOID *) dsPtr->string, -		    (size_t) dsPtr->length); +	    memcpy(newString, dsPtr->string, (size_t) dsPtr->length);  	    dsPtr->string = newString;  	} else { -	    dsPtr->string = (char *) ckrealloc((VOID *) dsPtr->string, -		    (size_t) dsPtr->spaceAvl); +	    dsPtr->string = ckrealloc(dsPtr->string, dsPtr->spaceAvl);  	}      }      dsPtr->length = length; @@ -1604,21 +2749,22 @@ Tcl_DStringSetLength(dsPtr, length)   *   * Tcl_DStringFree --   * - *	Frees up any memory allocated for the dynamic string and - *	reinitializes the string to an empty state. 
+ *	Frees up any memory allocated for the dynamic string and reinitializes + *	the string to an empty state.   *   * Results:   *	None.   *   * Side effects: - *	The previous contents of the dynamic string are lost, and - *	the new value is an empty string. + *	The previous contents of the dynamic string are lost, and the new + *	value is an empty string.   * - *---------------------------------------------------------------------- */ + *---------------------------------------------------------------------- + */  void -Tcl_DStringFree(dsPtr) -    Tcl_DString *dsPtr;		/* Structure describing dynamic string. */ +Tcl_DStringFree( +    Tcl_DString *dsPtr)		/* Structure describing dynamic string. */  {      if (dsPtr->string != dsPtr->staticSpace) {  	ckfree(dsPtr->string); @@ -1634,43 +2780,28 @@ Tcl_DStringFree(dsPtr)   *   * Tcl_DStringResult --   * - *	This procedure moves the value of a dynamic string into an - *	interpreter as its string result. Afterwards, the dynamic string - *	is reset to an empty string. + *	This function moves the value of a dynamic string into an interpreter + *	as its string result. Afterwards, the dynamic string is reset to an + *	empty string.   *   * Results:   *	None.   *   * Side effects: - *	The string is "moved" to interp's result, and any existing - *	string result for interp is freed. dsPtr is reinitialized to - *	an empty string. + *	The string is "moved" to interp's result, and any existing string + *	result for interp is freed. dsPtr is reinitialized to an empty string.   *   *----------------------------------------------------------------------   */  void -Tcl_DStringResult(interp, dsPtr) -    Tcl_Interp *interp;		/* Interpreter whose result is to be reset. */ -    Tcl_DString *dsPtr;		/* Dynamic string that is to become the +Tcl_DStringResult( +    Tcl_Interp *interp,		/* Interpreter whose result is to be reset. */ +    Tcl_DString *dsPtr)		/* Dynamic string that is to become the  				 * result of interp. 
*/  {      Tcl_ResetResult(interp); -     -    if (dsPtr->string != dsPtr->staticSpace) { -	interp->result = dsPtr->string; -	interp->freeProc = TCL_DYNAMIC; -    } else if (dsPtr->length < TCL_RESULT_SIZE) { -	interp->result = ((Interp *) interp)->resultSpace; -	strcpy(interp->result, dsPtr->string); -    } else { -	Tcl_SetResult(interp, dsPtr->string, TCL_VOLATILE); -    } -     -    dsPtr->string = dsPtr->staticSpace; -    dsPtr->length = 0; -    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; -    dsPtr->staticSpace[0] = '\0'; +    Tcl_SetObjResult(interp, TclDStringToObj(dsPtr));  }  /* @@ -1678,14 +2809,14 @@ Tcl_DStringResult(interp, dsPtr)   *   * Tcl_DStringGetResult --   * - *	This procedure moves an interpreter's result into a dynamic string. + *	This function moves an interpreter's result into a dynamic string.   *   * Results:   *	None.   *   * Side effects: - *	The interpreter's string result is cleared, and the previous - *	contents of dsPtr are freed. + *	The interpreter's string result is cleared, and the previous contents + *	of dsPtr are freed.   *   *	If the string result is empty, the object result is moved to the   *	string result, then the object result is reset. @@ -1694,37 +2825,66 @@ Tcl_DStringResult(interp, dsPtr)   */  void -Tcl_DStringGetResult(interp, dsPtr) -    Tcl_Interp *interp;		/* Interpreter whose result is to be reset. */ -    Tcl_DString *dsPtr;		/* Dynamic string that is to become the -				 * result of interp. */ +Tcl_DStringGetResult( +    Tcl_Interp *interp,		/* Interpreter whose result is to be reset. */ +    Tcl_DString *dsPtr)		/* Dynamic string that is to become the result +				 * of interp. */  {      Interp *iPtr = (Interp *) interp; -     +      if (dsPtr->string != dsPtr->staticSpace) {  	ckfree(dsPtr->string);      }      /* -     * If the string result is empty, move the object result to the -     * string result, then reset the object result. +     * Do more efficient transfer when we know the result is a Tcl_Obj. 
When +     * there's no string result, we only have to deal with two cases: +     * +     *  1. When the string rep is the empty string, when we don't copy but +     *     instead use the staticSpace in the DString to hold an empty string. + +     *  2. When the string rep is not there or there's a real string rep, when +     *     we use Tcl_GetString to fetch (or generate) the string rep - which +     *     we know to have been allocated with ckalloc() - and use it to +     *     populate the DString space. Then, we free the internal rep. and set +     *     the object's string representation back to the canonical empty +     *     string.       */ -    if (*(iPtr->result) == 0) { -	Tcl_SetResult(interp, TclGetString(Tcl_GetObjResult(interp)), -	        TCL_VOLATILE); +    if (!iPtr->result[0] && iPtr->objResultPtr +	    && !Tcl_IsShared(iPtr->objResultPtr)) { +	if (iPtr->objResultPtr->bytes == tclEmptyStringRep) { +	    dsPtr->string = dsPtr->staticSpace; +	    dsPtr->string[0] = 0; +	    dsPtr->length = 0; +	    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; +	} else { +	    dsPtr->string = Tcl_GetString(iPtr->objResultPtr); +	    dsPtr->length = iPtr->objResultPtr->length; +	    dsPtr->spaceAvl = dsPtr->length + 1; +	    TclFreeIntRep(iPtr->objResultPtr); +	    iPtr->objResultPtr->bytes = tclEmptyStringRep; +	    iPtr->objResultPtr->length = 0; +	} +	return;      } +    /* +     * If the string result is empty, move the object result to the string +     * result, then reset the object result. 
+     */ + +    (void) Tcl_GetStringResult(interp); +      dsPtr->length = strlen(iPtr->result);      if (iPtr->freeProc != NULL) { -	if ((iPtr->freeProc == TCL_DYNAMIC) -		|| (iPtr->freeProc == (Tcl_FreeProc *) free)) { +	if (iPtr->freeProc == TCL_DYNAMIC) {  	    dsPtr->string = iPtr->result;  	    dsPtr->spaceAvl = dsPtr->length+1;  	} else { -	    dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length+1)); -	    strcpy(dsPtr->string, iPtr->result); -	    (*iPtr->freeProc)(iPtr->result); +	    dsPtr->string = ckalloc(dsPtr->length+1); +	    memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1); +	    iPtr->freeProc(iPtr->result);  	}  	dsPtr->spaceAvl = dsPtr->length+1;  	iPtr->freeProc = NULL; @@ -1733,12 +2893,12 @@ Tcl_DStringGetResult(interp, dsPtr)  	    dsPtr->string = dsPtr->staticSpace;  	    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;  	} else { -	    dsPtr->string = (char *) ckalloc((unsigned) (dsPtr->length + 1)); +	    dsPtr->string = ckalloc(dsPtr->length+1);  	    dsPtr->spaceAvl = dsPtr->length + 1;  	} -	strcpy(dsPtr->string, iPtr->result); +	memcpy(dsPtr->string, iPtr->result, (unsigned) dsPtr->length+1);      } -     +      iPtr->result = iPtr->resultSpace;      iPtr->resultSpace[0] = 0;  } @@ -1746,11 +2906,71 @@ Tcl_DStringGetResult(interp, dsPtr)  /*   *----------------------------------------------------------------------   * + * TclDStringToObj -- + * + *	This function moves a dynamic string's contents to a new Tcl_Obj. Be + *	aware that this function does *not* check that the encoding of the + *	contents of the dynamic string is correct; this is the caller's + *	responsibility to enforce. + * + * Results: + *	The newly-allocated untyped (i.e., typePtr==NULL) Tcl_Obj with a + *	reference count of zero. + * + * Side effects: + *	The string is "moved" to the object. dsPtr is reinitialized to an + *	empty string; it does not need to be Tcl_DStringFree'd after this if + *	not used further. 
+ * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclDStringToObj( +    Tcl_DString *dsPtr) +{ +    Tcl_Obj *result; + +    if (dsPtr->string == dsPtr->staticSpace) { +	if (dsPtr->length == 0) { +	    TclNewObj(result); +	} else { +	    /* +	     * Static buffer, so must copy. +	     */ +	     +	    TclNewStringObj(result, dsPtr->string, dsPtr->length); +	} +    } else { +	/* +	 * Dynamic buffer, so transfer ownership and reset. +	 */ + +	TclNewObj(result); +	result->bytes = dsPtr->string; +	result->length = dsPtr->length; +    } + +    /* +     * Re-establish the DString as empty with no buffer allocated. +     */ + +    dsPtr->string = dsPtr->staticSpace; +    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE; +    dsPtr->length = 0; +    dsPtr->staticSpace[0] = '\0'; + +    return result; +} + +/* + *---------------------------------------------------------------------- + *   * Tcl_DStringStartSublist --   * - *	This procedure adds the necessary information to a dynamic - *	string (e.g. " {" to start a sublist.  Future element - *	appends will be in the sublist rather than the main list. + *	This function adds the necessary information to a dynamic string + *	(e.g. " {") to start a sublist. Future element appends will be in the + *	sublist rather than the main list.   *   * Results:   *	None. @@ -1762,13 +2982,13 @@ Tcl_DStringGetResult(interp, dsPtr)   */  void -Tcl_DStringStartSublist(dsPtr) -    Tcl_DString *dsPtr;			/* Dynamic string. */ +Tcl_DStringStartSublist( +    Tcl_DString *dsPtr)		/* Dynamic string. 
*/  {      if (TclNeedSpace(dsPtr->string, dsPtr->string + dsPtr->length)) { -	Tcl_DStringAppend(dsPtr, " {", -1); +	TclDStringAppendLiteral(dsPtr, " {");      } else { -	Tcl_DStringAppend(dsPtr, "{", -1); +	TclDStringAppendLiteral(dsPtr, "{");      }  } @@ -1777,10 +2997,9 @@ Tcl_DStringStartSublist(dsPtr)   *   * Tcl_DStringEndSublist --   * - *	This procedure adds the necessary characters to a dynamic - *	string to end a sublist (e.g. "}").  Future element appends - *	will be in the enclosing (sub)list rather than the current - *	sublist. + *	This function adds the necessary characters to a dynamic string to end + *	a sublist (e.g. "}"). Future element appends will be in the enclosing + *	(sub)list rather than the current sublist.   *   * Results:   *	None. @@ -1792,10 +3011,10 @@ Tcl_DStringStartSublist(dsPtr)   */  void -Tcl_DStringEndSublist(dsPtr) -    Tcl_DString *dsPtr;			/* Dynamic string. */ +Tcl_DStringEndSublist( +    Tcl_DString *dsPtr)		/* Dynamic string. */  { -    Tcl_DStringAppend(dsPtr, "}", -1); +    TclDStringAppendLiteral(dsPtr, "}");  }  /* @@ -1803,14 +3022,14 @@ Tcl_DStringEndSublist(dsPtr)   *   * Tcl_PrintDouble --   * - *	Given a floating-point value, this procedure converts it to - *	an ASCII string using. + *	Given a floating-point value, this function converts it to an ASCII + *	string using.   *   * Results: - *	The ASCII equivalent of "value" is written at "dst".  It is - *	written using the current precision, and it is guaranteed to - *	contain a decimal point or exponent, so that it looks like - *	a floating-point value and not an integer. + *	The ASCII equivalent of "value" is written at "dst". It is written + *	using the current precision, and it is guaranteed to contain a decimal + *	point or exponent, so that it looks like a floating-point value and + *	not an integer.   *   * Side effects:   *	None. 
@@ -1819,38 +3038,159 @@ Tcl_DStringEndSublist(dsPtr)   */  void -Tcl_PrintDouble(interp, value, dst) -    Tcl_Interp *interp;			/* Interpreter whose tcl_precision -					 * variable used to be used to control -					 * printing.  It's ignored now. */ -    double value;			/* Value to print as string. */ -    char *dst;				/* Where to store converted value; -					 * must have at least TCL_DOUBLE_SPACE -					 * characters. */ +Tcl_PrintDouble( +    Tcl_Interp *interp,		/* Interpreter whose tcl_precision variable +				 * used to be used to control printing. It's +				 * ignored now. */ +    double value,		/* Value to print as string. */ +    char *dst)			/* Where to store converted value; must have +				 * at least TCL_DOUBLE_SPACE characters. */  {      char *p, c; -    Tcl_UniChar ch; +    int exponent; +    int signum; +    char *digits; +    char *end; +    int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int)); -    Tcl_MutexLock(&precisionMutex); -    sprintf(dst, precisionFormat, value); -    Tcl_MutexUnlock(&precisionMutex); +    /* +     * Handle NaN. +     */ +     +    if (TclIsNaN(value)) { +	TclFormatNaN(value, dst); +	return; +    } + +    /* +     * Handle infinities. +     */ +     +    if (TclIsInfinite(value)) { +	/* +	 * Remember to copy the terminating NUL too. +	 */ +	 +	if (value < 0) { +	    memcpy(dst, "-Inf", 5); +	} else { +	    memcpy(dst, "Inf", 4); +	} +	return; +    }      /* -     * If the ASCII result looks like an integer, add ".0" so that it -     * doesn't look like an integer anymore.  This prevents floating-point -     * values from being converted to integers unintentionally. +     * Ordinary (normal and denormal) values.       */ +     +    if (*precisionPtr == 0) { +	digits = TclDoubleDigits(value, -1, TCL_DD_SHORTEST, +		&exponent, &signum, &end); +    } else { +	/* +	 * There are at least two possible interpretations for tcl_precision. 
+	 * +	 * The first is, "choose the decimal representation having +	 * $tcl_precision digits of significance that is nearest to the given +	 * number, breaking ties by rounding to even, and then trimming +	 * trailing zeros." This gives the greatest possible precision in the +	 * decimal string, but offers the anomaly that [expr 0.1] will be +	 * "0.10000000000000001". +	 * +	 * The second is "choose the decimal representation having at most +	 * $tcl_precision digits of significance that is nearest to the given +	 * number. If no such representation converts exactly to the given +	 * number, choose the one that is closest, breaking ties by rounding +	 * to even. If more than one such representation converts exactly to +	 * the given number, choose the shortest, breaking ties in favour of +	 * the nearest, breaking remaining ties in favour of the one ending in +	 * an even digit." +	 * +	 * Tcl 8.4 implements the first of these, which gives rise to +	 * anomalies in formatting: +	 * +	 *	% expr 0.1 +	 *	0.10000000000000001 +	 *	% expr 0.01 +	 *	0.01 +	 *	% expr 1e-7 +	 *	9.9999999999999995e-08 +	 * +	 * For human readability, it appears better to choose the second rule, +	 * and let [expr 0.1] return 0.1. But for 8.4 compatibility, we prefer +	 * the first (the recommended zero value for tcl_precision avoids the +	 * problem entirely). +	 * +	 * Uncomment TCL_DD_SHORTEN_FLAG in the next call to prefer the method +	 * that allows floating point values to be shortened if it can be done +	 * without loss of precision. +	 */ + +	digits = TclDoubleDigits(value, *precisionPtr, +		TCL_DD_E_FORMAT /* | TCL_DD_SHORTEN_FLAG */,  +		&exponent, &signum, &end); +    } +    if (signum) { +	*dst++ = '-'; +    } +    p = digits; +    if (exponent < -4 || exponent > 16) { +	/* +	 * E format for numbers < 1e-3 or >= 1e17. 
+	 */ +	 +	*dst++ = *p++; +	c = *p; +	if (c != '\0') { +	    *dst++ = '.'; +	    while (c != '\0') { +		*dst++ = c; +		c = *++p; +	    } +	} -    for (p = dst; *p != 0; ) { -	p += Tcl_UtfToUniChar(p, &ch); -	c = UCHAR(ch); -	if ((c == '.') || isalpha(UCHAR(c))) {	/* INTL: ISO only. */ -	    return; +	/* +	 * Tcl 8.4 appears to format with at least a two-digit exponent; +	 * preserve that behaviour when tcl_precision != 0 +	 */ + +	if (*precisionPtr == 0) { +	    sprintf(dst, "e%+d", exponent); +	} else { +	    sprintf(dst, "e%+03d", exponent); +	} +    } else { +	/* +	 * F format for others. +	 */ +	 +	if (exponent < 0) { +	    *dst++ = '0';  	} +	c = *p; +	while (exponent-- >= 0) { +	    if (c != '\0') { +		*dst++ = c; +		c = *++p; +	    } else { +		*dst++ = '0'; +	    } +	} +	*dst++ = '.'; +	if (c == '\0') { +	    *dst++ = '0'; +	} else { +	    while (++exponent < -1) { +		*dst++ = '0'; +	    } +	    while (c != '\0') { +		*dst++ = c; +		c = *++p; +	    } +	} +	*dst++ = '\0';      } -    p[0] = '.'; -    p[1] = '0'; -    p[2] = 0; +    ckfree(digits);  }  /* @@ -1858,92 +3198,77 @@ Tcl_PrintDouble(interp, value, dst)   *   * TclPrecTraceProc --   * - *	This procedure is invoked whenever the variable "tcl_precision" - *	is written. + *	This function is invoked whenever the variable "tcl_precision" is + *	written.   *   * Results: - *	Returns NULL if all went well, or an error message if the - *	new value for the variable doesn't make sense. + *	Returns NULL if all went well, or an error message if the new value + *	for the variable doesn't make sense.   *   * Side effects: - *	If the new value doesn't make sense then this procedure - *	undoes the effect of the variable modification.  Otherwise - *	it modifies the format string that's used by Tcl_PrintDouble. + *	If the new value doesn't make sense then this function undoes the + *	effect of the variable modification. Otherwise it modifies the format + *	string that's used by Tcl_PrintDouble.   
*   *----------------------------------------------------------------------   */  	/* ARGSUSED */  char * -TclPrecTraceProc(clientData, interp, name1, name2, flags) -    ClientData clientData;	/* Not used. */ -    Tcl_Interp *interp;		/* Interpreter containing variable. */ -    char *name1;		/* Name of variable. */ -    char *name2;		/* Second part of variable name. */ -    int flags;			/* Information about what happened. */ +TclPrecTraceProc( +    ClientData clientData,	/* Not used. */ +    Tcl_Interp *interp,		/* Interpreter containing variable. */ +    const char *name1,		/* Name of variable. */ +    const char *name2,		/* Second part of variable name. */ +    int flags)			/* Information about what happened. */  { -    CONST char *value; -    char *end; +    Tcl_Obj *value;      int prec; +    int *precisionPtr = Tcl_GetThreadData(&precisionKey, (int) sizeof(int));      /*       * If the variable is unset, then recreate the trace.       */      if (flags & TCL_TRACE_UNSETS) { -	if ((flags & TCL_TRACE_DESTROYED) && !(flags & TCL_INTERP_DESTROYED)) { +	if ((flags & TCL_TRACE_DESTROYED) && !Tcl_InterpDeleted(interp)) {  	    Tcl_TraceVar2(interp, name1, name2,  		    TCL_GLOBAL_ONLY|TCL_TRACE_READS|TCL_TRACE_WRITES  		    |TCL_TRACE_UNSETS, TclPrecTraceProc, clientData);  	} -	return (char *) NULL; +	return NULL;      }      /* -     * When the variable is read, reset its value from our shared -     * value.  This is needed in case the variable was modified in -     * some other interpreter so that this interpreter's value is -     * out of date. +     * When the variable is read, reset its value from our shared value. This +     * is needed in case the variable was modified in some other interpreter +     * so that this interpreter's value is out of date.       
*/ -    Tcl_MutexLock(&precisionMutex);      if (flags & TCL_TRACE_READS) { -	Tcl_SetVar2(interp, name1, name2, precisionString, +	Tcl_SetVar2Ex(interp, name1, name2, Tcl_NewIntObj(*precisionPtr),  		flags & TCL_GLOBAL_ONLY); -	Tcl_MutexUnlock(&precisionMutex); -	return (char *) NULL; +	return NULL;      }      /* -     * The variable is being written.  Check the new value and disallow -     * it if it isn't reasonable or if this is a safe interpreter (we -     * don't want safe interpreters messing up the precision of other -     * interpreters). +     * The variable is being written. Check the new value and disallow it if +     * it isn't reasonable or if this is a safe interpreter (we don't want +     * safe interpreters messing up the precision of other interpreters).       */      if (Tcl_IsSafe(interp)) { -	Tcl_SetVar2(interp, name1, name2, precisionString, -		flags & TCL_GLOBAL_ONLY); -	Tcl_MutexUnlock(&precisionMutex); -	return "can't modify precision from a safe interpreter"; +	return (char *) "can't modify precision from a safe interpreter";      } -    value = Tcl_GetVar2(interp, name1, name2, flags & TCL_GLOBAL_ONLY); -    if (value == NULL) { -	value = ""; -    } -    prec = strtoul(value, &end, 10); -    if ((prec <= 0) || (prec > TCL_MAX_PREC) || (prec > 100) || -	    (end == value) || (*end != 0)) { -	Tcl_SetVar2(interp, name1, name2, precisionString, -		flags & TCL_GLOBAL_ONLY); -	Tcl_MutexUnlock(&precisionMutex); -	return "improper value for precision"; +    value = Tcl_GetVar2Ex(interp, name1, name2, flags & TCL_GLOBAL_ONLY); +    if (value == NULL +	    || Tcl_GetIntFromObj(NULL, value, &prec) != TCL_OK +	    || prec < 0 || prec > TCL_MAX_PREC) { +	return (char *) "improper value for precision";      } -    TclFormatInt(precisionString, prec); -    sprintf(precisionFormat, "%%.%dg", prec); -    Tcl_MutexUnlock(&precisionMutex); -    return (char *) NULL; +    *precisionPtr = prec; +    return NULL;  }  /* @@ -1951,9 +3276,8 @@ 
TclPrecTraceProc(clientData, interp, name1, name2, flags)   *   * TclNeedSpace --   * - *	This procedure checks to see whether it is appropriate to - *	add a space before appending a new list element to an - *	existing string. + *	This function checks to see whether it is appropriate to add a space + *	before appending a new list element to an existing string.   *   * Results:   *	The return value is 1 if a space is appropriate, 0 otherwise. @@ -1965,47 +3289,71 @@ TclPrecTraceProc(clientData, interp, name1, name2, flags)   */  int -TclNeedSpace(start, end) -    CONST char *start;		/* First character in string. */ -    CONST char *end;			/* End of string (place where space will -				 * be added, if appropriate). */ +TclNeedSpace( +    const char *start,		/* First character in string. */ +    const char *end)		/* End of string (place where space will be +				 * added, if appropriate). */  { -    Tcl_UniChar ch; -      /* -     * A space is needed unless either +     * A space is needed unless either:       * (a) we're at the start of the string, or -     * (b) the trailing characters of the string consist of one or more -     *     open curly braces preceded by a space or extending back to -     *     the beginning of the string. -     * (c) the trailing characters of the string consist of a space -     *	   preceded by a character other than backslash.       */      if (end == start) {  	return 0;      } + +    /* +     * (b) we're at the start of a nested list-element, quoted with an open +     *	   curly brace; we can be nested arbitrarily deep, so long as the +     *	   first curly brace starts an element, so backtrack over open curly +     *	   braces that are trailing characters of the string; and +     */ +      end = Tcl_UtfPrev(end, start); -    if (*end != '{') { -	Tcl_UtfToUniChar(end, &ch); -	/* -	 * Direct char comparison on next line is safe as it is with -	 * a character in the ASCII subset, and so single-byte in UTF8. 
-	 */ -	if (Tcl_UniCharIsSpace(ch) && ((end == start) || (end[-1] != '\\'))) { +    while (*end == '{') { +	if (end == start) {  	    return 0;  	} +	end = Tcl_UtfPrev(end, start); +    } + +    /* +     * (c) the trailing character of the string is already a list-element +     *	   separator (according to TclFindElement); that is, one of these +     *	   characters: +     *		\u0009	\t	TAB +     *		\u000A	\n	NEWLINE +     *		\u000B	\v	VERTICAL TAB +     *		\u000C	\f	FORM FEED +     *		\u000D	\r	CARRIAGE RETURN +     *		\u0020		SPACE +     *	   with the condition that the penultimate character is not a +     *	   backslash. +     */ + +    if (*end > 0x20) { +	/* +	 * Performance tweak. All ASCII spaces are <= 0x20. So get a quick +	 * answer for most characters before comparing against all spaces in +	 * the switch below. +	 * +	 * NOTE: Remove this if other Unicode spaces ever get accepted as +	 * list-element separators. +	 */ +  	return 1;      } -    do { -	if (end == start) { +    switch (*end) { +    case ' ': +    case '\t': +    case '\n': +    case '\r': +    case '\v': +    case '\f': +	if ((end == start) || (end[-1] != '\\')) {  	    return 0;  	} -	end = Tcl_UtfPrev(end, start); -    } while (*end == '{'); -    Tcl_UtfToUniChar(end, &ch); -    if (Tcl_UniCharIsSpace(ch)) { -	return 0;      }      return 1;  } @@ -2018,31 +3366,33 @@ TclNeedSpace(start, end)   *	This procedure formats an integer into a sequence of decimal digit   *	characters in a buffer. If the integer is negative, a minus sign is   *	inserted at the start of the buffer. A null character is inserted at - *	the end of the formatted characters. It is the caller's - *	responsibility to ensure that enough storage is available. This - *	procedure has the effect of sprintf(buffer, "%d", n) but is faster. + *	the end of the formatted characters. It is the caller's responsibility + *	to ensure that enough storage is available. 
This procedure has the + *	effect of sprintf(buffer, "%ld", n) but is faster as proven in + *	benchmarks.  This is key to UpdateStringOfInt, which is a common path + *	for a lot of code (e.g. int-indexed arrays).   *   * Results:   *	An integer representing the number of characters formatted, not   *	including the terminating \0.   *   * Side effects: - *	The formatted characters are written into the storage pointer to - *	by the "buffer" argument. + *	The formatted characters are written into the storage pointer to by + *	the "buffer" argument.   *   *----------------------------------------------------------------------   */  int -TclFormatInt(buffer, n) -    char *buffer;		/* Points to the storage into which the +TclFormatInt( +    char *buffer,		/* Points to the storage into which the  				 * formatted characters are written. */ -    long n;			/* The integer to format. */ +    long n)			/* The integer to format. */  {      long intVal;      int i;      int numFormatted, j; -    char *digits = "0123456789"; +    const char *digits = "0123456789";      /*       * Check first whether "n" is zero. @@ -2055,14 +3405,14 @@ TclFormatInt(buffer, n)      }      /* -     * Check whether "n" is the maximum negative value. This is -     * -2^(m-1) for an m-bit word, and has no positive equivalent; -     * negating it produces the same value. +     * Check whether "n" is the maximum negative value. This is -2^(m-1) for +     * an m-bit word, and has no positive equivalent; negating it produces the +     * same value.       */ -    if (n == -n) { -	sprintf(buffer, "%ld", n); -	return strlen(buffer); +    intVal = -n;			/* [Bug 3390638] Workaround for*/ +    if (n == -n || intVal == n) {	/* broken compiler optimizers. 
*/ +	return sprintf(buffer, "%ld", n);      }      /* @@ -2089,6 +3439,7 @@ TclFormatInt(buffer, n)      for (j = 0;  j < i;  j++, i--) {  	char tmp = buffer[i]; +  	buffer[i] = buffer[j];  	buffer[j] = tmp;      } @@ -2098,154 +3449,114 @@ TclFormatInt(buffer, n)  /*   *----------------------------------------------------------------------   * - * TclLooksLikeInt -- - * - *	This procedure decides whether the leading characters of a - *	string look like an integer or something else (such as a - *	floating-point number or string). - * - * Results: - *	The return value is 1 if the leading characters of p look - *	like a valid Tcl integer.  If they look like a floating-point - *	number (e.g. "e01" or "2.4"), or if they don't look like a - *	number at all, then 0 is returned. - * - * Side effects: - *	None. - * - *---------------------------------------------------------------------- - */ - -int -TclLooksLikeInt(bytes, length) -    register CONST char *bytes;	/* Points to first byte of the string. */ -    int length;			/* Number of bytes in the string. If < 0 -				 * bytes up to the first null byte are -				 * considered (if they may appear in an  -				 * integer). */ -{ -    register CONST char *p, *end; - -    if (length < 0) { -	length = (bytes? strlen(bytes) : 0); -    } -    end = (bytes + length); - -    p = bytes; -    while ((p < end) && isspace(UCHAR(*p))) { /* INTL: ISO space. 
*/ -	p++; -    } -    if (p == end) { -	return 0; -    } -     -    if ((*p == '+') || (*p == '-')) { -	p++; -    } -    if ((p == end) || !isdigit(UCHAR(*p))) { /* INTL: digit */ -	return 0; -    } -    p++; -    while ((p < end) && isdigit(UCHAR(*p))) { /* INTL: digit */ -	p++; -    } -    if (p == end) { -	return 1; -    } -    if ((*p != '.') && (*p != 'e') && (*p != 'E')) { -	return 1; -    } -    return 0; -} - -/* - *---------------------------------------------------------------------- - *   * TclGetIntForIndex --   * - *	This procedure returns an integer corresponding to the list index - *	held in a Tcl object. The Tcl object's value is expected to be - *	either an integer or a string of the form "end([+-]integer)?".  + *	This function returns an integer corresponding to the list index held + *	in a Tcl object. The Tcl object's value is expected to be in the + *	format integer([+-]integer)? or the format end([+-]integer)?.   *   * Results:   *	The return value is normally TCL_OK, which means that the index was - *	successfully stored into the location referenced by "indexPtr".  If - *	the Tcl object referenced by "objPtr" has the value "end", the - *	value stored is "endValue". If "objPtr"s values is not of the form - *	"end([+-]integer)?" and - *	can not be converted to an integer, TCL_ERROR is returned and, if - *	"interp" is non-NULL, an error message is left in the interpreter's - *	result object. + *	successfully stored into the location referenced by "indexPtr". If the + *	Tcl object referenced by "objPtr" has the value "end", the value + *	stored is "endValue". If "objPtr"s values is not of one of the + *	expected formats, TCL_ERROR is returned and, if "interp" is non-NULL, + *	an error message is left in the interpreter's result object.   *   * Side effects: - *	The object referenced by "objPtr" might be converted to an - *	integer object. 
+ *	The object referenced by "objPtr" might be converted to an integer, + *	wide integer, or end-based-index object.   *   *----------------------------------------------------------------------   */  int -TclGetIntForIndex(interp, objPtr, endValue, indexPtr) -    Tcl_Interp *interp;		/* Interpreter to use for error reporting.  -				 * If NULL, then no error message is left -				 * after errors. */ -    Tcl_Obj *objPtr;		/* Points to an object containing either -				 * "end" or an integer. */ -    int endValue;		/* The value to be stored at "indexPtr" if +TclGetIntForIndex( +    Tcl_Interp *interp,		/* Interpreter to use for error reporting. If +				 * NULL, then no error message is left after +				 * errors. */ +    Tcl_Obj *objPtr,		/* Points to an object containing either "end" +				 * or an integer. */ +    int endValue,		/* The value to be stored at "indexPtr" if  				 * "objPtr" holds "end". */ -    int *indexPtr;		/* Location filled in with an integer +    int *indexPtr)		/* Location filled in with an integer  				 * representing an index. */  { -    char *bytes; -    int length, offset; +    int length; +    char *opPtr; +    const char *bytes; -    /* If the object is already an integer, use it. */ - -    if (objPtr->typePtr == &tclIntType) { -	*indexPtr = (int)objPtr->internalRep.longValue; +    if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) {  	return TCL_OK;      } -    if ( SetEndOffsetFromAny( NULL, objPtr ) == TCL_OK ) { - +    if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) {  	/* -	 * If the object is already an offset from the end of the list, or -	 * can be converted to one, use it. +	 * If the object is already an offset from the end of the list, or can +	 * be converted to one, use it.  	 */  	*indexPtr = endValue + objPtr->internalRep.longValue; +	return TCL_OK; +    } -    } else if ( Tcl_GetIntFromObj( NULL, objPtr, &offset ) == TCL_OK ) { - -	/* -	 * If the object can be converted to an integer, use that. 
-	 */ +    bytes = TclGetStringFromObj(objPtr, &length); -	*indexPtr = offset; +    /* +     * Leading whitespace is acceptable in an index. +     */ -    } else { +    while (length && TclIsSpaceProc(*bytes)) { +	bytes++; +	length--; +    } -	/* -	 * Report a parse error. -	 */ +    if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr, +	    TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) { +	int code, first, second; +	char savedOp = *opPtr; -	if ((Interp *)interp != NULL) { -	    bytes = Tcl_GetStringFromObj( objPtr, &length ); -	    Tcl_AppendStringsToObj( Tcl_GetObjResult(interp), -				    "bad index \"", bytes, -				    "\": must be integer or end?-integer?", -				    (char *) NULL); -	    if ( !strncmp ( bytes, "end-", 3 ) ) { -		bytes += 3; -	    } -	    TclCheckBadOctal(interp, bytes); +	if ((savedOp != '+') && (savedOp != '-')) { +	    goto parseError; +	} +	if (TclIsSpaceProc(opPtr[1])) { +	    goto parseError;  	} +	*opPtr = '\0'; +	code = Tcl_GetInt(interp, bytes, &first); +	*opPtr = savedOp; +	if (code == TCL_ERROR) { +	    goto parseError; +	} +	if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) { +	    goto parseError; +	} +	if (savedOp == '+') { +	    *indexPtr = first + second; +	} else { +	    *indexPtr = first - second; +	} +	return TCL_OK; +    } -	return TCL_ERROR; +    /* +     * Report a parse error. +     */ + +  parseError: +    if (interp != NULL) { +	bytes = Tcl_GetString(objPtr); +	Tcl_SetObjResult(interp, Tcl_ObjPrintf( +		"bad index \"%s\": must be integer?[+-]integer? or" +		" end?[+-]integer?", bytes)); +	if (!strncmp(bytes, "end-", 4)) { +	    bytes += 4; +	} +	TclCheckBadOctal(interp, bytes); +	Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);      } -	     -    return TCL_OK; +    return TCL_ERROR;  }  /* @@ -2262,29 +3573,26 @@ TclGetIntForIndex(interp, objPtr, endValue, indexPtr)   * Side effects:   *	Stores a valid string in the object's string rep.   
* - * This procedure does NOT free any earlier string rep.  If it is - * called on an object that already has a valid string rep, it will - * leak memory. + * This function does NOT free any earlier string rep. If it is called on an + * object that already has a valid string rep, it will leak memory.   *   *----------------------------------------------------------------------   */  static void -UpdateStringOfEndOffset( objPtr ) -    register Tcl_Obj* objPtr; +UpdateStringOfEndOffset( +    register Tcl_Obj *objPtr)  { -    char buffer[ TCL_INTEGER_SPACE + sizeof("end") + 1 ]; -    register int len; +    char buffer[TCL_INTEGER_SPACE + 5]; +    register int len = 3; -    strcpy( buffer, "end" ); -    len = sizeof( "end" ) - 1; -    if ( objPtr->internalRep.longValue != 0 ) { +    memcpy(buffer, "end", 4); +    if (objPtr->internalRep.longValue != 0) {  	buffer[len++] = '-'; -	len += TclFormatInt( buffer + len, -			     -( objPtr->internalRep.longValue ) ); +	len += TclFormatInt(buffer+len, -(objPtr->internalRep.longValue));      } -    objPtr->bytes = ckalloc( (unsigned) ( len + 1 ) ); -    strcpy( objPtr->bytes, buffer ); +    objPtr->bytes = ckalloc((unsigned) len+1); +    memcpy(objPtr->bytes, buffer, (unsigned) len+1);      objPtr->length = len;  } @@ -2293,101 +3601,105 @@ UpdateStringOfEndOffset( objPtr )   *   * SetEndOffsetFromAny --   * - *	Look for a string of the form "end-offset" and convert it - *	to an internal representation holding the offset. + *	Look for a string of the form "end[+-]offset" and convert it to an + *	internal representation holding the offset.   *   * Results:   *	Returns TCL_OK if ok, TCL_ERROR if the string was badly formed.   *   * Side effects: - *	If interp is not NULL, stores an error message in the - *	interpreter result. + *	If interp is not NULL, stores an error message in the interpreter + *	result.   
*   *----------------------------------------------------------------------   */  static int -SetEndOffsetFromAny( Tcl_Interp* interp, -				/* Tcl interpreter or NULL */ -		     Tcl_Obj* objPtr ) -				/* Pointer to the object to parse */ +SetEndOffsetFromAny( +    Tcl_Interp *interp,		/* Tcl interpreter or NULL */ +    Tcl_Obj *objPtr)		/* Pointer to the object to parse */  {      int offset;			/* Offset in the "end-offset" expression */ -    Tcl_ObjType* oldTypePtr = objPtr->typePtr; -				/* Old internal rep type of the object */ -    register char* bytes;	/* String rep of the object */ +    register const char *bytes;	/* String rep of the object */      int length;			/* Length of the object's string rep */ -    /* If it's already the right type, we're fine. */ +    /* +     * If it's already the right type, we're fine. +     */ -    if ( objPtr->typePtr == &tclEndOffsetType ) { +    if (objPtr->typePtr == &tclEndOffsetType) {  	return TCL_OK;      } -    /* Check for a string rep of the right form. */ +    /* +     * Check for a string rep of the right form. +     */ -    bytes = Tcl_GetStringFromObj(objPtr, &length); +    bytes = TclGetStringFromObj(objPtr, &length);      if ((*bytes != 'e') || (strncmp(bytes, "end",  	    (size_t)((length > 3) ? 3 : length)) != 0)) { -	if ( interp != NULL ) { -	    Tcl_AppendStringsToObj( Tcl_GetObjResult( interp ), -				    "bad index \"", bytes, -				    "\": must be end?-integer?", -				    (char*) NULL ); +	if (interp != NULL) { +	    Tcl_SetObjResult(interp, Tcl_ObjPrintf( +		    "bad index \"%s\": must be end?[+-]integer?", bytes)); +	    Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);  	}  	return TCL_ERROR;      } -    /* Convert the string rep */ +    /* +     * Convert the string rep. 
+     */      if (length <= 3) {  	offset = 0; -    } else if (bytes[3] == '-') { - +    } else if ((length > 4) && ((bytes[3] == '-') || (bytes[3] == '+'))) {  	/* -	 * This is our limited string expression evaluator +	 * This is our limited string expression evaluator. Pass everything +	 * after "end-" to Tcl_GetInt, then reverse for offset.  	 */ -	if (Tcl_GetInt(interp, bytes+3, &offset) != TCL_OK) { + +	if (TclIsSpaceProc(bytes[4])) { +	    goto badIndexFormat; +	} +	if (Tcl_GetInt(interp, bytes+4, &offset) != TCL_OK) {  	    return TCL_ERROR;  	} - +	if (bytes[3] == '-') { +	    offset = -offset; +	}      } else { +	/* +	 * Conversion failed. Report the error. +	 */ -	/* Conversion failed.  Report the error. */ - - -	if ( interp != NULL ) { -	    Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), -				   "bad index \"", bytes, -				   "\": must be integer or end?-integer?", -				   (char *) NULL); +    badIndexFormat: +	if (interp != NULL) { +	    Tcl_SetObjResult(interp, Tcl_ObjPrintf( +		    "bad index \"%s\": must be end?[+-]integer?", bytes)); +	    Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);  	}  	return TCL_ERROR; -      }      /* -     * The conversion succeeded. Free the old internal rep and set -     * the new one. +     * The conversion succeeded. Free the old internal rep and set the new +     * one.       */ -    if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) { -	oldTypePtr->freeIntRepProc(objPtr); -    } -     +    TclFreeIntRep(objPtr);      objPtr->internalRep.longValue = offset;      objPtr->typePtr = &tclEndOffsetType;      return TCL_OK; -}     +}  /*   *----------------------------------------------------------------------   *   * TclCheckBadOctal --   * - *	This procedure checks for a bad octal value and appends a - *	meaningful error to the interp's result. + *	This function checks for a bad octal value and appends a meaningful + *	error to the interp's result.   
*   * Results:   *	1 if the argument was a bad octal, else 0. @@ -2399,41 +3711,48 @@ SetEndOffsetFromAny( Tcl_Interp* interp,   */  int -TclCheckBadOctal(interp, value) -    Tcl_Interp *interp;		/* Interpreter to use for error reporting.  -				 * If NULL, then no error message is left -				 * after errors. */ -    CONST char *value;		/* String to check. */ +TclCheckBadOctal( +    Tcl_Interp *interp,		/* Interpreter to use for error reporting. If +				 * NULL, then no error message is left after +				 * errors. */ +    const char *value)		/* String to check. */  { -    register CONST char *p = value; +    register const char *p = value;      /* -     * A frequent mistake is invalid octal values due to an unwanted -     * leading zero. Try to generate a meaningful error message. +     * A frequent mistake is invalid octal values due to an unwanted leading +     * zero. Try to generate a meaningful error message.       */ -    while (isspace(UCHAR(*p))) {	/* INTL: ISO space. */ +    while (TclIsSpaceProc(*p)) {  	p++;      }      if (*p == '+' || *p == '-') {  	p++;      }      if (*p == '0') { +	if ((p[1] == 'o') || p[1] == 'O') { +	    p += 2; +	}  	while (isdigit(UCHAR(*p))) {	/* INTL: digit. */  	    p++;  	} -	while (isspace(UCHAR(*p))) {	/* INTL: ISO space. */ +	while (TclIsSpaceProc(*p)) {  	    p++;  	}  	if (*p == '\0') { -	    /* Reached end of string */ +	    /* +	     * Reached end of string. +	     */ +  	    if (interp != NULL) {  		/* -		 * Don't reset the result here because we want this result -		 * to be added to an existing error message as extra info. +		 * Don't reset the result here because we want this result to +		 * be added to an existing error message as extra info.  		 
*/ -		Tcl_AppendResult(interp, " (looks like invalid octal number)", -			(char *) NULL); + +		Tcl_AppendToObj(Tcl_GetObjResult(interp), +			" (looks like invalid octal number)", -1);  	    }  	    return 1;  	} @@ -2444,28 +3763,351 @@ TclCheckBadOctal(interp, value)  /*   *----------------------------------------------------------------------   * + * ClearHash -- + * + *	Remove all the entries in the hash table *tablePtr. + * + *---------------------------------------------------------------------- + */ + +static void +ClearHash( +    Tcl_HashTable *tablePtr) +{ +    Tcl_HashSearch search; +    Tcl_HashEntry *hPtr; + +    for (hPtr = Tcl_FirstHashEntry(tablePtr, &search); hPtr != NULL; +	    hPtr = Tcl_NextHashEntry(&search)) { +	Tcl_Obj *objPtr = Tcl_GetHashValue(hPtr); + +	Tcl_DecrRefCount(objPtr); +	Tcl_DeleteHashEntry(hPtr); +    } +} + +/* + *---------------------------------------------------------------------- + * + * GetThreadHash -- + * + *	Get a thread-specific (Tcl_HashTable *) associated with a thread data + *	key. + * + * Results: + *	The Tcl_HashTable * corresponding to *keyPtr. + * + * Side effects: + *	The first call on a keyPtr in each thread creates a new Tcl_HashTable, + *	and registers a thread exit handler to dispose of it. + * + *---------------------------------------------------------------------- + */ + +static Tcl_HashTable * +GetThreadHash( +    Tcl_ThreadDataKey *keyPtr) +{ +    Tcl_HashTable **tablePtrPtr = +	    Tcl_GetThreadData(keyPtr, sizeof(Tcl_HashTable *)); + +    if (NULL == *tablePtrPtr) { +	*tablePtrPtr = ckalloc(sizeof(Tcl_HashTable)); +	Tcl_CreateThreadExitHandler(FreeThreadHash, *tablePtrPtr); +	Tcl_InitHashTable(*tablePtrPtr, TCL_ONE_WORD_KEYS); +    } +    return *tablePtrPtr; +} + +/* + *---------------------------------------------------------------------- + * + * FreeThreadHash -- + * + *	Thread exit handler used by GetThreadHash to dispose of a thread hash + *	table. + * + * Side effects: + *	Frees a Tcl_HashTable. 
+ * + *---------------------------------------------------------------------- + */ + +static void +FreeThreadHash( +    ClientData clientData) +{ +    Tcl_HashTable *tablePtr = clientData; + +    ClearHash(tablePtr); +    Tcl_DeleteHashTable(tablePtr); +    ckfree(tablePtr); +} + +/* + *---------------------------------------------------------------------- + * + * FreeProcessGlobalValue -- + * + *	Exit handler used by Tcl(Set|Get)ProcessGlobalValue to cleanup a + *	ProcessGlobalValue at exit. + * + *---------------------------------------------------------------------- + */ + +static void +FreeProcessGlobalValue( +    ClientData clientData) +{ +    ProcessGlobalValue *pgvPtr = clientData; + +    pgvPtr->epoch++; +    pgvPtr->numBytes = 0; +    ckfree(pgvPtr->value); +    pgvPtr->value = NULL; +    if (pgvPtr->encoding) { +	Tcl_FreeEncoding(pgvPtr->encoding); +	pgvPtr->encoding = NULL; +    } +    Tcl_MutexFinalize(&pgvPtr->mutex); +} + +/* + *---------------------------------------------------------------------- + * + * TclSetProcessGlobalValue -- + * + *	Utility routine to set a global value shared by all threads in the + *	process while keeping a thread-local copy as well. + * + *---------------------------------------------------------------------- + */ + +void +TclSetProcessGlobalValue( +    ProcessGlobalValue *pgvPtr, +    Tcl_Obj *newValue, +    Tcl_Encoding encoding) +{ +    const char *bytes; +    Tcl_HashTable *cacheMap; +    Tcl_HashEntry *hPtr; +    int dummy; + +    Tcl_MutexLock(&pgvPtr->mutex); + +    /* +     * Fill the global string value. 
+     */ + +    pgvPtr->epoch++; +    if (NULL != pgvPtr->value) { +	ckfree(pgvPtr->value); +    } else { +	Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr); +    } +    bytes = Tcl_GetStringFromObj(newValue, &pgvPtr->numBytes); +    pgvPtr->value = ckalloc(pgvPtr->numBytes + 1); +    memcpy(pgvPtr->value, bytes, (unsigned) pgvPtr->numBytes + 1); +    if (pgvPtr->encoding) { +	Tcl_FreeEncoding(pgvPtr->encoding); +    } +    pgvPtr->encoding = encoding; + +    /* +     * Fill the local thread copy directly with the Tcl_Obj value to avoid +     * loss of the intrep. Increment newValue refCount early to handle case +     * where we set a PGV to itself. +     */ + +    Tcl_IncrRefCount(newValue); +    cacheMap = GetThreadHash(&pgvPtr->key); +    ClearHash(cacheMap); +    hPtr = Tcl_CreateHashEntry(cacheMap, INT2PTR(pgvPtr->epoch), &dummy); +    Tcl_SetHashValue(hPtr, newValue); +    Tcl_MutexUnlock(&pgvPtr->mutex); +} + +/* + *---------------------------------------------------------------------- + * + * TclGetProcessGlobalValue -- + * + *	Retrieve a global value shared among all threads of the process, + *	preferring a thread-local copy as long as it remains valid. + * + * Results: + *	Returns a (Tcl_Obj *) that holds a copy of the global value. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclGetProcessGlobalValue( +    ProcessGlobalValue *pgvPtr) +{ +    Tcl_Obj *value = NULL; +    Tcl_HashTable *cacheMap; +    Tcl_HashEntry *hPtr; +    int epoch = pgvPtr->epoch; + +    if (pgvPtr->encoding) { +	Tcl_Encoding current = Tcl_GetEncoding(NULL, NULL); + +	if (pgvPtr->encoding != current) { +	    /* +	     * The system encoding has changed since the master string value +	     * was saved. Convert the master value to be based on the new +	     * system encoding. 
+	     */ + +	    Tcl_DString native, newValue; + +	    Tcl_MutexLock(&pgvPtr->mutex); +	    pgvPtr->epoch++; +	    epoch = pgvPtr->epoch; +	    Tcl_UtfToExternalDString(pgvPtr->encoding, pgvPtr->value, +		    pgvPtr->numBytes, &native); +	    Tcl_ExternalToUtfDString(current, Tcl_DStringValue(&native), +	    Tcl_DStringLength(&native), &newValue); +	    Tcl_DStringFree(&native); +	    ckfree(pgvPtr->value); +	    pgvPtr->value = ckalloc(Tcl_DStringLength(&newValue) + 1); +	    memcpy(pgvPtr->value, Tcl_DStringValue(&newValue), +		    (size_t) Tcl_DStringLength(&newValue) + 1); +	    Tcl_DStringFree(&newValue); +	    Tcl_FreeEncoding(pgvPtr->encoding); +	    pgvPtr->encoding = current; +	    Tcl_MutexUnlock(&pgvPtr->mutex); +	} else { +	    Tcl_FreeEncoding(current); +	} +    } +    cacheMap = GetThreadHash(&pgvPtr->key); +    hPtr = Tcl_FindHashEntry(cacheMap, (char *) INT2PTR(epoch)); +    if (NULL == hPtr) { +	int dummy; + +	/* +	 * No cache for the current epoch - must be a new one. +	 * +	 * First, clear the cacheMap, as anything in it must refer to some +	 * expired epoch. +	 */ + +	ClearHash(cacheMap); + +	/* +	 * If no thread has set the shared value, call the initializer. +	 */ + +	Tcl_MutexLock(&pgvPtr->mutex); +	if ((NULL == pgvPtr->value) && (pgvPtr->proc)) { +	    pgvPtr->epoch++; +	    pgvPtr->proc(&pgvPtr->value,&pgvPtr->numBytes,&pgvPtr->encoding); +	    if (pgvPtr->value == NULL) { +		Tcl_Panic("PGV Initializer did not initialize"); +	    } +	    Tcl_CreateExitHandler(FreeProcessGlobalValue, pgvPtr); +	} + +	/* +	 * Store a copy of the shared value in our epoch-indexed cache. 
+	 */ + +	value = Tcl_NewStringObj(pgvPtr->value, pgvPtr->numBytes); +	hPtr = Tcl_CreateHashEntry(cacheMap, +		INT2PTR(pgvPtr->epoch), &dummy); +	Tcl_MutexUnlock(&pgvPtr->mutex); +	Tcl_SetHashValue(hPtr, value); +	Tcl_IncrRefCount(value); +    } +    return Tcl_GetHashValue(hPtr); +} + +/* + *---------------------------------------------------------------------- + * + * TclSetObjNameOfExecutable -- + * + *	This function stores the absolute pathname of the executable file + *	(normally as computed by TclpFindExecutable). + * + * Results: + *	None. + * + * Side effects: + *	Stores the executable name. + * + *---------------------------------------------------------------------- + */ + +void +TclSetObjNameOfExecutable( +    Tcl_Obj *name, +    Tcl_Encoding encoding) +{ +    TclSetProcessGlobalValue(&executableName, name, encoding); +} + +/* + *---------------------------------------------------------------------- + * + * TclGetObjNameOfExecutable -- + * + *	This function retrieves the absolute pathname of the application in + *	which the Tcl library is running, usually as previously stored by + *	TclpFindExecutable(). This function call is the C API equivalent to + *	the "info nameofexecutable" command. + * + * Results: + *	A pointer to an "fsPath" Tcl_Obj, or to an empty Tcl_Obj if the + *	pathname of the application is unknown. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclGetObjNameOfExecutable(void) +{ +    return TclGetProcessGlobalValue(&executableName); +} + +/* + *---------------------------------------------------------------------- + *   * Tcl_GetNameOfExecutable --   * - *	This procedure simply returns a pointer to the internal full - *	path name of the executable file as computed by - *	Tcl_FindExecutable.  This procedure call is the C API - *	equivalent to the "info nameofexecutable" command. 
+ *	This function retrieves the absolute pathname of the application in + *	which the Tcl library is running, and returns it in string form. + * + *	The returned string belongs to Tcl and should be copied if the caller + *	plans to keep it, to guard against it becoming invalid.   *   * Results: - *	A pointer to the internal string or NULL if the internal full - *	path name has not been computed or unknown. + *	A pointer to the internal string or NULL if the internal full path + *	name has not been computed or unknown.   *   * Side effects: - *	The object referenced by "objPtr" might be converted to an - *	integer object. + *	None.   *   *----------------------------------------------------------------------   */ -CONST char * -Tcl_GetNameOfExecutable() +const char * +Tcl_GetNameOfExecutable(void)  { -    return (tclExecutableName); +    int numBytes; +    const char *bytes = +	    Tcl_GetStringFromObj(TclGetObjNameOfExecutable(), &numBytes); + +    if (numBytes == 0) { +	return NULL; +    } +    return bytes;  }  /* @@ -2473,7 +4115,9 @@ Tcl_GetNameOfExecutable()   *   * TclpGetTime --   * - *	Deprecated synonym for Tcl_GetTime. + *	Deprecated synonym for Tcl_GetTime. This function is provided for the + *	benefit of extensions written before Tcl_GetTime was exported from the + *	library.   *   * Results:   *	None. @@ -2481,15 +4125,260 @@ Tcl_GetNameOfExecutable()   * Side effects:   *	Stores current time in the buffer designated by "timePtr"   * - * This procedure is provided for the benefit of extensions written - * before Tcl_GetTime was exported from the library. 
- *   *----------------------------------------------------------------------   */  void -TclpGetTime( timePtr ) -    Tcl_Time* timePtr; +TclpGetTime( +    Tcl_Time *timePtr) +{ +    Tcl_GetTime(timePtr); +} + +/* + *---------------------------------------------------------------------- + * + * TclGetPlatform -- + * + *	This is a kludge that allows the test library to get access the + *	internal tclPlatform variable. + * + * Results: + *	Returns a pointer to the tclPlatform variable. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +TclPlatformType * +TclGetPlatform(void) +{ +    return &tclPlatform; +} + +/* + *---------------------------------------------------------------------- + * + * TclReToGlob -- + * + *	Attempt to convert a regular expression to an equivalent glob pattern. + * + * Results: + *	Returns TCL_OK on success, TCL_ERROR on failure. If interp is not + *	NULL, an error message is placed in the result. On success, the + *	DString will contain an exact equivalent glob pattern. The caller is + *	responsible for calling Tcl_DStringFree on success. If exactPtr is not + *	NULL, it will be 1 if an exact match qualifies. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclReToGlob( +    Tcl_Interp *interp, +    const char *reStr, +    int reStrLen, +    Tcl_DString *dsPtr, +    int *exactPtr)  { -    Tcl_GetTime( timePtr ); +    int anchorLeft, anchorRight, lastIsStar, numStars; +    char *dsStr, *dsStrStart; +    const char *msg, *p, *strEnd, *code; + +    strEnd = reStr + reStrLen; +    Tcl_DStringInit(dsPtr); + +    /* +     * "***=xxx" == "*xxx*", watch for glob-sensitive chars. +     */ + +    if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) { +	/* +	 * At most, the glob pattern has length 2*reStrLen + 2 to backslash +	 * escape every character and have * at each end. 
+	 */ + +	Tcl_DStringSetLength(dsPtr, reStrLen + 2); +	dsStr = dsStrStart = Tcl_DStringValue(dsPtr); +	*dsStr++ = '*'; +	for (p = reStr + 4; p < strEnd; p++) { +	    switch (*p) { +	    case '\\': case '*': case '[': case ']': case '?': +		/* Only add \ where necessary for glob */ +		*dsStr++ = '\\'; +		/* fall through */ +	    default: +		*dsStr++ = *p; +		break; +	    } +	} +	*dsStr++ = '*'; +	Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart); +	if (exactPtr) { +	    *exactPtr = 0; +	} +	return TCL_OK; +    } + +    /* +     * At most, the glob pattern has length reStrLen + 2 to account for +     * possible * at each end. +     */ + +    Tcl_DStringSetLength(dsPtr, reStrLen + 2); +    dsStr = dsStrStart = Tcl_DStringValue(dsPtr); + +    /* +     * Check for anchored REs (ie ^foo$), so we can use string equal if +     * possible. Do not alter the start of str so we can free it correctly. +     * +     * Keep track of the last char being an unescaped star to prevent multiple +     * instances.  Simpler than checking that the last star may be escaped. 
+     */ + +    msg = NULL; +    code = NULL; +    p = reStr; +    anchorRight = 0; +    lastIsStar = 0; +    numStars = 0; + +    if (*p == '^') { +	anchorLeft = 1; +	p++; +    } else { +	anchorLeft = 0; +	*dsStr++ = '*'; +	lastIsStar = 1; +    } + +    for ( ; p < strEnd; p++) { +	switch (*p) { +	case '\\': +	    p++; +	    switch (*p) { +	    case 'a': +		*dsStr++ = '\a'; +		break; +	    case 'b': +		*dsStr++ = '\b'; +		break; +	    case 'f': +		*dsStr++ = '\f'; +		break; +	    case 'n': +		*dsStr++ = '\n'; +		break; +	    case 'r': +		*dsStr++ = '\r'; +		break; +	    case 't': +		*dsStr++ = '\t'; +		break; +	    case 'v': +		*dsStr++ = '\v'; +		break; +	    case 'B': case '\\': +		*dsStr++ = '\\'; +		*dsStr++ = '\\'; +		anchorLeft = 0; /* prevent exact match */ +		break; +	    case '*': case '[': case ']': case '?': +		/* Only add \ where necessary for glob */ +		*dsStr++ = '\\'; +		anchorLeft = 0; /* prevent exact match */ +		/* fall through */ +	    case '{': case '}': case '(': case ')': case '+': +	    case '.': case '|': case '^': case '$': +		*dsStr++ = *p; +		break; +	    default: +		msg = "invalid escape sequence"; +		code = "BADESCAPE"; +		goto invalidGlob; +	    } +	    break; +	case '.': +	    anchorLeft = 0; /* prevent exact match */ +	    if (p+1 < strEnd) { +		if (p[1] == '*') { +		    p++; +		    if (!lastIsStar) { +			*dsStr++ = '*'; +			lastIsStar = 1; +			numStars++; +		    } +		    continue; +		} else if (p[1] == '+') { +		    p++; +		    *dsStr++ = '?'; +		    *dsStr++ = '*'; +		    lastIsStar = 1; +		    numStars++; +		    continue; +		} +	    } +	    *dsStr++ = '?'; +	    break; +	case '$': +	    if (p+1 != strEnd) { +		msg = "$ not anchor"; +		code = "NONANCHOR"; +		goto invalidGlob; +	    } +	    anchorRight = 1; +	    break; +	case '*': case '+': case '?': case '|': case '^': +	case '{': case '}': case '(': case ')': case '[': case ']': +	    msg = "unhandled RE special char"; +	    code = "UNHANDLED"; +	    goto invalidGlob; +	default: 
+	    *dsStr++ = *p; +	    break; +	} +	lastIsStar = 0; +    } +    if (numStars > 1) { +	/* +	 * Heuristic: if >1 non-anchoring *, the risk is large that glob +	 * matching is slower than the RE engine, so report invalid. +	 */ + +	msg = "excessive recursive glob backtrack potential"; +	code = "OVERCOMPLEX"; +	goto invalidGlob; +    } + +    if (!anchorRight && !lastIsStar) { +	*dsStr++ = '*'; +    } +    Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart); + +    if (exactPtr) { +	*exactPtr = (anchorLeft && anchorRight); +    } + +    return TCL_OK; + +  invalidGlob: +    if (interp != NULL) { +	Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); +	Tcl_SetErrorCode(interp, "TCL", "RE2GLOB", code, NULL); +    } +    Tcl_DStringFree(dsPtr); +    return TCL_ERROR;  } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ | 
