diff options
Diffstat (limited to 'generic/tclParse.c')
| -rw-r--r-- | generic/tclParse.c | 1204 | 
1 files changed, 692 insertions, 512 deletions
| diff --git a/generic/tclParse.c b/generic/tclParse.c index 5da1abb..8a28bf2 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -11,11 +11,11 @@   *   * See the file "license.terms" for information on usage and redistribution of   * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclParse.c,v 1.45 2005/11/02 14:51:04 dkf Exp $   */  #include "tclInt.h" +#include "tclParse.h" +#include <assert.h>  /*   * The following table provides parsing information about each possible 8-bit @@ -43,18 +43,7 @@   * TYPE_BRACE -		Character is a curly brace (either left or right).   */ -#define TYPE_NORMAL		0 -#define TYPE_SPACE		0x1 -#define TYPE_COMMAND_END	0x2 -#define TYPE_SUBS		0x4 -#define TYPE_QUOTE		0x8 -#define TYPE_CLOSE_PAREN	0x10 -#define TYPE_CLOSE_BRACK	0x20 -#define TYPE_BRACE		0x40 - -#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)] - -static CONST char charTypeTable[] = { +const char tclCharTypeTable[] = {      /*       * Negative character values, from -128 to -1:       */ @@ -171,24 +160,28 @@ static CONST char charTypeTable[] = {   * Prototypes for local functions defined in this file:   */ -static int		CommandComplete(CONST char *script, int numBytes); -static int		ParseComment(CONST char *src, int numBytes, +static inline int	CommandComplete(const char *script, int numBytes); +static int		ParseComment(const char *src, int numBytes,  			    Tcl_Parse *parsePtr); -static int		ParseTokens(CONST char *src, int numBytes, -			    int mask, int flags, Tcl_Parse *parsePtr); +static int		ParseTokens(const char *src, int numBytes, int mask, +			    int flags, Tcl_Parse *parsePtr); +static int		ParseWhiteSpace(const char *src, int numBytes, +			    int *incompletePtr, char *typePtr); +static int		ParseAllWhiteSpace(const char *src, int numBytes, +			    int *incompletePtr);  /*   *----------------------------------------------------------------------   *   * TclParseInit --   * - * 	Initialize the fields of a Tcl_Parse struct. + *	Initialize the fields of a Tcl_Parse struct.   *   * Results: - * 	None. + *	None.   *   * Side effects: - * 	The Tcl_Parse struct pointed to by parsePtr gets initialized. + *	The Tcl_Parse struct pointed to by parsePtr gets initialized.   *   *----------------------------------------------------------------------   */ @@ -196,7 +189,7 @@ static int		ParseTokens(CONST char *src, int numBytes,  void  TclParseInit(      Tcl_Interp *interp,		/* Interpreter to use for error reporting */ -    CONST char *string,		/* String to be parsed. */ +    const char *start,		/* Start of string to be parsed. */      int numBytes,		/* Total number of bytes in string. If < 0,  				 * the script consists of all bytes up to the  				 * first null character. */ @@ -206,8 +199,8 @@ TclParseInit(      parsePtr->tokenPtr = parsePtr->staticTokens;      parsePtr->numTokens = 0;      parsePtr->tokensAvailable = NUM_STATIC_TOKENS; -    parsePtr->string = string; -    parsePtr->end = string + numBytes; +    parsePtr->string = start; +    parsePtr->end = start + numBytes;      parsePtr->term = parsePtr->end;      parsePtr->interp = interp;      parsePtr->incomplete = 0; @@ -241,7 +234,7 @@ int  Tcl_ParseCommand(      Tcl_Interp *interp,		/* Interpreter to use for error reporting; if  				 * NULL, then no error message is provided. */ -    CONST char *start,		/* First character of string containing one or +    const char *start,		/* First character of string containing one or  				 * more Tcl commands. */      register int numBytes,	/* Total number of bytes in string. If < 0,  				 * the script consists of all bytes up to the @@ -251,24 +244,25 @@ Tcl_ParseCommand(  				 * command terminator. If zero, then close  				 * bracket has no special meaning. */      register Tcl_Parse *parsePtr) -    				/* Structure to fill in with information about +				/* Structure to fill in with information about  				 * the parsed command; any previous  				 * information in the structure is ignored. */  { -    register CONST char *src;	/* Points to current character in the +    register const char *src;	/* Points to current character in the  				 * command. */      char type;			/* Result returned by CHAR_TYPE(*src). */      Tcl_Token *tokenPtr;	/* Pointer to token being filled in. */      int wordIndex;		/* Index of word token for current word. */      int terminators;		/* CHAR_TYPE bits that indicate the end of a  				 * command. */ -    CONST char *termPtr;	/* Set by Tcl_ParseBraces/QuotedString to +    const char *termPtr;	/* Set by Tcl_ParseBraces/QuotedString to  				 * point to char after terminating one. */      int scanned; -    if ((start == NULL) && (numBytes>0)) { +    if ((start == NULL) && (numBytes != 0)) {  	if (interp != NULL) { -	    Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC); +	    Tcl_SetObjResult(interp, Tcl_NewStringObj( +		    "can't parse a NULL pointer", -1));  	}  	return TCL_ERROR;      } @@ -306,37 +300,52 @@ Tcl_ParseCommand(       */      parsePtr->commandStart = src; +    type = CHAR_TYPE(*src); +    scanned = 1;	/* Can't have missing whitepsace before first word. */      while (1) {  	int expandWord = 0; +	/* Are we at command termination? */ + +	if ((numBytes == 0) || (type & terminators) != 0) { +	    parsePtr->term = src; +	    parsePtr->commandSize = src + (numBytes != 0) +		    - parsePtr->commandStart; +	    return TCL_OK; +	} + +	/* Are we missing white space after previous word? */ + +	if (scanned == 0) { +	    if (src[-1] == '"') { +		if (interp != NULL) { +		    Tcl_SetObjResult(interp, Tcl_NewStringObj( +			    "extra characters after close-quote", -1)); +		} +		parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; +	    } else { +		if (interp != NULL) { +		    Tcl_SetObjResult(interp, Tcl_NewStringObj( +			    "extra characters after close-brace", -1)); +		} +		parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; +	    } +	    parsePtr->term = src; +	error: +	    Tcl_FreeParse(parsePtr); +	    parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; +	    return TCL_ERROR; +	} +  	/*  	 * Create the token for the word.  	 */ -	if (parsePtr->numTokens == parsePtr->tokensAvailable) { -	    TclExpandTokenArray(parsePtr); -	} +	TclGrowParseTokenArray(parsePtr, 1);  	wordIndex = parsePtr->numTokens;  	tokenPtr = &parsePtr->tokenPtr[wordIndex];  	tokenPtr->type = TCL_TOKEN_WORD; -	/* -	 * Skip white space before the word. Also skip a backslash-newline -	 * sequence: it should be treated just like white space. -	 */ - -	scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); -	src += scanned; -	numBytes -= scanned; -	if (numBytes == 0) { -	    parsePtr->term = src; -	    break; -	} -	if ((type & terminators) != 0) { -	    parsePtr->term = src; -	    src++; -	    break; -	}  	tokenPtr->start = src;  	parsePtr->numTokens++;  	parsePtr->numWords++; @@ -349,52 +358,41 @@ Tcl_ParseCommand(      parseWord:  	if (*src == '"') { -	    if (Tcl_ParseQuotedString(interp, src, numBytes, -		    parsePtr, 1, &termPtr) != TCL_OK) { +	    if (Tcl_ParseQuotedString(interp, src, numBytes, parsePtr, 1, +		    &termPtr) != TCL_OK) {  		goto error;  	    }  	    src = termPtr;  	    numBytes = parsePtr->end - src;  	} else if (*src == '{') { -	    static char expPfx[] = "expand"; -	    CONST size_t expPfxLen = sizeof(expPfx) - 1;  	    int expIdx = wordIndex + 1;  	    Tcl_Token *expPtr; -	    if (Tcl_ParseBraces(interp, src, numBytes, -		    parsePtr, 1, &termPtr) != TCL_OK) { +	    if (Tcl_ParseBraces(interp, src, numBytes, parsePtr, 1, +		    &termPtr) != TCL_OK) {  		goto error;  	    }  	    src = termPtr;  	    numBytes = parsePtr->end - src;  	    /* -	     * Check whether the braces contained the word expansion prefix. +	     * Check whether the braces contained the word expansion prefix +	     * {*}  	     */  	    expPtr = &parsePtr->tokenPtr[expIdx]; -	    if ( -		(0 == expandWord) -		/* Haven't seen prefix already */ -		&& (1 == parsePtr->numTokens - expIdx) -		/* Only one token */ -		&& (((expPfxLen == (size_t) expPtr->size) +	    if ((0 == expandWord) +		    /* Haven't seen prefix already */ +		    && (1 == parsePtr->numTokens - expIdx) +		    /* Only one token */ +		    && (((1 == (size_t) expPtr->size)  			    /* Same length as prefix */ -			    && (0 == strncmp(expPfx,expPtr->start,expPfxLen))) -#ifdef ALLOW_EMPTY_EXPAND -			/* -			 * Allow {} in addition to {expand} -			 */ -			|| (0 == (size_t) expPtr->size) -#endif -		    ) -		/* Is the prefix */ -		&& (numBytes > 0) -		&& (TclParseWhiteSpace(termPtr, numBytes, parsePtr, -			    &type) == 0) -		&& (type != TYPE_COMMAND_END) -		/* Non-whitespace follows */ -		) { +			    && (expPtr->start[0] == '*'))) +			    /* Is the prefix */ +		    && (numBytes > 0) && (0 == ParseWhiteSpace(termPtr, +			    numBytes, &parsePtr->incomplete, &type)) +		    && (type != TYPE_COMMAND_END) +		    /* Non-whitespace follows */) {  		expandWord = 1;  		parsePtr->numTokens--;  		goto parseWord; @@ -421,69 +419,232 @@ Tcl_ParseCommand(  	tokenPtr = &parsePtr->tokenPtr[wordIndex];  	tokenPtr->size = src - tokenPtr->start;  	tokenPtr->numComponents = parsePtr->numTokens - (wordIndex + 1); -	if ((tokenPtr->numComponents == 1) -		&& (tokenPtr[1].type == TCL_TOKEN_TEXT)) { -	    tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; -	}  	if (expandWord) { -	    tokenPtr->type = TCL_TOKEN_EXPAND_WORD; -	} - -	/* -	 * Do two additional checks: (a) make sure we're really at the end of -	 * a word (there might have been garbage left after a quoted or braced -	 * word), and (b) check for the end of the command. -	 */ +	    int i, isLiteral = 1; -	scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); -	if (scanned) { -	    src += scanned; -	    numBytes -= scanned; -	    continue; -	} +	    /* +	     * When a command includes a word that is an expanded literal; for +	     * example, {*}{1 2 3}, the parser performs that expansion +	     * immediately, generating several TCL_TOKEN_SIMPLE_WORDs instead +	     * of a single TCL_TOKEN_EXPAND_WORD that the Tcl_ParseCommand() +	     * caller might have to expand. This notably makes it simpler for +	     * those callers that wish to track line endings, such as those +	     * that implement key parts of TIP 280. +	     * +	     * First check whether the thing to be expanded is a literal, +	     * in the sense of being composed entirely of TCL_TOKEN_TEXT +	     * tokens. +	     */ -	if (numBytes == 0) { -	    parsePtr->term = src; -	    break; -	} -	if ((type & terminators) != 0) { -	    parsePtr->term = src; -	    src++; -	    break; -	} -	if (src[-1] == '"') { -	    if (interp != NULL) { -		Tcl_SetResult(interp, "extra characters after close-quote", -			TCL_STATIC); +	    for (i = 1; i <= tokenPtr->numComponents; i++) { +		if (tokenPtr[i].type != TCL_TOKEN_TEXT) { +		    isLiteral = 0; +		    break; +		}  	    } -	    parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; -	} else { -	    if (interp != NULL) { -		Tcl_SetResult(interp, "extra characters after close-brace", -			TCL_STATIC); + +	    if (isLiteral) { +		int elemCount = 0, code = TCL_OK, literal = 1; +		const char *nextElem, *listEnd, *elemStart; + +		/* +		 * The word to be expanded is a literal, so determine the +		 * boundaries of the literal string to be treated as a list +		 * and expanded. That literal string starts at +		 * tokenPtr[1].start, and includes all bytes up to, but not +		 * including (tokenPtr[tokenPtr->numComponents].start + +		 * tokenPtr[tokenPtr->numComponents].size) +		 */ + +		listEnd = (tokenPtr[tokenPtr->numComponents].start + +			tokenPtr[tokenPtr->numComponents].size); +		nextElem = tokenPtr[1].start; + +		/* +		 * Step through the literal string, parsing and counting list +		 * elements. +		 */ + +		while (nextElem < listEnd) { +		    int size; + +		    code = TclFindElement(NULL, nextElem, listEnd - nextElem, +			    &elemStart, &nextElem, &size, &literal); +		    if ((code != TCL_OK) || !literal) { +			break; +		    } +		    if (elemStart < listEnd) { +			elemCount++; +		    } +		} + +		if ((code != TCL_OK) || !literal) { +		    /* +		     * Some list element could not be parsed, or is not +		     * present as a literal substring of the script.  The +		     * compiler cannot handle list elements that get generated +		     * by a call to TclCopyAndCollapse(). Defer  the +		     * handling of  this to  compile/eval time, where  code is +		     * already  in place to  report the  "attempt to  expand a +		     * non-list" error or expand lists that require +		     * substitution. +		     */ + +		    tokenPtr->type = TCL_TOKEN_EXPAND_WORD; +		} else if (elemCount == 0) { +		    /* +		     * We are expanding a literal empty list. This means that +		     * the expanding word completely disappears, leaving no +		     * word generated this pass through the loop. Adjust +		     * accounting appropriately. +		     */ + +		    parsePtr->numWords--; +		    parsePtr->numTokens = wordIndex; +		} else { +		    /* +		     * Recalculate the number of Tcl_Tokens needed to store +		     * tokens representing the expanded list. +		     */ + +		    const char *listStart; +		    int growthNeeded = wordIndex + 2*elemCount +			    - parsePtr->numTokens; + +		    parsePtr->numWords += elemCount - 1; +		    if (growthNeeded > 0) { +			TclGrowParseTokenArray(parsePtr, growthNeeded); +			tokenPtr = &parsePtr->tokenPtr[wordIndex]; +		    } +		    parsePtr->numTokens = wordIndex + 2*elemCount; + +		    /* +		     * Generate a TCL_TOKEN_SIMPLE_WORD token sequence for +		     * each element of the literal list we are expanding in +		     * place. Take care with the start and size fields of each +		     * token so they point to the right literal characters in +		     * the original script to represent the right expanded +		     * word value. +		     */ + +		    listStart = nextElem = tokenPtr[1].start; +		    while (nextElem < listEnd) { +			int quoted; + +			tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; +			tokenPtr->numComponents = 1; + +			tokenPtr++; +			tokenPtr->type = TCL_TOKEN_TEXT; +			tokenPtr->numComponents = 0; +			TclFindElement(NULL, nextElem, listEnd - nextElem, +				&(tokenPtr->start), &nextElem, +				&(tokenPtr->size), NULL); + +			quoted = (tokenPtr->start[-1] == '{' +				|| tokenPtr->start[-1] == '"') +				&& tokenPtr->start > listStart; +			tokenPtr[-1].start = tokenPtr->start - quoted; +			tokenPtr[-1].size = tokenPtr->start + tokenPtr->size +				- tokenPtr[-1].start + quoted; + +			tokenPtr++; +		    } +		} +	    } else { +		/* +		 * The word to be expanded is not a literal, so defer +		 * expansion to compile/eval time by marking with a +		 * TCL_TOKEN_EXPAND_WORD token. +		 */ + +		tokenPtr->type = TCL_TOKEN_EXPAND_WORD;  	    } -	    parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; +	} else if ((tokenPtr->numComponents == 1) +		&& (tokenPtr[1].type == TCL_TOKEN_TEXT)) { +	    tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;  	} -	parsePtr->term = src; -	goto error; + +	/* Parse the whitespace between words. */ + +	scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); +	src += scanned; +	numBytes -= scanned;      } +} + +/* + *---------------------------------------------------------------------- + * + * TclIsSpaceProc -- + * + *	Report whether byte is in the set of whitespace characters used by + *	Tcl to separate words in scripts or elements in lists. + * + * Results: + *	Returns 1, if byte is in the set, 0 otherwise. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ -    parsePtr->commandSize = src - parsePtr->commandStart; -    return TCL_OK; +int +TclIsSpaceProc( +    char byte) +{ +    return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n'; +} + +/* + *---------------------------------------------------------------------- + * + * TclIsBareword-- + * + *	Report whether byte is one that can be part of a "bareword". + *	This concept is named in expression parsing, where it determines + *	what can be a legal function name, but is the same definition used + *	in determining what variable names can be parsed as variable + *	substitutions without the benefit of enclosing braces.  The set of + *	ASCII chars that are accepted are the numeric chars ('0'-'9'), + *	the alphabetic chars ('a'-'z', 'A'-'Z')	and underscore ('_'). + * + * Results: + *	Returns 1, if byte is in the accepted set of chars, 0 otherwise. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ -  error: -    Tcl_FreeParse(parsePtr); -    parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; -    return TCL_ERROR; +int +TclIsBareword( +    char byte) +{ +    if (byte < '0' || byte > 'z') { +	return 0; +    } +    if (byte <= '9' || byte >= 'a') { +	return 1; +    } +    if (byte == '_') { +	return 1; +    } +    if (byte < 'A' || byte > 'Z') { +	return 0; +    } +    return 1;  }  /*   *----------------------------------------------------------------------   * - * TclParseWhiteSpace -- + * ParseWhiteSpace --   * - *	Scans up to numBytes bytes starting at src, consuming white space as - *	defined by Tcl's parsing rules. + *	Scans up to numBytes bytes starting at src, consuming white space + *	between words as defined by Tcl's parsing rules.   *   * Results:   *	Returns the number of bytes recognized as white space. Records at @@ -497,18 +658,17 @@ Tcl_ParseCommand(   *----------------------------------------------------------------------   */ -int -TclParseWhiteSpace( -    CONST char *src,		/* First character to parse. */ +static int +ParseWhiteSpace( +    const char *src,		/* First character to parse. */      register int numBytes,	/* Max number of bytes to scan. */ -    Tcl_Parse *parsePtr,	/* Information about parse in progress. -				 * Updated if parsing indicates an incomplete -				 * command. */ +    int *incompletePtr,		/* Set this boolean memory to true if parsing +				 * indicates an incomplete command. */      char *typePtr)		/* Points to location to store character type  				 * of character that ends run of whitespace */  {      register char type = TYPE_NORMAL; -    register CONST char *p = src; +    register const char *p = src;      while (1) {  	while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) { @@ -525,9 +685,9 @@ TclParseWhiteSpace(  	    if (p[1] != '\n') {  		break;  	    } -	    p+=2; +	    p += 2;  	    if (--numBytes == 0) { -		parsePtr->incomplete = 1; +		*incompletePtr = 1;  		break;  	    }  	    continue; @@ -541,6 +701,47 @@ TclParseWhiteSpace(  /*   *----------------------------------------------------------------------   * + * TclParseAllWhiteSpace -- + * + *	Scans up to numBytes bytes starting at src, consuming all white space + *	including the command-terminating newline characters. + * + * Results: + *	Returns the number of bytes recognized as white space. + * + *---------------------------------------------------------------------- + */ + +static int +ParseAllWhiteSpace( +    const char *src,		/* First character to parse. */ +    int numBytes,		/* Max number of byes to scan */ +    int *incompletePtr)		/* Set true if parse is incomplete. */ +{ +    char type; +    const char *p = src; + +    do { +	int scanned = ParseWhiteSpace(p, numBytes, incompletePtr, &type); + +	p += scanned; +	numBytes -= scanned; +    } while (numBytes && (*p == '\n') && (p++, --numBytes)); +    return (p-src); +} + +int +TclParseAllWhiteSpace( +    const char *src,		/* First character to parse. */ +    int numBytes)		/* Max number of byes to scan */ +{ +    int dummy; +    return ParseAllWhiteSpace(src, numBytes, &dummy); +} + +/* + *---------------------------------------------------------------------- + *   * TclParseHex --   *   *	Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing @@ -562,23 +763,23 @@ TclParseWhiteSpace(  int  TclParseHex( -    CONST char *src,		/* First character to parse. */ +    const char *src,		/* First character to parse. */      int numBytes,		/* Max number of byes to scan */ -    Tcl_UniChar *resultPtr)	/* Points to storage provided by caller where -				 * the Tcl_UniChar resulting from the +    int *resultPtr)	/* Points to storage provided by caller where +				 * the character resulting from the  				 * conversion is to be written. */  { -    Tcl_UniChar result = 0; -    register CONST char *p = src; +    int result = 0; +    register const char *p = src;      while (numBytes--) {  	unsigned char digit = UCHAR(*p); -	if (!isxdigit(digit)) { +	if (!isxdigit(digit) || (result > 0x10fff)) {  	    break;  	} -	++p; +	p++;  	result <<= 4;  	if (digit >= 'a') { @@ -603,21 +804,21 @@ TclParseHex(   *	sequence as defined by Tcl's parsing rules.   *   * Results: - * 	Records at readPtr the number of bytes making up the backslash - * 	sequence. Records at dst the UTF-8 encoded equivalent of that - * 	backslash sequence. Returns the number of bytes written to dst, at - * 	most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results - * 	are not needed, but the return value is the same either way. + *	Records at readPtr the number of bytes making up the backslash + *	sequence. Records at dst the UTF-8 encoded equivalent of that + *	backslash sequence. Returns the number of bytes written to dst, at + *	most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + *	are not needed, but the return value is the same either way.   *   * Side effects: - * 	None. + *	None.   *   *----------------------------------------------------------------------   */  int  TclParseBackslash( -    CONST char *src,		/* Points to the backslash character of a a +    const char *src,		/* Points to the backslash character of a a  				 * backslash sequence. */      int numBytes,		/* Max number of bytes to scan. */      int *readPtr,		/* NULL, or points to storage where the number @@ -627,8 +828,9 @@ TclParseBackslash(  				 * written. At most TCL_UTF_MAX bytes will be  				 * written there. */  { -    register CONST char *p = src+1; -    Tcl_UniChar result; +    register const char *p = src+1; +    Tcl_UniChar unichar; +    int result;      int count;      char buf[TCL_UTF_MAX]; @@ -685,7 +887,7 @@ TclParseBackslash(  	result = 0xb;  	break;      case 'x': -	count += TclParseHex(p+1, numBytes-1, &result); +	count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);  	if (count == 2) {  	    /*  	     * No hexadigits -> This is just "x". @@ -700,7 +902,7 @@ TclParseBackslash(  	}  	break;      case 'u': -	count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result); +	count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);  	if (count == 2) {  	    /*  	     * No hexadigits -> This is just "u". @@ -708,6 +910,15 @@ TclParseBackslash(  	    result = 'u';  	}  	break; +    case 'U': +	count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); +	if (count == 2) { +	    /* +	     * No hexadigits -> This is just "U". +	     */ +	    result = 'U'; +	} +	break;      case '\n':  	count--;  	do { @@ -726,21 +937,21 @@ TclParseBackslash(  	 */  	if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) {	/* INTL: digit */ -	    result = (unsigned char)(*p - '0'); +	    result = *p - '0';  	    p++;  	    if ((numBytes == 2) || !isdigit(UCHAR(*p))	/* INTL: digit */  		    || (UCHAR(*p) >= '8')) {  		break;  	    }  	    count = 3; -	    result = (unsigned char)((result << 3) + (*p - '0')); +	    result = (result << 3) + (*p - '0');  	    p++;  	    if ((numBytes == 3) || !isdigit(UCHAR(*p))	/* INTL: digit */ -		    || (UCHAR(*p) >= '8')) { +		    || (UCHAR(*p) >= '8') || (result >= 0x20)) {  		break;  	    }  	    count = 4; -	    result = (unsigned char)((result << 3) + (*p - '0')); +	    result = UCHAR((result << 3) + (*p - '0'));  	    break;  	} @@ -752,14 +963,15 @@ TclParseBackslash(  	 */  	if (Tcl_UtfCharComplete(p, numBytes - 1)) { -	    count = Tcl_UtfToUniChar(p, &result) + 1;	/* +1 for '\' */ +	    count = Tcl_UtfToUniChar(p, &unichar) + 1;	/* +1 for '\' */  	} else {  	    char utfBytes[TCL_UTF_MAX];  	    memcpy(utfBytes, p, (size_t) (numBytes - 1));  	    utfBytes[numBytes - 1] = '\0'; -	    count = Tcl_UtfToUniChar(utfBytes, &result) + 1; +	    count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;  	} +	result = unichar;  	break;      } @@ -767,7 +979,7 @@ TclParseBackslash(      if (readPtr != NULL) {  	*readPtr = count;      } -    return Tcl_UniCharToUtf((int) result, dst); +    return Tcl_UniCharToUtf(result, dst);  }  /* @@ -779,33 +991,30 @@ TclParseBackslash(   *	defined by Tcl's parsing rules.   *   * Results: - * 	Records in parsePtr information about the parse. Returns the number of - * 	bytes consumed. + *	Records in parsePtr information about the parse. Returns the number of + *	bytes consumed.   *   * Side effects: - * 	None. + *	None.   *   *----------------------------------------------------------------------   */  static int  ParseComment( -    CONST char *src,		/* First character to parse. */ +    const char *src,		/* First character to parse. */      register int numBytes,	/* Max number of bytes to scan. */      Tcl_Parse *parsePtr)	/* Information about parse in progress.  				 * Updated if parsing indicates an incomplete  				 * command. */  { -    register CONST char *p = src; -    while (numBytes) { -	char type; -	int scanned; +    register const char *p = src; +    int incomplete = parsePtr->incomplete; -	do { -	    scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); -	    p += scanned; -	    numBytes -= scanned; -	} while (numBytes && (*p == '\n') && (p++,numBytes--)); +    while (numBytes) { +	int scanned = ParseAllWhiteSpace(p, numBytes, &incomplete); +	p += scanned; +	numBytes -= scanned;  	if ((numBytes == 0) || (*p != '#')) {  	    break; @@ -814,34 +1023,28 @@ ParseComment(  	    parsePtr->commentStart = p;  	} +	p++; +	numBytes--;  	while (numBytes) { +	    if (*p == '\n') { +		p++; +		numBytes--; +		break; +	    }  	    if (*p == '\\') { -		scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); -		if (scanned) { -		    p += scanned; -		    numBytes -= scanned; -		} else { -		    /* -		     * General backslash substitution in comments isn't part -		     * of the formal spec, but test parse-15.47 and history -		     * indicate that it has been the de facto rule. Don't -		     * change it now. -		     */ - -		    TclParseBackslash(p, numBytes, &scanned, NULL); -		    p += scanned; -		    numBytes -= scanned; -		} -	    } else {  		p++;  		numBytes--; -		if (p[-1] == '\n') { +		if (numBytes == 0) {  		    break;  		}  	    } +	    incomplete = (*p == '\n'); +	    p++; +	    numBytes--;  	}  	parsePtr->commentSize = p - parsePtr->commentStart;      } +    parsePtr->incomplete = incomplete;      return (p - src);  } @@ -872,7 +1075,7 @@ ParseComment(  static int  ParseTokens( -    register CONST char *src,	/* First character to parse. */ +    register const char *src,	/* First character to parse. */      register int numBytes,	/* Max number of bytes to scan. */      int mask,			/* Specifies when to stop parsing. The parse  				 * stops at the first unquoted character whose @@ -887,12 +1090,11 @@ ParseTokens(  				 * termination information. */  {      char type; -    int originalTokens, varToken; +    int originalTokens;      int noSubstCmds = !(flags & TCL_SUBST_COMMANDS);      int noSubstVars = !(flags & TCL_SUBST_VARIABLES);      int noSubstBS = !(flags & TCL_SUBST_BACKSLASHES);      Tcl_Token *tokenPtr; -    Tcl_Parse nested;      /*       * Each iteration through the following loop adds one token of type @@ -903,9 +1105,7 @@ ParseTokens(      originalTokens = parsePtr->numTokens;      while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) { -	if (parsePtr->numTokens == parsePtr->tokensAvailable) { -	    TclExpandTokenArray(parsePtr); -	} +	TclGrowParseTokenArray(parsePtr, 1);  	tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];  	tokenPtr->start = src;  	tokenPtr->numComponents = 0; @@ -924,6 +1124,8 @@ ParseTokens(  	    tokenPtr->size = src - tokenPtr->start;  	    parsePtr->numTokens++;  	} else if (*src == '$') { +	    int varToken; +  	    if (noSubstVars) {  		tokenPtr->type = TCL_TOKEN_TEXT;  		tokenPtr->size = 1; @@ -939,13 +1141,15 @@ ParseTokens(  	     */  	    varToken = parsePtr->numTokens; -	    if (Tcl_ParseVarName(parsePtr->interp, src, numBytes, -		    parsePtr, 1) != TCL_OK) { +	    if (Tcl_ParseVarName(parsePtr->interp, src, numBytes, parsePtr, +		    1) != TCL_OK) {  		return TCL_ERROR;  	    }  	    src += parsePtr->tokenPtr[varToken].size;  	    numBytes -= parsePtr->tokenPtr[varToken].size;  	} else if (*src == '[') { +	    Tcl_Parse *nestedPtr; +  	    if (noSubstCmds) {  		tokenPtr->type = TCL_TOKEN_TEXT;  		tokenPtr->size = 1; @@ -963,25 +1167,22 @@ ParseTokens(  	    src++;  	    numBytes--; +	    nestedPtr = TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));  	    while (1) { -		if (Tcl_ParseCommand(parsePtr->interp, src, -			numBytes, 1, &nested) != TCL_OK) { -		    parsePtr->errorType = nested.errorType; -		    parsePtr->term = nested.term; -		    parsePtr->incomplete = nested.incomplete; +		const char *curEnd; + +		if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, +			nestedPtr) != TCL_OK) { +		    parsePtr->errorType = nestedPtr->errorType; +		    parsePtr->term = nestedPtr->term; +		    parsePtr->incomplete = nestedPtr->incomplete; +		    TclStackFree(parsePtr->interp, nestedPtr);  		    return TCL_ERROR;  		} -		src = nested.commandStart + nested.commandSize; -		numBytes = parsePtr->end - src; - -		/* -		 * This is equivalent to Tcl_FreeParse(&nested), but -		 * presumably inlined here for sake of runtime optimization -		 */ - -		if (nested.tokenPtr != nested.staticTokens) { -		    ckfree((char *) nested.tokenPtr); -		} +		curEnd = src + numBytes; +		src = nestedPtr->commandStart + nestedPtr->commandSize; +		numBytes = curEnd - src; +		Tcl_FreeParse(nestedPtr);  		/*  		 * Check for the closing ']' that ends the command @@ -989,21 +1190,24 @@ ParseTokens(  		 * parsed command.  		 */ -		if ((nested.term < parsePtr->end) && (*nested.term == ']') -			&& !nested.incomplete) { +		if ((nestedPtr->term < parsePtr->end) +			&& (*(nestedPtr->term) == ']') +			&& !(nestedPtr->incomplete)) {  		    break;  		}  		if (numBytes == 0) {  		    if (parsePtr->interp != NULL) { -			Tcl_SetResult(parsePtr->interp, -				"missing close-bracket", TCL_STATIC); +			Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +				"missing close-bracket", -1));  		    }  		    parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;  		    parsePtr->term = tokenPtr->start;  		    parsePtr->incomplete = 1; +		    TclStackFree(parsePtr->interp, nestedPtr);  		    return TCL_ERROR;  		}  	    } +	    TclStackFree(parsePtr->interp, nestedPtr);  	    tokenPtr->type = TCL_TOKEN_COMMAND;  	    tokenPtr->size = src - tokenPtr->start;  	    parsePtr->numTokens++; @@ -1074,9 +1278,7 @@ ParseTokens(  	 * empty range, so that there is always at least one token added.  	 */ -	if (parsePtr->numTokens == parsePtr->tokensAvailable) { -	    TclExpandTokenArray(parsePtr); -	} +	TclGrowParseTokenArray(parsePtr, 1);  	tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];  	tokenPtr->start = src;  	tokenPtr->numComponents = 0; @@ -1114,7 +1316,7 @@ Tcl_FreeParse(  				 * call to Tcl_ParseCommand. */  {      if (parsePtr->tokenPtr != parsePtr->staticTokens) { -	ckfree((char *) parsePtr->tokenPtr); +	ckfree(parsePtr->tokenPtr);  	parsePtr->tokenPtr = parsePtr->staticTokens;      }  } @@ -1122,44 +1324,6 @@ Tcl_FreeParse(  /*   *----------------------------------------------------------------------   * - * TclExpandTokenArray -- - * - *	This function is invoked when the current space for tokens in a - *	Tcl_Parse structure fills up; it allocates memory to grow the token - *	array - * - * Results: - *	None. - * - * Side effects: - *	Memory is allocated for a new larger token array; the memory for the - *	old array is freed, if it had been dynamically allocated. - * - *---------------------------------------------------------------------- - */ - -void -TclExpandTokenArray( -    Tcl_Parse *parsePtr)	/* Parse structure whose token space has -				 * overflowed. */ -{ -    int newCount; -    Tcl_Token *newPtr; - -    newCount = parsePtr->tokensAvailable*2; -    newPtr = (Tcl_Token *) ckalloc((unsigned) (newCount * sizeof(Tcl_Token))); -    memcpy((VOID *) newPtr, (VOID *) parsePtr->tokenPtr, -	    (size_t) (parsePtr->tokensAvailable * sizeof(Tcl_Token))); -    if (parsePtr->tokenPtr != parsePtr->staticTokens) { -	ckfree((char *) parsePtr->tokenPtr); -    } -    parsePtr->tokenPtr = newPtr; -    parsePtr->tokensAvailable = newCount; -} - -/* - *---------------------------------------------------------------------- - *   * Tcl_ParseVarName --   *   *	Given a string starting with a $ sign, parse off a variable name and @@ -1188,7 +1352,7 @@ int  Tcl_ParseVarName(      Tcl_Interp *interp,		/* Interpreter to use for error reporting; if  				 * NULL, then no error message is provided. */ -    CONST char *start,		/* Start of variable substitution string. +    const char *start,		/* Start of variable substitution string.  				 * First character must be "$". */      register int numBytes,	/* Total number of bytes in string. If < 0,  				 * the string consists of all bytes up to the @@ -1201,10 +1365,8 @@ Tcl_ParseVarName(  				 * reinitialize it. */  {      Tcl_Token *tokenPtr; -    register CONST char *src; -    unsigned char c; -    int varIndex, offset; -    Tcl_UniChar ch; +    register const char *src; +    int varIndex;      unsigned array;      if ((numBytes == 0) || (start == NULL)) { @@ -1224,9 +1386,7 @@ Tcl_ParseVarName(       */      src = start; -    if ((parsePtr->numTokens + 2) > parsePtr->tokensAvailable) { -	TclExpandTokenArray(parsePtr); -    } +    TclGrowParseTokenArray(parsePtr, 2);      tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];      tokenPtr->type = TCL_TOKEN_VARIABLE;      tokenPtr->start = src; @@ -1270,9 +1430,9 @@ Tcl_ParseVarName(  	    src++;  	}  	if (numBytes == 0) { -	    if (interp != NULL) { -		Tcl_SetResult(interp, "missing close-brace for variable name", -			TCL_STATIC); +	    if (parsePtr->interp != NULL) { +		Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +			"missing close-brace for variable name", -1));  	    }  	    parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;  	    parsePtr->term = tokenPtr->start-1; @@ -1289,22 +1449,12 @@ Tcl_ParseVarName(  	tokenPtr->numComponents = 0;  	while (numBytes) { -	    if (Tcl_UtfCharComplete(src, numBytes)) { -		offset = Tcl_UtfToUniChar(src, &ch); -	    } else { -		char utfBytes[TCL_UTF_MAX]; - -		memcpy(utfBytes, src, (size_t) numBytes); -		utfBytes[numBytes] = '\0'; -		offset = Tcl_UtfToUniChar(utfBytes, &ch); -	    } -	    c = UCHAR(ch); -	    if (isalnum(c) || (c == '_')) { /* INTL: ISO only, UCHAR. */ -		src += offset; -		numBytes -= offset; +	    if (TclIsBareword(*src)) { +		src += 1; +		numBytes -= 1;  		continue;  	    } -	    if ((c == ':') && (numBytes != 1) && (src[1] == ':')) { +	    if ((src[0] == ':') && (numBytes != 1) && (src[1] == ':')) {  		src += 2;  		numBytes -= 2;  		while (numBytes && (*src == ':')) { @@ -1337,10 +1487,10 @@ Tcl_ParseVarName(  		    TCL_SUBST_ALL, parsePtr)) {  		goto error;  	    } -	    if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')) { +	    if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){  		if (parsePtr->interp != NULL) { -		    Tcl_SetResult(parsePtr->interp, "missing )", -			    TCL_STATIC); +		    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +			    "missing )", -1));  		}  		parsePtr->errorType = TCL_PARSE_MISSING_PAREN;  		parsePtr->term = src; @@ -1394,35 +1544,40 @@ Tcl_ParseVarName(   *----------------------------------------------------------------------   */ -CONST char * +const char *  Tcl_ParseVar( -    Tcl_Interp *interp,			/* Context for looking up variable. */ -    register CONST char *start,		/* Start of variable substitution. -					 * First character must be "$". */ -    CONST char **termPtr)		/* If non-NULL, points to word to fill -					 * in with character just after last -					 * one in the variable specifier. */ +    Tcl_Interp *interp,		/* Context for looking up variable. */ +    register const char *start,	/* Start of variable substitution. First +				 * character must be "$". */ +    const char **termPtr)	/* If non-NULL, points to word to fill in with +				 * character just after last one in the +				 * variable specifier. */  { -    Tcl_Parse parse;      register Tcl_Obj *objPtr;      int code; +    Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); -    if (Tcl_ParseVarName(interp, start, -1, &parse, 0) != TCL_OK) { +    if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { +	TclStackFree(interp, parsePtr);  	return NULL;      }      if (termPtr != NULL) { -	*termPtr = start + parse.tokenPtr->size; +	*termPtr = start + parsePtr->tokenPtr->size;      } -    if (parse.numTokens == 1) { +    if (parsePtr->numTokens == 1) {  	/*  	 * There isn't a variable name after all: the $ is just a $.  	 */ +	TclStackFree(interp, parsePtr);  	return "$";      } -    code = TclSubstTokens(interp, parse.tokenPtr, parse.numTokens, NULL); +    code = TclSubstTokens(interp, parsePtr->tokenPtr, parsePtr->numTokens, +	    NULL, 1, NULL, NULL); +    Tcl_FreeParse(parsePtr); +    TclStackFree(interp, parsePtr);      if (code != TCL_OK) {  	return NULL;      } @@ -1432,16 +1587,13 @@ Tcl_ParseVar(       * At this point we should have an object containing the value of a       * variable. Just return the string from that object.       * -     * This should have returned the object for the user to manage, but -     * instead we have some weak reference to the string value in the object, -     * which is why we make sure the object exists after resetting the result. -     * This isn't ideal, but it's the best we can do with the current -     * documented interface. -- hobbs +     * Since TclSubstTokens above returned TCL_OK, we know that objPtr +     * is shared.  It is in both the interp result and the value of the +     * variable.  Returning the string relies on that to be true.       */ -    if (!Tcl_IsShared(objPtr)) { -	Tcl_IncrRefCount(objPtr); -    } +    assert( Tcl_IsShared(objPtr) ); +      Tcl_ResetResult(interp);      return TclGetString(objPtr);  } @@ -1478,26 +1630,25 @@ int  Tcl_ParseBraces(      Tcl_Interp *interp,		/* Interpreter to use for error reporting; if  				 * NULL, then no error message is provided. */ -    CONST char *start,		/* Start of string enclosed in braces. The +    const char *start,		/* Start of string enclosed in braces. The  				 * first character must be {'. */      register int numBytes,	/* Total number of bytes in string. If < 0,  				 * the string consists of all bytes up to the  				 * first null character. */      register Tcl_Parse *parsePtr, -    				/* Structure to fill in with information about +				/* Structure to fill in with information about  				 * the string. */      int append,			/* Non-zero means append tokens to existing  				 * information in parsePtr; zero means ignore  				 * existing tokens in parsePtr and  				 * reinitialize it. */ -    CONST char **termPtr)	/* If non-NULL, points to word in which to +    const char **termPtr)	/* If non-NULL, points to word in which to  				 * store a pointer to the character just after  				 * the terminating '}' if the parse was  				 * successful. */ -  {      Tcl_Token *tokenPtr; -    register CONST char *src; +    register const char *src;      int startIndex, level, length;      if ((numBytes == 0) || (start == NULL)) { @@ -1514,9 +1665,7 @@ Tcl_ParseBraces(      src = start;      startIndex = parsePtr->numTokens; -    if (parsePtr->numTokens == parsePtr->tokensAvailable) { -	TclExpandTokenArray(parsePtr); -    } +    TclGrowParseTokenArray(parsePtr, 1);      tokenPtr = &parsePtr->tokenPtr[startIndex];      tokenPtr->type = TCL_TOKEN_TEXT;      tokenPtr->start = src+1; @@ -1579,9 +1728,7 @@ Tcl_ParseBraces(  		if (tokenPtr->size != 0) {  		    parsePtr->numTokens++;  		} -		if ((parsePtr->numTokens+1) >= parsePtr->tokensAvailable) { -		    TclExpandTokenArray(parsePtr); -		} +		TclGrowParseTokenArray(parsePtr, 2);  		tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];  		tokenPtr->type = TCL_TOKEN_BS;  		tokenPtr->start = src; @@ -1607,7 +1754,7 @@ Tcl_ParseBraces(      parsePtr->errorType = TCL_PARSE_MISSING_BRACE;      parsePtr->term = start;      parsePtr->incomplete = 1; -    if (interp == NULL) { +    if (parsePtr->interp == NULL) {  	/*  	 * Skip straight to the exit code since we have no interpreter to put  	 * error message in. @@ -1616,7 +1763,8 @@ Tcl_ParseBraces(  	goto error;      } -    Tcl_SetResult(interp, "missing close-brace", TCL_STATIC); +    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +	    "missing close-brace", -1));      /*       * Guess if the problem is due to comments by searching the source string @@ -1628,7 +1776,7 @@ Tcl_ParseBraces(      {  	register int openBrace = 0; -	for (; src > start; src--) { +	while (--src > start) {  	    switch (*src) {  	    case '{':  		openBrace = 1; @@ -1637,10 +1785,9 @@ Tcl_ParseBraces(  		openBrace = 0;  		break;  	    case '#' : -		if (openBrace && (isspace(UCHAR(src[-1])))) { -		    Tcl_AppendResult(interp, -			    ": possible unbalanced brace in comment", -			    (char *) NULL); +		if (openBrace && TclIsSpaceProc(src[-1])) { +		    Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), +			    ": possible unbalanced brace in comment", -1);  		    goto error;  		}  		break; @@ -1685,19 +1832,19 @@ int  Tcl_ParseQuotedString(      Tcl_Interp *interp,		/* Interpreter to use for error reporting; if  				 * NULL, then no error message is provided. */ -    CONST char *start,		/* Start of the quoted string. The first +    const char *start,		/* Start of the quoted string. The first  				 * character must be '"'. */      register int numBytes,	/* Total number of bytes in string. If < 0,  				 * the string consists of all bytes up to the  				 * first null character. */      register Tcl_Parse *parsePtr, -    				/* Structure to fill in with information about +				/* Structure to fill in with information about  				 * the string. */      int append,			/* Non-zero means append tokens to existing  				 * information in parsePtr; zero means ignore  				 * existing tokens in parsePtr and  				 * reinitialize it. */ -    CONST char **termPtr)	/* If non-NULL, points to word in which to +    const char **termPtr)	/* If non-NULL, points to word in which to  				 * store a pointer to the character just after  				 * the quoted string's terminating close-quote  				 * if the parse succeeds. */ @@ -1713,13 +1860,14 @@ Tcl_ParseQuotedString(  	TclParseInit(interp, start, numBytes, parsePtr);      } -    if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, -	    TCL_SUBST_ALL, parsePtr)) { +    if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, TCL_SUBST_ALL, +	    parsePtr)) {  	goto error;      }      if (*parsePtr->term != '"') { -	if (interp != NULL) { -	    Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC); +	if (parsePtr->interp != NULL) { +	    Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( +		    "missing \"", -1));  	}  	parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;  	parsePtr->term = start; @@ -1739,35 +1887,44 @@ Tcl_ParseQuotedString(  /*   *----------------------------------------------------------------------   * - * Tcl_SubstObj -- + * TclSubstParse --   * - *	This function performs the substitutions specified on the given string - *	as described in the user documentation for the "subst" Tcl command. + *	Token parser used by the [subst] command. Parses the string made up of + *	'numBytes' bytes starting at 'bytes'. Parsing is controlled by the + *	flags argument to provide support for the -nobackslashes, -nocommands, + *	and -novariables options, as represented by the flag values + *	TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES.   *   * Results: - *	A Tcl_Obj* containing the substituted string, or NULL to indicate that - *	an error occurred. + *	None.   *   * Side effects: - *	See the user documentation. + *	The Tcl_Parse struct '*parsePtr' is filled with parse results. + *	The caller is expected to eventually call Tcl_FreeParse() to properly + *	cleanup the value written there. + * + *	If a parse error occurs, the Tcl_InterpState value '*statePtr' is + *	filled with the state created by that error. When *statePtr is written + *	to, the caller is expected to make the required calls to either + *	Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the + *	value written there.   *   *----------------------------------------------------------------------   */ -Tcl_Obj * -Tcl_SubstObj( -    Tcl_Interp *interp,		/* Interpreter in which substitution occurs */ -    Tcl_Obj *objPtr,		/* The value to be substituted. */ -    int flags)			/* What substitutions to do. */ +void +TclSubstParse( +    Tcl_Interp *interp, +    const char *bytes, +    int numBytes, +    int flags, +    Tcl_Parse *parsePtr, +    Tcl_InterpState *statePtr)  { -    int length, tokensLeft, code; -    Tcl_Parse parse; -    Tcl_Token *endTokenPtr; -    Tcl_Obj *result; -    Tcl_Obj *errMsg = NULL; -    CONST char *p = Tcl_GetStringFromObj(objPtr, &length); +    int length = numBytes; +    const char *p = bytes; -    TclParseInit(interp, p, length, &parse); +    TclParseInit(interp, p, length, parsePtr);      /*       * First parse the string rep of objPtr, as if it were enclosed as a @@ -1775,14 +1932,13 @@ Tcl_SubstObj(       * inhibit types of substitution.       */ -    if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, &parse)) { +    if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {  	/* -	 * There was a parse error. Save the error message for possible -	 * reporting later. +	 * There was a parse error. Save the interpreter state for possible +	 * error reporting later.  	 */ -	errMsg = Tcl_GetObjResult(interp); -	Tcl_IncrRefCount(errMsg); +	*statePtr = Tcl_SaveInterpState(interp, TCL_ERROR);  	/*  	 * We need to re-parse to get the portion of the string we can [subst] @@ -1797,18 +1953,19 @@ Tcl_SubstObj(  	 */  	do { -	    parse.numTokens = 0; -	    parse.tokensAvailable = NUM_STATIC_TOKENS; -	    parse.end = parse.term; -	    parse.incomplete = 0; -	    parse.errorType = TCL_PARSE_SUCCESS; -	} while (TCL_OK != ParseTokens(p, parse.end - p, 0, flags, &parse)); +	    parsePtr->numTokens = 0; +	    parsePtr->tokensAvailable = NUM_STATIC_TOKENS; +	    parsePtr->end = parsePtr->term; +	    parsePtr->incomplete = 0; +	    parsePtr->errorType = TCL_PARSE_SUCCESS; +	} while (TCL_OK != +		ParseTokens(p, parsePtr->end - p, 0, flags, parsePtr));  	/*  	 * The good parse will have to be followed by {, (, or [.  	 */ -	switch (*parse.term) { +	switch (*(parsePtr->term)) {  	case '{':  	    /*  	     * Parse error was a missing } in a ${varname} variable @@ -1825,7 +1982,7 @@ Tcl_SubstObj(  	     * array variable substitution at the toplevel.  	     */ -	    if (*(parse.term - 1) == '$') { +	    if (*(parsePtr->term - 1) == '$') {  		/*  		 * Special case where removing the array index left us with  		 * just a dollar sign (array variable with name the empty @@ -1844,15 +2001,15 @@ Tcl_SubstObj(  		 */  		Tcl_Token *varTokenPtr = -			parse.tokenPtr + parse.numTokens - 2; +			parsePtr->tokenPtr + parsePtr->numTokens - 2;  		if (varTokenPtr->type != TCL_TOKEN_VARIABLE) { -		    Tcl_Panic("Tcl_SubstObj: programming error"); +		    Tcl_Panic("TclSubstParse: programming error");  		}  		if (varTokenPtr[1].type != TCL_TOKEN_TEXT) { -		    Tcl_Panic("Tcl_SubstObj: programming error"); +		    Tcl_Panic("TclSubstParse: programming error");  		} -		parse.numTokens -= 2; +		parsePtr->numTokens -= 2;  	    }  	    break;  	case '[': @@ -1861,9 +2018,9 @@ Tcl_SubstObj(  	     * substitution.  	     */ -	    parse.end = p + length; -	    p = parse.term + 1; -	    length = parse.end - p; +	    parsePtr->end = p + length; +	    p = parsePtr->term + 1; +	    length = parsePtr->end - p;  	    if (length == 0) {  		/*  		 * No commands, just an unmatched [. As in previous cases, @@ -1878,15 +2035,16 @@ Tcl_SubstObj(  		 */  		Tcl_Token *tokenPtr; -		Tcl_Parse nested; -		CONST char *lastTerm = parse.term; +		const char *lastTerm = parsePtr->term; +		Tcl_Parse *nestedPtr = +			TclStackAlloc(interp, sizeof(Tcl_Parse));  		while (TCL_OK == -			Tcl_ParseCommand(NULL, p, length, 0, &nested)) { -		    Tcl_FreeParse(&nested); -		    p = nested.term + (nested.term < nested.end); -		    length = nested.end - p; -		    if ((length == 0) && (nested.term == nested.end)) { +			Tcl_ParseCommand(NULL, p, length, 0, nestedPtr)) { +		    Tcl_FreeParse(nestedPtr); +		    p = nestedPtr->term + (nestedPtr->term < nestedPtr->end); +		    length = nestedPtr->end - p; +		    if ((length == 0) && (nestedPtr->term == nestedPtr->end)) {  			/*  			 * If we run out of string, blame the missing close  			 * bracket on the last command, and do not evaluate it @@ -1895,10 +2053,11 @@ Tcl_SubstObj(  			break;  		    } -		    lastTerm = nested.term; +		    lastTerm = nestedPtr->term;  		} +		TclStackFree(interp, nestedPtr); -		if (lastTerm == parse.term) { +		if (lastTerm == parsePtr->term) {  		    /*  		     * Parse error in first command. No commands to subst, add  		     * no more tokens. @@ -1911,73 +2070,19 @@ Tcl_SubstObj(  		 * got parsed.  		 */ -		if (parse.numTokens == parse.tokensAvailable) { -		    TclExpandTokenArray(&parse); -		} -		tokenPtr = &parse.tokenPtr[parse.numTokens]; -		tokenPtr->start = parse.term; +		TclGrowParseTokenArray(parsePtr, 1); +		tokenPtr = &(parsePtr->tokenPtr[parsePtr->numTokens]); +		tokenPtr->start = parsePtr->term;  		tokenPtr->numComponents = 0;  		tokenPtr->type = TCL_TOKEN_COMMAND;  		tokenPtr->size = lastTerm - tokenPtr->start + 1; -		parse.numTokens++; +		parsePtr->numTokens++;  	    }  	    break;  	default: -	    Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]); -	} -    } - -    /* -     * Next, substitute the parsed tokens just as in normal Tcl evaluation. -     */ - -    endTokenPtr = parse.tokenPtr + parse.numTokens; -    tokensLeft = parse.numTokens; -    code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, -	    &tokensLeft); -    if (code == TCL_OK) { -	Tcl_FreeParse(&parse); -	if (errMsg != NULL) { -	    Tcl_SetObjResult(interp, errMsg); -	    Tcl_DecrRefCount(errMsg); -	    return NULL; -	} -	return Tcl_GetObjResult(interp); -    } - -    result = Tcl_NewObj(); -    while (1) { -	switch (code) { -	case TCL_ERROR: -	    Tcl_FreeParse(&parse); -	    Tcl_DecrRefCount(result); -	    if (errMsg != NULL) { -		Tcl_DecrRefCount(errMsg); -	    } -	    return NULL; -	case TCL_BREAK: -	    tokensLeft = 0;		/* Halt substitution */ -	default: -	    Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp)); +	    Tcl_Panic("bad parse in TclSubstParse: %c", p[length]);  	} - -	if (tokensLeft == 0) { -	    Tcl_FreeParse(&parse); -	    if (errMsg != NULL) { -		if (code != TCL_BREAK) { -		    Tcl_DecrRefCount(result); -		    Tcl_SetObjResult(interp, errMsg); -		    Tcl_DecrRefCount(errMsg); -		    return NULL; -		} -		Tcl_DecrRefCount(errMsg); -	    } -	    return result; -	} - -	code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, -		&tokensLeft);      }  } @@ -1992,13 +2097,13 @@ Tcl_SubstObj(   *	non-TCL_OK completion code arises.   *   * Results: - * 	The return value is a standard Tcl completion code. The result in - * 	interp is the substituted value, or an error message if TCL_ERROR is - * 	returned. If tokensLeftPtr is not NULL, then it points to an int where - * 	the number of tokens remaining to be processed is written. + *	The return value is a standard Tcl completion code. The result in + *	interp is the substituted value, or an error message if TCL_ERROR is + *	returned. If tokensLeftPtr is not NULL, then it points to an int where + *	the number of tokens remaining to be processed is written.   *   * Side effects: - * 	Can be anything, depending on the types of substitution done. + *	Can be anything, depending on the types of substitution done.   *   *----------------------------------------------------------------------   */ @@ -2012,12 +2117,35 @@ TclSubstTokens(  				 * evaluate and concatenate. */      int count,			/* Number of tokens to consider at tokenPtr.  				 * Must be at least 1. */ -    int *tokensLeftPtr)		/* If not NULL, points to memory where an +    int *tokensLeftPtr,		/* If not NULL, points to memory where an  				 * integer representing the number of tokens  				 * left to be substituted will be written */ +    int line,			/* The line the script starts on. */ +    int *clNextOuter,		/* Information about an outer context for */ +    const char *outerScript)	/* continuation line data. This is set by +				 * EvalEx() to properly handle [...]-nested +				 * commands. The 'outerScript' refers to the +				 * most-outer script containing the embedded +				 * command, which is refered to by 'script'. +				 * The 'clNextOuter' refers to the current +				 * entry in the table of continuation lines in +				 * this "master script", and the character +				 * offsets are relative to the 'outerScript' +				 * as well. +				 * +				 * If outerScript == script, then this call is +				 * for words in the outer-most script or +				 * command. See Tcl_EvalEx and TclEvalObjEx +				 * for the places generating arguments for +				 * which this is true. */  {      Tcl_Obj *result;      int code = TCL_OK; +#define NUM_STATIC_POS 20 +    int isLiteral, maxNumCL, numCL, i, adjust; +    int *clPosition = NULL; +    Interp *iPtr = (Interp *) interp; +    int inFile = iPtr->evalFlags & TCL_EVAL_FILE;      /*       * Each pass through this loop will substitute one token, and its @@ -2029,10 +2157,35 @@ TclSubstTokens(       * of Tcl_SetObjResult(interp, Tcl_GetObjResult(interp)) and omit them.       */ +    /* +     * For the handling of continuation lines in literals we first check if +     * this is actually a literal. For if not we can forego the additional +     * processing. Otherwise we pre-allocate a small table to store the +     * locations of all continuation lines we find in this literal, if any. +     * The table is extended if needed. +     */ + +    numCL = 0; +    maxNumCL = 0; +    isLiteral = 1; +    for (i=0 ; i < count; i++) { +	if ((tokenPtr[i].type != TCL_TOKEN_TEXT) +		&& (tokenPtr[i].type != TCL_TOKEN_BS)) { +	    isLiteral = 0; +	    break; +	} +    } + +    if (isLiteral) { +	maxNumCL = NUM_STATIC_POS; +	clPosition = ckalloc(maxNumCL * sizeof(int)); +    } + +    adjust = 0;      result = NULL;      for (; count>0 && code==TCL_OK ; count--, tokenPtr++) {  	Tcl_Obj *appendObj = NULL; -	CONST char *append = NULL; +	const char *append = NULL;  	int appendByteLength = 0;  	char utfCharBytes[TCL_UTF_MAX]; @@ -2043,21 +2196,79 @@ TclSubstTokens(  	    break;  	case TCL_TOKEN_BS: -	    appendByteLength = Tcl_UtfBackslash(tokenPtr->start, (int *) NULL, -		    utfCharBytes); +	    appendByteLength = TclParseBackslash(tokenPtr->start, +		    tokenPtr->size, NULL, utfCharBytes);  	    append = utfCharBytes; + +	    /* +	     * If the backslash sequence we found is in a literal, and +	     * represented a continuation line, we compute and store its +	     * location (as char offset to the beginning of the _result_ +	     * script). We may have to extend the table of locations. +	     * +	     * Note that the continuation line information is relevant even if +	     * the word we are processing is not a literal, as it can affect +	     * nested commands. See the branch for TCL_TOKEN_COMMAND below, +	     * where the adjustment we are tracking here is taken into +	     * account. The good thing is that we do not need a table of +	     * everything, just the number of lines we have to add as +	     * correction. +	     */ + +	    if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') +		    && (tokenPtr->start[1] == '\n')) { +		if (isLiteral) { +		    int clPos; + +		    if (result == 0) { +			clPos = 0; +		    } else { +			TclGetStringFromObj(result, &clPos); +		    } + +		    if (numCL >= maxNumCL) { +			maxNumCL *= 2; +			clPosition = ckrealloc(clPosition, +				maxNumCL * sizeof(int)); +		    } +		    clPosition[numCL] = clPos; +		    numCL++; +		} +		adjust++; +	    }  	    break;  	case TCL_TOKEN_COMMAND: { -	    Interp *iPtr = (Interp *) interp; - +	    /* TIP #280: Transfer line information to nested command */  	    iPtr->numLevels++;  	    code = TclInterpReady(interp);  	    if (code == TCL_OK) { -		code = Tcl_EvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, -			0); +		/* +		 * Test cases: info-30.{6,8,9} +		 */ + +		int theline; + +		TclAdvanceContinuations(&line, &clNextOuter, +			tokenPtr->start - outerScript); +		theline = line + adjust; +		code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, +			0, theline, clNextOuter, outerScript); + +		TclAdvanceLines(&line, tokenPtr->start+1, +			tokenPtr->start + tokenPtr->size - 1); + +		/* +		 * Restore flag reset by nested eval for future bracketed +		 * commands and their cmdframe setup +		 */ + +		if (inFile) { +		    iPtr->evalFlags |= TCL_EVAL_FILE; +		}  	    }  	    iPtr->numLevels--; +	    TclResetCancellation(interp, 0);  	    appendObj = Tcl_GetObjResult(interp);  	    break;  	} @@ -2072,7 +2283,7 @@ TclSubstTokens(  		 */  		code = TclSubstTokens(interp, tokenPtr+2, -			tokenPtr->numComponents - 1, NULL); +			tokenPtr->numComponents - 1, NULL, line, NULL, NULL);  		arrayIndex = Tcl_GetObjResult(interp);  		Tcl_IncrRefCount(arrayIndex);  	    } @@ -2156,6 +2367,27 @@ TclSubstTokens(      if (code != TCL_ERROR) {		/* Keep error message in result! */  	if (result != NULL) {  	    Tcl_SetObjResult(interp, result); + +	    /* +	     * If the code found continuation lines (which implies that this +	     * word is a literal), then we store the accumulated table of +	     * locations in the thread-global data structure for the bytecode +	     * compiler to find later, assuming that the literal is a script +	     * which will be compiled. +	     */ + +	    if (numCL) { +		TclContinuationsEnter(result, numCL, clPosition); +	    } + +	    /* +	     * Release the temp table we used to collect the locations of +	     * continuation lines, if any. +	     */ + +	    if (maxNumCL) { +		ckfree(clPosition); +	    }  	} else {  	    Tcl_ResetResult(interp);  	} @@ -2189,19 +2421,18 @@ TclSubstTokens(   *----------------------------------------------------------------------   */ -static int +static inline int  CommandComplete( -    CONST char *script,		/* Script to check. */ +    const char *script,		/* Script to check. */      int numBytes)		/* Number of bytes in script. */  {      Tcl_Parse parse; -    CONST char *p, *end; +    const char *p, *end;      int result;      p = script;      end = p + numBytes; -    while (Tcl_ParseCommand((Tcl_Interp *) NULL, p, end - p, 0, &parse) -	    == TCL_OK) { +    while (Tcl_ParseCommand(NULL, p, end - p, 0, &parse) == TCL_OK) {  	p = parse.commandStart + parse.commandSize;  	if (p >= end) {  	    break; @@ -2239,7 +2470,7 @@ CommandComplete(  int  Tcl_CommandComplete( -    CONST char *script)		/* Script to check. */ +    const char *script)		/* Script to check. */  {      return CommandComplete(script, (int) strlen(script));  } @@ -2267,64 +2498,13 @@ TclObjCommandComplete(      Tcl_Obj *objPtr)		/* Points to object holding script to  				 * check. */  { -    CONST char *script;      int length; +    const char *script = TclGetStringFromObj(objPtr, &length); -    script = Tcl_GetStringFromObj(objPtr, &length);      return CommandComplete(script, length);  }  /* - *---------------------------------------------------------------------- - * - * TclIsLocalScalar -- - * - *	Check to see if a given string is a legal scalar variable name with no - *	namespace qualifiers or substitutions. - * - * Results: - *	Returns 1 if the variable is a local scalar. - * - * Side effects: - *	None. - * - *---------------------------------------------------------------------- - */ - -int -TclIsLocalScalar( -    CONST char *src, -    int len) -{ -    CONST char *p; -    CONST char *lastChar = src + (len - 1); - -    for (p=src ; p<=lastChar ; p++) { -	if ((CHAR_TYPE(*p) != TYPE_NORMAL) && -		(CHAR_TYPE(*p) != TYPE_COMMAND_END)) { -	    /* -	     * TCL_COMMAND_END is returned for the last character of the -	     * string. By this point we know it isn't an array or namespace -	     * reference. -	     */ - -	    return 0; -	} -	if (*p == '(') { -	    if (*lastChar == ')') { /* we have an array element */ -		return 0; -	    } -	} else if (*p == ':') { -	    if ((p != lastChar) && *(p+1) == ':') { /* qualified name */ -		return 0; -	    } -	} -    } - -    return 1; -} - -/*   * Local Variables:   * mode: c   * c-basic-offset: 4 | 
