1 files changed, 138 insertions, 166 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 96c2a10..3c984bf 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  */
-
+ 
 #include "tclInt.h"
 
 /*
@@ -182,13 +182,13 @@ static int		ParseWhiteSpace(const char *src, int numBytes,
  *
  * TclParseInit --
  *
- * 	Initialize the fields of a Tcl_Parse struct.
+ *	Initialize the fields of a Tcl_Parse struct.
  *
  * Results:
- * 	None.
+ *	None.
  *
  * Side effects:
- * 	The Tcl_Parse struct pointed to by parsePtr gets initialized.
+ *	The Tcl_Parse struct pointed to by parsePtr gets initialized.
  *
  *----------------------------------------------------------------------
  */
@@ -251,7 +251,7 @@ Tcl_ParseCommand(
 				 * command terminator. If zero, then close
 				 * bracket has no special meaning. */
     register Tcl_Parse *parsePtr)
-    				/* Structure to fill in with information about
+				/* Structure to fill in with information about
 				 * the parsed command; any previous
 				 * information in the structure is ignored. */
 {
@@ -496,9 +496,10 @@ Tcl_ParseCommand(
 		     * tokens representing the expanded list.
 		     */
 
-		    CONST char *listStart;
+		    const char *listStart;
 		    int growthNeeded = wordIndex + 2*elemCount
 			    - parsePtr->numTokens;
+
 		    parsePtr->numWords += elemCount - 1;
 		    if (growthNeeded > 0) {
 			TclGrowParseTokenArray(parsePtr, growthNeeded);
@@ -672,7 +673,7 @@ ParseWhiteSpace(
 	    if (p[1] != '\n') {
 		break;
 	    }
-	    p+=2;
+	    p += 2;
 	    if (--numBytes == 0) {
 		*incompletePtr = 1;
 		break;
@@ -743,21 +744,21 @@ int
 TclParseHex(
     const char *src,		/* First character to parse. */
     int numBytes,		/* Max number of byes to scan */
-    Tcl_UniChar *resultPtr)	/* Points to storage provided by caller where
-				 * the Tcl_UniChar resulting from the
+    int *resultPtr)	/* Points to storage provided by caller where
+				 * the character resulting from the
 				 * conversion is to be written. */
 {
-    Tcl_UniChar result = 0;
+    int result = 0;
     register const char *p = src;
 
     while (numBytes--) {
 	unsigned char digit = UCHAR(*p);
 
-	if (!isxdigit(digit)) {
+	if (!isxdigit(digit) || (result > 0x10fff)) {
 	    break;
 	}
 
-	++p;
+	p++;
 	result <<= 4;
 
 	if (digit >= 'a') {
@@ -782,14 +783,14 @@ TclParseHex(
  *	sequence as defined by Tcl's parsing rules.
  *
  * Results:
- * 	Records at readPtr the number of bytes making up the backslash
- * 	sequence. Records at dst the UTF-8 encoded equivalent of that
- * 	backslash sequence. Returns the number of bytes written to dst, at
- * 	most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
- * 	are not needed, but the return value is the same either way.
+ *	Records at readPtr the number of bytes making up the backslash
+ *	sequence. Records at dst the UTF-8 encoded equivalent of that
+ *	backslash sequence. Returns the number of bytes written to dst, at
+ *	most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
+ *	are not needed, but the return value is the same either way.
  *
  * Side effects:
- * 	None.
+ *	None.
  *
  *----------------------------------------------------------------------
  */
@@ -807,7 +808,8 @@ TclParseBackslash(
 				 * written there. */
 {
     register const char *p = src+1;
-    Tcl_UniChar result;
+    Tcl_UniChar unichar;
+    int result;
     int count;
     char buf[TCL_UTF_MAX];
 
@@ -864,7 +866,7 @@ TclParseBackslash(
 	result = 0xb;
 	break;
     case 'x':
-	count += TclParseHex(p+1, numBytes-2, &result);
+	count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);
 	if (count == 2) {
 	    /*
 	     * No hexadigits -> This is just "x".
@@ -887,6 +889,15 @@ TclParseBackslash(
 	    result = 'u';
 	}
 	break;
+    case 'U':
+	count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
+	if (count == 2) {
+	    /*
+	     * No hexadigits -> This is just "U".
+	     */
+	    result = 'U';
+	}
+	break;
     case '\n':
 	count--;
 	do {
@@ -905,21 +916,21 @@ TclParseBackslash(
 	 */
 
 	if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) {	/* INTL: digit */
-	    result = (unsigned char)(*p - '0');
+	    result = *p - '0';
 	    p++;
 	    if ((numBytes == 2) || !isdigit(UCHAR(*p))	/* INTL: digit */
 		    || (UCHAR(*p) >= '8')) {
 		break;
 	    }
 	    count = 3;
-	    result = (unsigned char)((result << 3) + (*p - '0'));
+	    result = (result << 3) + (*p - '0');
 	    p++;
 	    if ((numBytes == 3) || !isdigit(UCHAR(*p))	/* INTL: digit */
-		    || (UCHAR(*p) >= '8')) {
+		    || (UCHAR(*p) >= '8') || (result >= 0x20)) {
 		break;
 	    }
 	    count = 4;
-	    result = (unsigned char)((result << 3) + (*p - '0'));
+	    result = UCHAR((result << 3) + (*p - '0'));
 	    break;
 	}
 
@@ -931,14 +942,15 @@ TclParseBackslash(
 	 */
 
 	if (Tcl_UtfCharComplete(p, numBytes - 1)) {
-	    count = Tcl_UtfToUniChar(p, &result) + 1;	/* +1 for '\' */
+	    count = Tcl_UtfToUniChar(p, &unichar) + 1;	/* +1 for '\' */
 	} else {
 	    char utfBytes[TCL_UTF_MAX];
 
 	    memcpy(utfBytes, p, (size_t) (numBytes - 1));
 	    utfBytes[numBytes - 1] = '\0';
-	    count = Tcl_UtfToUniChar(utfBytes, &result) + 1;
+	    count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;
 	}
+	result = unichar;
 	break;
     }
 
@@ -946,7 +958,7 @@ TclParseBackslash(
     if (readPtr != NULL) {
 	*readPtr = count;
     }
-    return Tcl_UniCharToUtf((int) result, dst);
+    return Tcl_UniCharToUtf(result, dst);
 }
 
 /*
@@ -958,11 +970,11 @@ TclParseBackslash(
  *	defined by Tcl's parsing rules.
  *
  * Results:
- * 	Records in parsePtr information about the parse. Returns the number of
- * 	bytes consumed.
+ *	Records in parsePtr information about the parse. Returns the number of
+ *	bytes consumed.
  *
  * Side effects:
- * 	None.
+ *	None.
  *
  *----------------------------------------------------------------------
  */
@@ -1115,7 +1127,7 @@ ParseTokens(
 	    }
 
 	    /*
-	     * This is a variable reference.  Call Tcl_ParseVarName to do all
+	     * This is a variable reference. Call Tcl_ParseVarName to do all
 	     * the dirty work of parsing the name.
 	     */
 
@@ -1139,15 +1151,14 @@ ParseTokens(
 	    }
 
 	    /*
-	     * Command substitution.  Call Tcl_ParseCommand recursively (and
+	     * Command substitution. Call Tcl_ParseCommand recursively (and
 	     * repeatedly) to parse the nested command(s), then throw away the
 	     * parse information.
 	     */
 
 	    src++;
 	    numBytes--;
-	    nestedPtr = (Tcl_Parse *)
-		    TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
+	    nestedPtr = TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
 	    while (1) {
 		if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1,
 			nestedPtr) != TCL_OK) {
@@ -1293,7 +1304,7 @@ Tcl_FreeParse(
 				 * call to Tcl_ParseCommand. */
 {
     if (parsePtr->tokenPtr != parsePtr->staticTokens) {
-	ckfree((char *) parsePtr->tokenPtr);
+	ckfree(parsePtr->tokenPtr);
 	parsePtr->tokenPtr = parsePtr->staticTokens;
     }
 }
@@ -1544,8 +1555,7 @@ Tcl_ParseVar(
 {
     register Tcl_Obj *objPtr;
     int code;
-    Tcl_Parse *parsePtr = (Tcl_Parse *)
-	    TclStackAlloc(interp, sizeof(Tcl_Parse));
+    Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse));
 
     if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) {
 	TclStackFree(interp, parsePtr);
@@ -1628,7 +1638,7 @@ Tcl_ParseBraces(
 				 * the string consists of all bytes up to the
 				 * first null character. */
     register Tcl_Parse *parsePtr,
-    				/* Structure to fill in with information about
+				/* Structure to fill in with information about
 				 * the string. */
     int append,			/* Non-zero means append tokens to existing
 				 * information in parsePtr; zero means ignore
@@ -1829,7 +1839,7 @@ Tcl_ParseQuotedString(
 				 * the string consists of all bytes up to the
 				 * first null character. */
     register Tcl_Parse *parsePtr,
-    				/* Structure to fill in with information about
+				/* Structure to fill in with information about
 				 * the string. */
     int append,			/* Non-zero means append tokens to existing
 				 * information in parsePtr; zero means ignore
@@ -1877,33 +1887,42 @@ Tcl_ParseQuotedString(
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_SubstObj --
- *
- *	This function performs the substitutions specified on the given string
- *	as described in the user documentation for the "subst" Tcl command.
+ * TclSubstParse --
  *
+ *	Token parser used by the [subst] command. Parses the string made up of
+ *	'numBytes' bytes starting at 'bytes'. Parsing is controlled by the
+ *	flags argument to provide support for the -nobackslashes, -nocommands,
+ *	and -novariables options, as represented by the flag values
+ *	TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES.
+ *	
  * Results:
- *	A Tcl_Obj* containing the substituted string, or NULL to indicate that
- *	an error occurred.
+ *	None.
  *
  * Side effects:
- *	See the user documentation.
+ *	The Tcl_Parse struct '*parsePtr' is filled with parse results.
+ *	The caller is expected to eventually call Tcl_FreeParse() to properly
+ *	cleanup the value written there.
+ *
+ *	If a parse error occurs, the Tcl_InterpState value '*statePtr' is
+ *	filled with the state created by that error. When *statePtr is written
+ *	to, the caller is expected to make the required calls to either
+ *	Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the
+ *	value written there.
  *
  *----------------------------------------------------------------------
  */
 
-Tcl_Obj *
-Tcl_SubstObj(
-    Tcl_Interp *interp,		/* Interpreter in which substitution occurs */
-    Tcl_Obj *objPtr,		/* The value to be substituted. */
-    int flags)			/* What substitutions to do. */
+void
+TclSubstParse(
+    Tcl_Interp *interp,
+    const char *bytes,
+    int numBytes,
+    int flags,
+    Tcl_Parse *parsePtr,
+    Tcl_InterpState *statePtr)
 {
-    int length, tokensLeft, code;
-    Tcl_Token *endTokenPtr;
-    Tcl_Obj *result, *errMsg = NULL;
-    const char *p = TclGetStringFromObj(objPtr, &length);
-    Tcl_Parse *parsePtr = (Tcl_Parse *)
-	    TclStackAlloc(interp, sizeof(Tcl_Parse));
+    int length = numBytes;
+    const char *p = bytes;
 
     TclParseInit(interp, p, length, parsePtr);
 
@@ -1915,12 +1934,11 @@ Tcl_SubstObj(
 
     if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {
 	/*
-	 * There was a parse error. Save the error message for possible
-	 * reporting later.
+	 * There was a parse error. Save the interpreter state for possible
+	 * error reporting later.
 	 */
 
-	errMsg = Tcl_GetObjResult(interp);
-	Tcl_IncrRefCount(errMsg);
+	*statePtr = Tcl_SaveInterpState(interp, TCL_ERROR);
 
 	/*
 	 * We need to re-parse to get the portion of the string we can [subst]
@@ -1986,10 +2004,10 @@ Tcl_SubstObj(
 			parsePtr->tokenPtr + parsePtr->numTokens - 2;
 
 		if (varTokenPtr->type != TCL_TOKEN_VARIABLE) {
-		    Tcl_Panic("Tcl_SubstObj: programming error");
+		    Tcl_Panic("TclSubstParse: programming error");
 		}
 		if (varTokenPtr[1].type != TCL_TOKEN_TEXT) {
-		    Tcl_Panic("Tcl_SubstObj: programming error");
+		    Tcl_Panic("TclSubstParse: programming error");
 		}
 		parsePtr->numTokens -= 2;
 	    }
@@ -2018,7 +2036,7 @@ Tcl_SubstObj(
 
 		Tcl_Token *tokenPtr;
 		const char *lastTerm = parsePtr->term;
-		Tcl_Parse *nestedPtr = (Tcl_Parse *)
+		Tcl_Parse *nestedPtr =
 			TclStackAlloc(interp, sizeof(Tcl_Parse));
 
 		while (TCL_OK ==
@@ -2063,63 +2081,8 @@ Tcl_SubstObj(
 	    break;
 
 	default:
-	    Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]);
-	}
-    }
-
-    /*
-     * Next, substitute the parsed tokens just as in normal Tcl evaluation.
-     */
-
-    endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens;
-    tokensLeft = parsePtr->numTokens;
-    code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
-	    &tokensLeft, 1, NULL, NULL);
-    if (code == TCL_OK) {
-	Tcl_FreeParse(parsePtr);
-	TclStackFree(interp, parsePtr);
-	if (errMsg != NULL) {
-	    Tcl_SetObjResult(interp, errMsg);
-	    Tcl_DecrRefCount(errMsg);
-	    return NULL;
-	}
-	return Tcl_GetObjResult(interp);
-    }
-
-    result = Tcl_NewObj();
-    while (1) {
-	switch (code) {
-	case TCL_ERROR:
-	    Tcl_FreeParse(parsePtr);
-	    TclStackFree(interp, parsePtr);
-	    Tcl_DecrRefCount(result);
-	    if (errMsg != NULL) {
-		Tcl_DecrRefCount(errMsg);
-	    }
-	    return NULL;
-	case TCL_BREAK:
-	    tokensLeft = 0;		/* Halt substitution */
-	default:
-	    Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp));
+	    Tcl_Panic("bad parse in TclSubstParse: %c", p[length]);
 	}
-
-	if (tokensLeft == 0) {
-	    Tcl_FreeParse(parsePtr);
-	    TclStackFree(interp, parsePtr);
-	    if (errMsg != NULL) {
-		if (code != TCL_BREAK) {
-		    Tcl_DecrRefCount(result);
-		    Tcl_SetObjResult(interp, errMsg);
-		    Tcl_DecrRefCount(errMsg);
-		    return NULL;
-		}
-		Tcl_DecrRefCount(errMsg);
-	    }
-	    return result;
-	}
-
-	code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
-		&tokensLeft, 1, NULL, NULL);
     }
 }
 
@@ -2134,13 +2097,13 @@ Tcl_SubstObj(
  *	non-TCL_OK completion code arises.
  *
  * Results:
- * 	The return value is a standard Tcl completion code. The result in
- * 	interp is the substituted value, or an error message if TCL_ERROR is
- * 	returned. If tokensLeftPtr is not NULL, then it points to an int where
- * 	the number of tokens remaining to be processed is written.
+ *	The return value is a standard Tcl completion code. The result in
+ *	interp is the substituted value, or an error message if TCL_ERROR is
+ *	returned. If tokensLeftPtr is not NULL, then it points to an int where
+ *	the number of tokens remaining to be processed is written.
  *
  * Side effects:
- * 	Can be anything, depending on the types of substitution done.
+ *	Can be anything, depending on the types of substitution done.
  *
  *----------------------------------------------------------------------
  */
@@ -2158,29 +2121,30 @@ TclSubstTokens(
 				 * integer representing the number of tokens
 				 * left to be substituted will be written */
     int line,			/* The line the script starts on. */
-    int*  clNextOuter,       /* Information about an outer context for */
-    CONST char* outerScript) /* continuation line data. This is set by
-			      * EvalEx() to properly handle [...]-nested
-			      * commands. The 'outerScript' refers to the
-			      * most-outer script containing the embedded
-			      * command, which is refered to by 'script'. The
-			      * 'clNextOuter' refers to the current entry in
-			      * the table of continuation lines in this
-			      * "master script", and the character offsets are
-			      * relative to the 'outerScript' as well.
-			      *
-			      * If outerScript == script, then this call is for
-			      * words in the outer-most script/command. See
-			      * Tcl_EvalEx() and TclEvalObjEx() for the places
-			      * generating arguments for which this is true.
-			      */
+    int *clNextOuter,		/* Information about an outer context for */
+    const char *outerScript)	/* continuation line data. This is set by
+				 * EvalEx() to properly handle [...]-nested
+				 * commands. The 'outerScript' refers to the
+				 * most-outer script containing the embedded
+				 * command, which is refered to by 'script'.
+				 * The 'clNextOuter' refers to the current
+				 * entry in the table of continuation lines in
+				 * this "master script", and the character
+				 * offsets are relative to the 'outerScript'
+				 * as well.
+				 *
+				 * If outerScript == script, then this call is
+				 * for words in the outer-most script or
+				 * command. See Tcl_EvalEx and TclEvalObjEx
+				 * for the places generating arguments for
+				 * which this is true. */
 {
     Tcl_Obj *result;
     int code = TCL_OK;
 #define NUM_STATIC_POS 20
     int isLiteral, maxNumCL, numCL, i, adjust;
-    int* clPosition = NULL;
-    Interp* iPtr = (Interp*) interp;
+    int *clPosition = NULL;
+    Interp *iPtr = (Interp *) interp;
     int inFile = iPtr->evalFlags & TCL_EVAL_FILE;
 
     /*
@@ -2197,24 +2161,24 @@ TclSubstTokens(
      * For the handling of continuation lines in literals we first check if
      * this is actually a literal. For if not we can forego the additional
      * processing. Otherwise we pre-allocate a small table to store the
-     * locations of all continuation lines we find in this literal, if
-     * any. The table is extended if needed.
+     * locations of all continuation lines we find in this literal, if any.
+     * The table is extended if needed.
      */
 
-    numCL     = 0;
-    maxNumCL  = 0;
+    numCL = 0;
+    maxNumCL = 0;
     isLiteral = 1;
     for (i=0 ; i < count; i++) {
-	if ((tokenPtr[i].type != TCL_TOKEN_TEXT) &&
-	    (tokenPtr[i].type != TCL_TOKEN_BS)) {
+	if ((tokenPtr[i].type != TCL_TOKEN_TEXT)
+		&& (tokenPtr[i].type != TCL_TOKEN_BS)) {
 	    isLiteral = 0;
 	    break;
 	}
     }
 
     if (isLiteral) {
-	maxNumCL   = NUM_STATIC_POS;
-	clPosition = (int*) ckalloc (maxNumCL*sizeof(int));
+	maxNumCL = NUM_STATIC_POS;
+	clPosition = ckalloc(maxNumCL * sizeof(int));
     }
 
     adjust = 0;
@@ -2235,6 +2199,7 @@ TclSubstTokens(
 	    appendByteLength = TclParseBackslash(tokenPtr->start,
 		    tokenPtr->size, NULL, utfCharBytes);
 	    append = utfCharBytes;
+
 	    /*
 	     * If the backslash sequence we found is in a literal, and
 	     * represented a continuation line, we compute and store its
@@ -2250,10 +2215,11 @@ TclSubstTokens(
 	     * correction.
 	     */
 
-	    if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') &&
-		(tokenPtr->start[1] == '\n')) {
+	    if ((appendByteLength == 1) && (utfCharBytes[0] == ' ')
+		    && (tokenPtr->start[1] == '\n')) {
 		if (isLiteral) {
 		    int clPos;
+
 		    if (result == 0) {
 			clPos = 0;
 		    } else {
@@ -2262,19 +2228,18 @@ TclSubstTokens(
 
 		    if (numCL >= maxNumCL) {
 			maxNumCL *= 2;
-			clPosition = (int*) ckrealloc ((char*)clPosition,
-						       maxNumCL*sizeof(int));
+			clPosition = ckrealloc(clPosition,
+				maxNumCL * sizeof(int));
 		    }
 		    clPosition[numCL] = clPos;
-		    numCL ++;
+		    numCL++;
 		}
-		adjust ++;
+		adjust++;
 	    }
 	    break;
 
 	case TCL_TOKEN_COMMAND: {
-	    Interp *iPtr = (Interp *) interp;
-
+	    /* TIP #280: Transfer line information to nested command */
 	    iPtr->numLevels++;
 	    code = TclInterpReady(interp);
 	    if (code == TCL_OK) {
@@ -2283,21 +2248,27 @@ TclSubstTokens(
 		 */
 
 		int theline;
-		TclAdvanceContinuations (&line, &clNextOuter,
-					 tokenPtr->start - outerScript);
+
+		TclAdvanceContinuations(&line, &clNextOuter,
+			tokenPtr->start - outerScript);
 		theline = line + adjust;
-		/* TIP #280: Transfer line information to nested command */
 		code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2,
 			0, theline, clNextOuter, outerScript);
+
+		TclAdvanceLines(&line, tokenPtr->start+1,
+			tokenPtr->start + tokenPtr->size - 1);
+
 		/*
 		 * Restore flag reset by nested eval for future bracketed
 		 * commands and their cmdframe setup
 		 */
-	        if (inFile) {
+
+		if (inFile) {
 		    iPtr->evalFlags |= TCL_EVAL_FILE;
 		}
 	    }
 	    iPtr->numLevels--;
+	    TclResetCancellation(interp, 0);
 	    appendObj = Tcl_GetObjResult(interp);
 	    break;
 	}
@@ -2396,6 +2367,7 @@ TclSubstTokens(
     if (code != TCL_ERROR) {		/* Keep error message in result! */
 	if (result != NULL) {
 	    Tcl_SetObjResult(interp, result);
+
 	    /*
 	     * If the code found continuation lines (which implies that this
 	     * word is a literal), then we store the accumulated table of
@@ -2414,7 +2386,7 @@ TclSubstTokens(
 	     */
 
 	    if (maxNumCL) {
-		ckfree ((char*) clPosition);
+		ckfree(clPosition);
 	    }
 	} else {
 	    Tcl_ResetResult(interp);
@@ -2558,8 +2530,8 @@ TclIsLocalScalar(
     const char *lastChar = src + (len - 1);
 
     for (p=src ; p<=lastChar ; p++) {
-	if ((CHAR_TYPE(*p) != TYPE_NORMAL) &&
-		(CHAR_TYPE(*p) != TYPE_COMMAND_END)) {
+	if ((CHAR_TYPE(*p) != TYPE_NORMAL)
+		&& (CHAR_TYPE(*p) != TYPE_COMMAND_END)) {
 	    /*
 	     * TCL_COMMAND_END is returned for the last character of the
 	     * string. By this point we know it isn't an array or namespace