2 files changed, 41 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f96c5c..4dc8280 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2007-07-11  Don Porter  <dgp@users.sourceforge.net>
+
+	* generic/tclCompExpr.c:	Added a "parseOnly" flag argument to
+	ParseExpr() to indicate whether the caller is Tcl_ParseExpr(), with an
+	end goal of filling a Tcl_Parse with Tcl_Tokens representing the
+	parsed expression, or TclCompileExpr() with the goal of compiling and
+	executing the expression.  In the latter case, more aggressive
+	conversion of QUOTED and BRACED lexeme to literals is done.  In the
+	former case, all such conversion is avoided, since Tcl_Token production
+	would revert it anyway.  This enables simplifications to the
+	GenerateTokensForLiteral() routine as well.
+
 2007-07-10  Don Porter  <dgp@users.sourceforge.net>
 
 	* generic/tclCompExpr.c:	Added a field for operator precedence
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index f601427..63b8216 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCompExpr.c,v 1.65 2007/07/10 21:37:44 dgp Exp $
+ * RCS: @(#) $Id: tclCompExpr.c,v 1.66 2007/07/11 14:43:47 dgp Exp $
  */
 
 #include "tclInt.h"
@@ -395,7 +395,7 @@ static int		GenerateTokensForLiteral(const char *script,
 static int		ParseExpr(Tcl_Interp *interp, const char *start,
 			    int numBytes, OpNode **opTreePtr,
 			    Tcl_Obj *litList, Tcl_Obj *funcList,
-			    Tcl_Parse *parsePtr);
+			    Tcl_Parse *parsePtr, int parseOnly);
 static int		ParseLexeme(const char *start, int numBytes,
 			    unsigned char *lexemePtr, Tcl_Obj **literalPtr);
 
@@ -442,9 +442,14 @@ ParseExpr(
 				 * allocated OpNode tree should go. */
     Tcl_Obj *litList,		/* List to append literals to. */
     Tcl_Obj *funcList,		/* List to append function names to. */
-    Tcl_Parse *parsePtr)	/* Structure to fill with tokens representing
+    Tcl_Parse *parsePtr,	/* Structure to fill with tokens representing
 				 * those operands that require run time
 				 * substitutions. */
+    int parseOnly)		/* A boolean indicating whether the caller's
+				 * aim is just a parse, or whether it will go
+				 * on to compile the expression.  Different
+				 * optimizations are appropriate for the
+				 * two scenarios. */
 {
     OpNode *nodes = NULL;	/* Pointer to the OpNode storage array where
 				 * we build the parse tree. */
@@ -457,8 +462,6 @@ ParseExpr(
     int code = TCL_OK;		/* Return code */
     int scanned = 0;		/* Capture number of byte scanned by 
 				 * parsing routines. */
-
-    /* These variables hold the state of the parser */
     unsigned char lexeme = START;	/* Most recent lexeme parsed. */
     int lastOpen = 0;		/* Index of the OpNode of the OPEN_PAREN
 				 * operator we most recently matched. */
@@ -814,36 +817,27 @@ ParseExpr(
 	    tokenPtr = parsePtr->tokenPtr + wordIndex;
 	    tokenPtr->size = scanned;
 	    tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1;
-	    if ((lexeme == QUOTED) || (lexeme == BRACED)) {
+	    if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) {
 
 		/*
-		 * When a braced or quoted word within an expression
-		 * is simple enough, we can store it as a literal rather
-		 * than in its tokenized form.  This is an advantage since
-		 * the compiled bytecode is going to need the argument in
-		 * Tcl_Obj form eventually, so it's to our advantage to just
-		 * get there now, and avoid the need to convert from Tcl_Token
-		 * form again later.  Currently we only store literals
-		 * for things parsed as single TEXT tokens (known as
-		 * TCL_TOKEN_SIMPLE_WORD in other contexts).  In this
-		 * simple case, the literal string we store is identical
-		 * to a substring of the original expression.
+		 * When this expression is destined to be compiled, and a
+		 * braced or quoted word within an expression is known at
+		 * compile time (no runtime substitutions in it), we can
+		 * store it as a literal rather than in its tokenized form.
+		 * This is an advantage since the compiled bytecode is going
+		 * to need the argument in Tcl_Obj form eventually, so it's
+		 * just as well to get there now.  Another advantage is that
+		 * with this conversion, larger constant expressions might
+		 * be grown and optimized.
 		 *
-		 * TODO: We ought to be able to store as a literal any
-		 * word which is known at compile-time, including those that
-		 * contain backslash substitution.  This can be helpful to
-		 * store multi-line strings that include escaped newlines,
-		 * or strings that include multi-byte characters expressed
-		 * in \uHHHH form.  Removing the first two tests here is
-		 * sufficient to make that change, but will lead to a
-		 * Tcl_Panic() in GenerateTokensForLiteral() until that routine
-		 * is revised to handle such literals.
+		 * On the contrary, if the end goal of this parse is to
+		 * fill a Tcl_Parse for a caller of Tcl_ParseExpr(), then it's
+		 * wasteful to convert to a literal only to convert back again
+		 * later.
 		 */
 
 		literal = Tcl_NewObj();
-		if (tokenPtr->numComponents == 1
-			&& tokenPtr[1].type == TCL_TOKEN_TEXT
-			&& TclWordKnownAtCompileTime(tokenPtr, literal)) {
+		if (TclWordKnownAtCompileTime(tokenPtr, literal)) {
 		    Tcl_ListObjAppendElement(NULL, litList, literal);
 		    lastParsed = OT_LITERAL;
 		    parsePtr->numTokens = wordIndex;
@@ -1159,7 +1153,7 @@ GenerateTokensForLiteral(
     int nextLiteral,
     Tcl_Parse *parsePtr)
 {
-    int scanned, closer = 0;
+    int scanned;
     const char *start = script;
     Tcl_Token *destPtr;
     unsigned char lexeme;
@@ -1171,28 +1165,14 @@ GenerateTokensForLiteral(
     scanned = TclParseAllWhiteSpace(start, numBytes);
     start +=scanned;
     scanned = ParseLexeme(start, numBytes-scanned, &lexeme, NULL);
-    if ((lexeme != NUMBER) && (lexeme != BAREWORD)) {
-	Tcl_Obj *literal;
-	const char *bytes;
-
-	Tcl_ListObjIndex(NULL, litList, nextLiteral, &literal);
-	bytes = Tcl_GetStringFromObj(literal, &scanned);
-	start++;
-	if (memcmp(bytes, start, (size_t) scanned) == 0) {
-	    closer = 1;
-	} else {
-	    /* TODO */
-	    Tcl_Panic("figure this out");
-	}
-    }
 
     if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
 	TclExpandTokenArray(parsePtr);
     }
     destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
     destPtr->type = TCL_TOKEN_SUB_EXPR;
-    destPtr->start = start-closer;
-    destPtr->size = scanned+2*closer;
+    destPtr->start = start;
+    destPtr->size = scanned;
     destPtr->numComponents = 1;
     destPtr++;
     destPtr->type = TCL_TOKEN_TEXT;
@@ -1201,7 +1181,7 @@ GenerateTokensForLiteral(
     destPtr->numComponents = 0;
     parsePtr->numTokens += 2;
 
-    return (start + scanned + closer - script);
+    return (start + scanned - script);
 }
 
 /*
@@ -1505,7 +1485,7 @@ Tcl_ParseExpr(
 	    (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse));
 				/* Holds the Tcl_Tokens of substitutions */
     int code = ParseExpr(interp, start, numBytes, &opTree, litList,
-	    funcList, exprParsePtr);
+	    funcList, exprParsePtr, 1 /* parseOnly */);
     int errorType = exprParsePtr->errorType;
     const char* term = exprParsePtr->term;
 
@@ -1816,7 +1796,7 @@ TclCompileExpr(
 				/* Holds the Tcl_Tokens of substitutions */
 
     int code = ParseExpr(interp, script, numBytes, &opTree, litList,
-	    funcList, parsePtr);
+	    funcList, parsePtr, 0 /* parseOnly */);
 
     if (code == TCL_OK) {
 	int litObjc, needsNumConversion = 1;