summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog12
-rw-r--r--generic/tclCompExpr.c78
2 files changed, 41 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f96c5c..4dc8280 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2007-07-11 Don Porter <dgp@users.sourceforge.net>
+
+ * generic/tclCompExpr.c: Added a "parseOnly" flag argument to
+ ParseExpr() to indicate whether the caller is Tcl_ParseExpr(), with an
+ end goal of filling a Tcl_Parse with Tcl_Tokens representing the
+ parsed expression, or TclCompileExpr() with the goal of compiling and
+ executing the expression. In the latter case, more aggressive
+ conversion of QUOTED and BRACED lexemes to literals is done. In the
+ former case, all such conversion is avoided, since Tcl_Token production
+ would revert it anyway. This enables simplifications to the
+ GenerateTokensForLiteral() routine as well.
+
2007-07-10 Don Porter <dgp@users.sourceforge.net>
* generic/tclCompExpr.c: Added a field for operator precedence
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index f601427..63b8216 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompExpr.c,v 1.65 2007/07/10 21:37:44 dgp Exp $
+ * RCS: @(#) $Id: tclCompExpr.c,v 1.66 2007/07/11 14:43:47 dgp Exp $
*/
#include "tclInt.h"
@@ -395,7 +395,7 @@ static int GenerateTokensForLiteral(const char *script,
static int ParseExpr(Tcl_Interp *interp, const char *start,
int numBytes, OpNode **opTreePtr,
Tcl_Obj *litList, Tcl_Obj *funcList,
- Tcl_Parse *parsePtr);
+ Tcl_Parse *parsePtr, int parseOnly);
static int ParseLexeme(const char *start, int numBytes,
unsigned char *lexemePtr, Tcl_Obj **literalPtr);
@@ -442,9 +442,14 @@ ParseExpr(
* allocated OpNode tree should go. */
Tcl_Obj *litList, /* List to append literals to. */
Tcl_Obj *funcList, /* List to append function names to. */
- Tcl_Parse *parsePtr) /* Structure to fill with tokens representing
+ Tcl_Parse *parsePtr, /* Structure to fill with tokens representing
* those operands that require run time
* substitutions. */
+ int parseOnly) /* A boolean indicating whether the caller's
+ * aim is just a parse, or whether it will go
+ * on to compile the expression. Different
+ * optimizations are appropriate for the
+ * two scenarios. */
{
OpNode *nodes = NULL; /* Pointer to the OpNode storage array where
* we build the parse tree. */
@@ -457,8 +462,6 @@ ParseExpr(
int code = TCL_OK; /* Return code */
int scanned = 0; /* Capture number of byte scanned by
* parsing routines. */
-
- /* These variables hold the state of the parser */
unsigned char lexeme = START; /* Most recent lexeme parsed. */
int lastOpen = 0; /* Index of the OpNode of the OPEN_PAREN
* operator we most recently matched. */
@@ -814,36 +817,27 @@ ParseExpr(
tokenPtr = parsePtr->tokenPtr + wordIndex;
tokenPtr->size = scanned;
tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1;
- if ((lexeme == QUOTED) || (lexeme == BRACED)) {
+ if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) {
/*
- * When a braced or quoted word within an expression
- * is simple enough, we can store it as a literal rather
- * than in its tokenized form. This is an advantage since
- * the compiled bytecode is going to need the argument in
- * Tcl_Obj form eventually, so it's to our advantage to just
- * get there now, and avoid the need to convert from Tcl_Token
- * form again later. Currently we only store literals
- * for things parsed as single TEXT tokens (known as
- * TCL_TOKEN_SIMPLE_WORD in other contexts). In this
- * simple case, the literal string we store is identical
- * to a substring of the original expression.
+ * When this expression is destined to be compiled, and a
+ * braced or quoted word within an expression is known at
+ * compile time (no runtime substitutions in it), we can
+ * store it as a literal rather than in its tokenized form.
+ * This is an advantage since the compiled bytecode is going
+ * to need the argument in Tcl_Obj form eventually, so it's
+ * just as well to get there now. Another advantage is that
+ * with this conversion, larger constant expressions might
+ * be grown and optimized.
*
- * TODO: We ought to be able to store as a literal any
- * word which is known at compile-time, including those that
- * contain backslash substitution. This can be helpful to
- * store multi-line strings that include escaped newlines,
- * or strings that include multi-byte characters expressed
- * in \uHHHH form. Removing the first two tests here is
- * sufficient to make that change, but will lead to a
- * Tcl_Panic() in GenerateTokensForLiteral() until that routine
- * is revised to handle such literals.
+ * On the other hand, if the end goal of this parse is to
+ * fill a Tcl_Parse for a caller of Tcl_ParseExpr(), then it's
+ * wasteful to convert to a literal only to convert back again
+ * later.
*/
literal = Tcl_NewObj();
- if (tokenPtr->numComponents == 1
- && tokenPtr[1].type == TCL_TOKEN_TEXT
- && TclWordKnownAtCompileTime(tokenPtr, literal)) {
+ if (TclWordKnownAtCompileTime(tokenPtr, literal)) {
Tcl_ListObjAppendElement(NULL, litList, literal);
lastParsed = OT_LITERAL;
parsePtr->numTokens = wordIndex;
@@ -1159,7 +1153,7 @@ GenerateTokensForLiteral(
int nextLiteral,
Tcl_Parse *parsePtr)
{
- int scanned, closer = 0;
+ int scanned;
const char *start = script;
Tcl_Token *destPtr;
unsigned char lexeme;
@@ -1171,28 +1165,14 @@ GenerateTokensForLiteral(
scanned = TclParseAllWhiteSpace(start, numBytes);
start +=scanned;
scanned = ParseLexeme(start, numBytes-scanned, &lexeme, NULL);
- if ((lexeme != NUMBER) && (lexeme != BAREWORD)) {
- Tcl_Obj *literal;
- const char *bytes;
-
- Tcl_ListObjIndex(NULL, litList, nextLiteral, &literal);
- bytes = Tcl_GetStringFromObj(literal, &scanned);
- start++;
- if (memcmp(bytes, start, (size_t) scanned) == 0) {
- closer = 1;
- } else {
- /* TODO */
- Tcl_Panic("figure this out");
- }
- }
if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
TclExpandTokenArray(parsePtr);
}
destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
destPtr->type = TCL_TOKEN_SUB_EXPR;
- destPtr->start = start-closer;
- destPtr->size = scanned+2*closer;
+ destPtr->start = start;
+ destPtr->size = scanned;
destPtr->numComponents = 1;
destPtr++;
destPtr->type = TCL_TOKEN_TEXT;
@@ -1201,7 +1181,7 @@ GenerateTokensForLiteral(
destPtr->numComponents = 0;
parsePtr->numTokens += 2;
- return (start + scanned + closer - script);
+ return (start + scanned - script);
}
/*
@@ -1505,7 +1485,7 @@ Tcl_ParseExpr(
(Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse));
/* Holds the Tcl_Tokens of substitutions */
int code = ParseExpr(interp, start, numBytes, &opTree, litList,
- funcList, exprParsePtr);
+ funcList, exprParsePtr, 1 /* parseOnly */);
int errorType = exprParsePtr->errorType;
const char* term = exprParsePtr->term;
@@ -1816,7 +1796,7 @@ TclCompileExpr(
/* Holds the Tcl_Tokens of substitutions */
int code = ParseExpr(interp, script, numBytes, &opTree, litList,
- funcList, parsePtr);
+ funcList, parsePtr, 0 /* parseOnly */);
if (code == TCL_OK) {
int litObjc, needsNumConversion = 1;