summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/tclCompExpr.c600
-rw-r--r--generic/tclParse.c40
2 files changed, 355 insertions, 285 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 9bda88e..7bab21a 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompExpr.c,v 1.53.2.7 2007/07/12 14:29:54 dgp Exp $
+ * RCS: @(#) $Id: tclCompExpr.c,v 1.53.2.8 2007/07/19 22:52:57 dgp Exp $
*/
#include "tclInt.h"
@@ -29,7 +29,10 @@
typedef struct OpNode {
int left; /* "Pointer" to the left operand. */
int right; /* "Pointer" to the right operand. */
- int parent; /* "Pointer" to the parent operand. */
+ union {
+ int parent; /* "Pointer" to the parent operand. */
+ int prev; /* "Pointer" joining incomplete tree stack */
+ } p;
unsigned char lexeme; /* Code that identifies the operator. */
unsigned char precedence; /* Precedence of the operator */
} OpNode;
@@ -83,12 +86,17 @@ enum OperandTypes {
* the inorder traversals of the completed tree we perform are known to visit
* the leaves in the same order as the original parse.
*
- * Those OpNodes that are themselves (roots of subexpression trees that are)
- * operands of some operator store in their parent field a "pointer" to the
- * OpNode of that operator. The parent field permits a destructive inorder
- * traversal of the tree within a non-recursive routine (ConvertTreeToTokens()
- * and CompileExprTree()). This means that even expression trees of great
- * depth pose no risk of blowing the C stack.
+ * In a completed parse tree, those OpNodes that are themselves (roots of
+ * subexpression trees that are) operands of some operator store in their
+ * p.parent field a "pointer" to the OpNode of that operator. The p.parent
+ * field permits a destructive inorder traversal of the tree within a
+ * non-recursive routine (ConvertTreeToTokens() and CompileExprTree()). This
+ * means that even expression trees of great depth pose no risk of blowing
+ * the C stack.
+ *
+ * While the parse tree is being constructed, the same memory space is used
+ * to hold the p.prev field which chains together a stack of incomplete
+ * trees awaiting their right operands.
*
* The lexeme field is filled in with the lexeme of the operator that is
* returned by the ParseLexeme() routine. Only lexemes for unary and
@@ -352,7 +360,6 @@ static const unsigned char prec[] = {
PREC_OPEN_PAREN, /* OPEN_PAREN */
PREC_UNARY, /* NOT*/
PREC_UNARY, /* BIT_NOT*/
- 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
@@ -384,14 +391,12 @@ static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes,
Tcl_Obj *const litObjv[], Tcl_Obj *funcList,
Tcl_Token *tokenPtr, int *convertPtr,
CompileEnv *envPtr);
-static void ConvertTreeToTokens(Tcl_Interp *interp,
- const char *start, int numBytes, OpNode *nodes,
- Tcl_Obj *litList, Tcl_Token *tokenPtr,
+static void ConvertTreeToTokens(const char *start, int numBytes,
+ OpNode *nodes, Tcl_Token *tokenPtr,
Tcl_Parse *parsePtr);
static int CopyTokens(Tcl_Token *sourcePtr, Tcl_Parse *parsePtr);
static int GenerateTokensForLiteral(const char *script,
- int numBytes, Tcl_Obj *litList, int nextLiteral,
- Tcl_Parse *parsePtr);
+ int numBytes, Tcl_Parse *parsePtr);
static int ParseExpr(Tcl_Interp *interp, const char *start,
int numBytes, OpNode **opTreePtr,
Tcl_Obj *litList, Tcl_Obj *funcList,
@@ -435,9 +440,7 @@ static int
ParseExpr(
Tcl_Interp *interp, /* Used for error reporting. */
const char *start, /* Start of source string to parse. */
- int numBytes, /* Number of bytes in string. If < 0, the
- * string consists of all bytes up to the
- * first null character. */
+ int numBytes, /* Number of bytes in string. */
OpNode **opTreePtr, /* Points to space where a pointer to the
* allocated OpNode tree should go. */
Tcl_Obj *litList, /* List to append literals to. */
@@ -459,22 +462,26 @@ ParseExpr(
* for most expressions to parse with no need
* for array growth and reallocation. */
int nodesUsed = 0; /* Number of OpNodes filled. */
- int code = TCL_OK; /* Return code */
int scanned = 0; /* Capture number of byte scanned by
* parsing routines. */
- unsigned char lexeme = START; /* Most recent lexeme parsed. */
- int lastOpen = 0; /* Index of the OpNode of the OPEN_PAREN
- * operator we most recently matched. */
- int lastParsed = 0; /* Stores info about what the lexeme parsed
+ int lastParsed; /* Stores info about what the lexeme parsed
* the previous pass through the parsing loop
* was. If it was an operator, lastParsed is
* the index of the OpNode for that operator.
- * If it was not and operator, lastParsed holds
+ * If it was not an operator, lastParsed holds
* an OperandTypes value encoding what we
- * need to know about it. The initial value
- * is 0 indicating that as we start the "last
- * thing we parsed" was the START lexeme stored
- * in node 0. */
+ * need to know about it. */
+ int incomplete; /* Index of the most recent incomplete tree
+ * in the OpNode array. Heads a stack of
+ * incomplete trees linked by p.prev. */
+ int complete = OT_NONE; /* "Index" of the complete tree (that is, a
+ * complete subexpression) determined at the
+ * moment. OT_NONE is a nonsense value
+ * used only to silence compiler warnings.
+ * During a parse, complete will always hold
+ * an index or an OperandTypes value pointing
+ * to an actual leaf at the time the complete
+ * tree is needed. */
/* These variables control generation of the error message. */
Tcl_Obj *msg = NULL; /* The error message. */
@@ -496,32 +503,33 @@ ParseExpr(
* error message readable, we impose this limit
* on the substring size we extract. */
- if (numBytes < 0) {
- numBytes = (start ? strlen(start) : 0);
- }
-
TclParseInit(interp, start, numBytes, parsePtr);
nodes = (OpNode *) attemptckalloc(nodesAvailable * sizeof(OpNode));
if (nodes == NULL) {
TclNewLiteralStringObj(msg, "not enough memory to parse expression");
- code = TCL_ERROR;
- } else {
- /*
- * Initialize the parse tree with the special "START" node.
- */
-
- nodes->lexeme = lexeme;
- nodes->precedence = prec[lexeme];
- nodes->left = OT_NONE;
- nodes->right = OT_NONE;
- nodes->parent = -1;
- nodesUsed++;
+ goto error;
}
- while ((code == TCL_OK) && (lexeme != END)) {
+ /* Initialize the parse tree with the special "START" node. */
+ nodes->lexeme = START;
+ nodes->precedence = prec[START];
+ nodes->left = OT_NONE;
+ nodes->right = OT_NONE;
+ incomplete = lastParsed = nodesUsed;
+ nodesUsed++;
+
+ /*
+ * Main parsing loop parses one lexeme per iteration. We exit the
+ * loop only when there's a syntax error with a "goto error" which
+ * takes us to the error handling code following the loop, or when
+ * we've successfully completed the parse and we return to the caller.
+ */
+
+ while (1) {
OpNode *nodePtr; /* Points to the OpNode we may fill this
* pass through the loop. */
+ unsigned char lexeme; /* The lexeme we parse this iteration. */
Tcl_Obj *literal; /* Filled by the ParseLexeme() call when
* a literal is parsed that has a Tcl_Obj
* rep worth preserving. */
@@ -549,47 +557,40 @@ ParseExpr(
if (newPtr == NULL) {
TclNewLiteralStringObj(msg,
"not enough memory to parse expression");
- code = TCL_ERROR;
- continue;
+ goto error;
}
nodesAvailable = size;
nodes = newPtr;
}
nodePtr = nodes + nodesUsed;
- /*
- * Skip white space between lexemes.
- */
-
+ /* Skip white space between lexemes. */
scanned = TclParseAllWhiteSpace(start, numBytes);
start += scanned;
numBytes -= scanned;
scanned = ParseLexeme(start, numBytes, &lexeme, &literal);
- /*
- * Use context to categorize the lexemes that are ambiguous.
- */
-
+ /* Use context to categorize the lexemes that are ambiguous. */
if ((NODE_TYPE & lexeme) == 0) {
switch (lexeme) {
case INVALID:
msg = Tcl_ObjPrintf(
"invalid character \"%.*s\"", scanned, start);
- code = TCL_ERROR;
- continue;
+ goto error;
case INCOMPLETE:
msg = Tcl_ObjPrintf(
"incomplete operator \"%.*s\"", scanned, start);
- code = TCL_ERROR;
- continue;
+ goto error;
case BAREWORD:
+
/*
* Most barewords in an expression are a syntax error.
* The exceptions are that when a bareword is followed by
* an open paren, it might be a function call, and when the
* bareword is a legal literal boolean value, we accept that
* as well.
+
*/
if (start[scanned+TclParseAllWhiteSpace(
start+scanned, numBytes-scanned)] == '(') {
@@ -601,6 +602,7 @@ ParseExpr(
* it, so we keep a separate list of all the function
* names we've parsed in the order we found them.
*/
+
Tcl_ListObjAppendElement(NULL, funcList, literal);
} else {
int b;
@@ -622,29 +624,27 @@ ParseExpr(
" or \"%.*s%s(...)\" or ...",
(scanned < limit) ? scanned : limit - 3,
start, (scanned < limit) ? "" : "...");
- code = TCL_ERROR;
- continue;
+ goto error;
}
}
break;
case PLUS:
case MINUS:
if (IsOperator(lastParsed)) {
+
/*
* A "+" or "-" coming just after another operator
* must be interpreted as a unary operator.
*/
+
lexeme |= UNARY;
} else {
lexeme |= BINARY;
}
}
- }
-
- /*
- * Handle lexeme based on its category.
- */
+ } /* Uncategorized lexemes */
+ /* Handle lexeme based on its category. */
switch (NODE_TYPE & lexeme) {
/*
@@ -656,24 +656,15 @@ ParseExpr(
case LEAF: {
Tcl_Token *tokenPtr;
- const char *end;
+ const char *end = start;
int wordIndex;
+ int code = TCL_OK;
/*
- * Store away any literals on the list now, so they'll
- * be available for our caller to free if we error out
- * of this routine. [Bug 1705778, leak K23]
+ * A leaf operand appearing just after something that's not an
+ * operator is a syntax error.
*/
- switch (lexeme) {
- case NUMBER:
- case BOOLEAN:
- Tcl_ListObjAppendElement(NULL, litList, literal);
- break;
- default:
- break;
- }
-
if (NotOperator(lastParsed)) {
msg = Tcl_ObjPrintf("missing operator at %s", mark);
if (lastStart[0] == '0') {
@@ -688,14 +679,19 @@ ParseExpr(
scanned = 0;
insertMark = 1;
parsePtr->errorType = TCL_PARSE_BAD_NUMBER;
- code = TCL_ERROR;
- continue;
+
+ /* Free any literal to avoid a memleak. */
+ if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) {
+ Tcl_DecrRefCount(literal);
+ }
+ goto error;
}
switch (lexeme) {
case NUMBER:
case BOOLEAN:
- lastParsed = OT_LITERAL;
+ Tcl_ListObjAppendElement(NULL, litList, literal);
+ complete = lastParsed = OT_LITERAL;
start += scanned;
numBytes -= scanned;
continue;
@@ -721,41 +717,27 @@ ParseExpr(
case QUOTED:
code = Tcl_ParseQuotedString(interp, start, numBytes,
parsePtr, 1, &end);
- if (code != TCL_OK) {
- /* TODO: This adjustment of scanned is untested and
- * and uncommented. Correct that. Its only possible
- * purpose is to influence the error message. */
- scanned = parsePtr->term - start;
- scanned += (scanned < numBytes);
- continue;
- }
scanned = end - start;
break;
case BRACED:
code = Tcl_ParseBraces(interp, start, numBytes,
parsePtr, 1, &end);
- if (code != TCL_OK) {
- continue;
- }
scanned = end - start;
break;
case VARIABLE:
code = Tcl_ParseVarName(interp, start, numBytes, parsePtr, 1);
- if (code != TCL_OK) {
- /* TODO: This adjustment of scanned is untested and
- * and uncommented. Correct that. Its only possible
- * purpose is to influence the error message. */
- scanned = parsePtr->term - start;
- scanned += (scanned < numBytes);
- continue;
- }
+
+ /*
+ * Handle the quirk that Tcl_ParseVarName reports a successful
+ * parse even when it gets only a "$" with no variable name.
+ */
+
tokenPtr = parsePtr->tokenPtr + wordIndex + 1;
- if (tokenPtr->type != TCL_TOKEN_VARIABLE) {
+ if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) {
TclNewLiteralStringObj(msg, "invalid character \"$\"");
- code = TCL_ERROR;
- continue;
+ goto error;
}
scanned = tokenPtr->size;
break;
@@ -799,20 +781,34 @@ ParseExpr(
TclStackFree(interp, nestedPtr);
end = start;
start = tokenPtr->start;
- if (code != TCL_OK) {
- /* TODO: This adjustment of scanned is untested and
- * and uncommented. Correct that. Its only possible
- * purpose is to influence the error message. */
- scanned = parsePtr->term - start;
- scanned += (scanned < numBytes);
- continue;
- }
scanned = end - start;
tokenPtr->size = scanned;
parsePtr->numTokens++;
break;
}
}
+ if (code != TCL_OK) {
+
+ /*
+ * Here we handle all the syntax errors generated by
+ * the Tcl_Token generating parsing routines called in the
+ * switch just above. If the value of parsePtr->incomplete
+ * is 1, then the error was an unbalanced '[', '(', '{',
+ * or '"' and parsePtr->term is pointing to that unbalanced
+ * character. If the value of parsePtr->incomplete is 0,
+ * then the error is one of lacking whitespace following a
+ * quoted word, for example: expr {[an error {foo}bar]},
+ * and parsePtr->term points to where the whitespace is
+ * missing. We reset our values of start and scanned so that
+ * when our error message is constructed, the location of
+ * the syntax error is sure to appear in it, even if the
+ * quoted expression is truncated.
+ */
+
+ start = parsePtr->term;
+ scanned = parsePtr->incomplete;
+ goto error;
+ }
tokenPtr = parsePtr->tokenPtr + wordIndex;
tokenPtr->size = scanned;
@@ -839,56 +835,84 @@ ParseExpr(
literal = Tcl_NewObj();
if (TclWordKnownAtCompileTime(tokenPtr, literal)) {
Tcl_ListObjAppendElement(NULL, litList, literal);
- lastParsed = OT_LITERAL;
+ complete = lastParsed = OT_LITERAL;
parsePtr->numTokens = wordIndex;
break;
}
Tcl_DecrRefCount(literal);
}
- lastParsed = OT_TOKENS;
+ complete = lastParsed = OT_TOKENS;
break;
- }
+ } /* case LEAF */
case UNARY:
+
+ /*
+ * A unary operator appearing just after something that's not an
+ * operator is a syntax error -- something trying to be the left
+ * operand of an operator that doesn't take one.
+ */
+
if (NotOperator(lastParsed)) {
msg = Tcl_ObjPrintf("missing operator at %s", mark);
scanned = 0;
insertMark = 1;
- code = TCL_ERROR;
- continue;
+ goto error;
}
- lastParsed = nodesUsed;
- nodePtr->lexeme = lexeme;
- nodePtr->precedence = prec[lexeme];
- nodePtr->left = OT_NONE;
- nodePtr->right = OT_NONE;
- nodePtr->parent = nodePtr - nodes - 1;
+
+ /* Create an OpNode for the unary operator */
+ nodePtr->lexeme = lexeme; /* Remember the operator... */
+ nodePtr->precedence = prec[lexeme]; /* ... and its precedence. */
+ nodePtr->left = OT_NONE; /* No left operand */
+ nodePtr->right = OT_NONE; /* Right operand not
+ * yet known. */
+
+ /*
+ * This unary operator is a new incomplete tree, so push it
+ * onto our stack of incomplete trees. Also remember it as
+ * the last lexeme we parsed.
+ */
+
+ nodePtr->p.prev = incomplete;
+ incomplete = lastParsed = nodesUsed;
nodesUsed++;
break;
case BINARY: {
- OpNode *otherPtr = NULL;
+ OpNode *incompletePtr;
unsigned char precedence = prec[lexeme];
+ /*
+ * A binary operator appearing just after another operator is a
+ * syntax error -- one of the two operators is missing an operand.
+ */
+
if (IsOperator(lastParsed)) {
if ((lexeme == CLOSE_PAREN)
&& (nodePtr[-1].lexeme == OPEN_PAREN)) {
if (nodePtr[-2].lexeme == FUNCTION) {
+
/*
* Normally, "()" is a syntax error, but as a special
* case accept it as an argument list for a function.
+ * Treat this as a special LEAF lexeme, and restart
+ * the parsing loop with zero characters scanned.
+ * We'll parse the ")" again the next time through,
+ * but with the OT_EMPTY leaf as the subexpression
+ * between the parens.
*/
scanned = 0;
- lastParsed = OT_EMPTY;
+ complete = lastParsed = OT_EMPTY;
+
+ /* TODO: explain */
nodePtr[-1].left--;
break;
}
msg = Tcl_ObjPrintf("empty subexpression at %s", mark);
scanned = 0;
insertMark = 1;
- code = TCL_ERROR;
- continue;
+ goto error;
}
if (nodePtr[-1].precedence > precedence) {
@@ -920,185 +944,240 @@ ParseExpr(
scanned = 0;
insertMark = 1;
}
- code = TCL_ERROR;
- continue;
+ goto error;
}
- if (lastParsed == OT_NONE) {
- otherPtr = nodes + lastOpen - 1;
- lastParsed = lastOpen;
- } else {
- otherPtr = nodePtr - 1;
- }
+ /*
+ * Here is where the tree comes together. At this point, we
+ * have a stack of incomplete trees corresponding to
+ * substrings that are incomplete expressions, followed by
+ * a complete tree corresponding to a substring that is itself
+ * a complete expression, followed by the binary operator we have
+ * just parsed. The incomplete trees can each be completed by
+ * adding a right operand.
+ *
+ * To illustrate with an example, when we parse the expression
+ * "1+2*3-4" and we reach this point having just parsed the "-"
+ * operator, we have these incomplete trees: START, "1+", and
+ * "2*". Next we have the complete subexpression "3". Last is
+ * the "-" we've just parsed.
+ *
+ * The next step is to join our complete tree to an operator.
+ * The choice is governed by the precedence and associativity
+ * of the competing operators. If we connect it as the right
+ * operand of our most recent incomplete tree, we get a new
+ * complete tree, and we can repeat the process. The while
+ * loop following repeats this until precedence indicates it
+ * is time to join the complete tree as the left operand of
+ * the just parsed binary operator.
+ *
+ * Continuing the example, the first pass through the loop
+ * will join "3" to "2*"; the next pass will join "2*3" to
+ * "1+". Then we'll exit the loop and join "1+2*3" to "-".
+ * When we return to parse another lexeme, our stack of
+ * incomplete trees is START and "1+2*3-".
+ */
+
while (1) {
- /*
- * lastParsed is "index" of item to be linked.
- * otherPtr points to competing operator.
- */
+ incompletePtr = nodes + incomplete;
- if (otherPtr->precedence < precedence) {
+ if (incompletePtr->precedence < precedence) {
break;
}
- if (otherPtr->precedence == precedence) {
- /*
- * Right association rules for exponentiation.
- */
+ if (incompletePtr->precedence == precedence) {
+ /* Right association rules for exponentiation. */
if (lexeme == EXPON) {
break;
}
/*
- * Special association rules for the ternary operators.
+ * Special association rules for the conditional operators.
* The "?" and ":" operators have equal precedence, but
* must be linked up in sensible pairs.
*/
- if ((otherPtr->lexeme == QUESTION)
- && (NotOperator(lastParsed)
- || (nodes[lastParsed].lexeme != COLON))) {
+ if ((incompletePtr->lexeme == QUESTION)
+ && (NotOperator(complete)
+ || (nodes[complete].lexeme != COLON))) {
break;
}
- if ((otherPtr->lexeme == COLON) && (lexeme == QUESTION)) {
+ if ((incompletePtr->lexeme == COLON)
+ && (lexeme == QUESTION)) {
break;
}
}
- /*
- * We should link the lastParsed item to the otherPtr as its
- * right operand. First make some syntax checks.
- */
+ /* Some special syntax checks... */
- if ((otherPtr->lexeme == OPEN_PAREN)
+ /* Parens must balance */
+ if ((incompletePtr->lexeme == OPEN_PAREN)
&& (lexeme != CLOSE_PAREN)) {
TclNewLiteralStringObj(msg, "unbalanced open paren");
parsePtr->errorType = TCL_PARSE_MISSING_PAREN;
- code = TCL_ERROR;
- break;
+ goto error;
}
- if ((otherPtr->lexeme == QUESTION)
- && (NotOperator(lastParsed)
- || (nodes[lastParsed].lexeme != COLON))) {
+
+ /* Right operand of "?" must be ":" */
+ if ((incompletePtr->lexeme == QUESTION)
+ && (NotOperator(complete)
+ || (nodes[complete].lexeme != COLON))) {
msg = Tcl_ObjPrintf(
"missing operator \":\" at %s", mark);
scanned = 0;
insertMark = 1;
- code = TCL_ERROR;
- break;
+ goto error;
}
- if (IsOperator(lastParsed)
- && (nodes[lastParsed].lexeme == COLON)
- && (otherPtr->lexeme != QUESTION)) {
+
+ /* Operator ":" may only be right operand of "?" */
+ if (IsOperator(complete)
+ && (nodes[complete].lexeme == COLON)
+ && (incompletePtr->lexeme != QUESTION)) {
TclNewLiteralStringObj(msg,
- "unexpected operator \":\" without preceding \"?\"");
- code = TCL_ERROR;
- break;
+ "unexpected operator \":\" "
+ "without preceding \"?\"");
+ goto error;
}
/*
- * Link orphan as right operand of otherPtr.
+ * Attach complete tree as right operand of most recent
+ * incomplete tree.
*/
- otherPtr->right = lastParsed;
- if (lastParsed >= 0) {
- nodes[lastParsed].parent = otherPtr - nodes;
+ incompletePtr->right = complete;
+ if (IsOperator(complete)) {
+ nodes[complete].p.parent = incomplete;
}
- lastParsed = otherPtr - nodes;
- if (otherPtr->lexeme == OPEN_PAREN) {
+ if (incompletePtr->lexeme == START) {
+
/*
- * CLOSE_PAREN can only close one OPEN_PAREN.
+ * Completing the START tree indicates we're done.
+ * Transfer the parse tree to the caller and return.
*/
- break;
+ *opTreePtr = nodes;
+ return TCL_OK;
}
- if (otherPtr->lexeme == START) {
- /*
- * Don't backtrack beyond the start.
- */
+ /*
+ * With a right operand attached, last incomplete tree has
+ * become the complete tree. Pop it from the incomplete
+ * tree stack.
+ */
+ complete = incomplete;
+ incomplete = incompletePtr->p.prev;
+
+ /* CLOSE_PAREN can only close one OPEN_PAREN. */
+ if (incompletePtr->lexeme == OPEN_PAREN) {
break;
}
- otherPtr = nodes + otherPtr->parent;
- }
- if (code != TCL_OK) {
- continue;
}
+ /* More syntax checks... */
+
+ /* Parens must balance. */
if (lexeme == CLOSE_PAREN) {
- if (otherPtr->lexeme == START) {
+ if (incompletePtr->lexeme != OPEN_PAREN) {
TclNewLiteralStringObj(msg, "unbalanced close paren");
- code = TCL_ERROR;
- continue;
+ goto error;
}
- lastParsed = OT_NONE;
- lastOpen = otherPtr - nodes;
- otherPtr->left++;
-
- /*
- * Create no node for a CLOSE_PAREN lexeme.
- */
-
- break;
}
+
+ /* Commas must appear only in function argument lists. */
if (lexeme == COMMA) {
- if ((otherPtr->lexeme != OPEN_PAREN)
- || (otherPtr[-1].lexeme != FUNCTION)) {
+ if ((incompletePtr->lexeme != OPEN_PAREN)
+ || (incompletePtr[-1].lexeme != FUNCTION)) {
TclNewLiteralStringObj(msg,
"unexpected \",\" outside function argument list");
- code = TCL_ERROR;
- continue;
+ goto error;
}
- otherPtr->left++;
+
+ /* TODO: explain */
+ incompletePtr->left++;
}
- if (IsOperator(lastParsed) && (nodes[lastParsed].lexeme == COLON)) {
+
+ /* Operator ":" may only be right operand of "?" */
+ if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) {
TclNewLiteralStringObj(msg,
"unexpected operator \":\" without preceding \"?\"");
- code = TCL_ERROR;
- continue;
- }
- if (lexeme == END) {
- continue;
+ goto error;
}
- /*
- * Link orphan as left operand of new node.
- */
+ /* Create no node for a CLOSE_PAREN lexeme. */
+ if (lexeme == CLOSE_PAREN) {
+ /* TODO: explain */
+ incompletePtr->left++;
+ break;
+ }
+
+ /* Link complete tree as left operand of new node. */
nodePtr->lexeme = lexeme;
nodePtr->precedence = precedence;
- nodePtr->right = -1;
- nodePtr->left = lastParsed;
- if (lastParsed < 0) {
- nodePtr->parent = nodePtr - nodes - 1;
- } else {
- nodePtr->parent = nodes[lastParsed].parent;
- nodes[lastParsed].parent = nodePtr - nodes;
+ nodePtr->right = OT_NONE;
+ nodePtr->left = complete;
+ if (IsOperator(complete)) {
+ nodes[complete].p.parent = nodesUsed;
}
- lastParsed = nodesUsed;
+
+ /*
+ * With a left operand attached and a right operand missing,
+ * the just-parsed binary operator is root of a new incomplete
+ * tree. Push it onto the stack of incomplete trees.
+ */
+
+ nodePtr->p.prev = incomplete;
+ incomplete = lastParsed = nodesUsed;
nodesUsed++;
break;
- }
- }
+ } /* case BINARY */
+ } /* lexeme handler */
+ /* Advance past the just-parsed lexeme */
start += scanned;
numBytes -= scanned;
+ } /* main parsing loop */
+
+ error:
+
+ /*
+ * We only get here if there's been an error.
+ * Any errors that didn't get a suitable parsePtr->errorType,
+ * get recorded as syntax errors.
+ */
+
+ if (parsePtr->errorType == TCL_PARSE_SUCCESS) {
+ parsePtr->errorType = TCL_PARSE_SYNTAX;
}
- if (code != TCL_OK && nodes != NULL) {
+ /* Free any partial parse tree we've built. */
+ if (nodes != NULL) {
ckfree((char*) nodes);
}
- if (code == TCL_OK) {
- *opTreePtr = nodes;
- } else if (interp == NULL) {
+
+ if (interp == NULL) {
+ /* Nowhere to report an error message, so just free it */
if (msg) {
Tcl_DecrRefCount(msg);
}
} else {
+
+ /*
+ * Construct the complete error message. Start with the simple
+ * error message, pulled from the interp result if necessary...
+ */
+
if (msg == NULL) {
msg = Tcl_GetObjResult(interp);
}
+
+ /*
+ * Add a detailed quote from the bad expression, displaying and
+ * sometimes marking the precise location of the syntax error.
+ */
+
Tcl_AppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"",
((start - limit) < parsePtr->string) ? "" : "...",
((start - limit) < parsePtr->string)
@@ -1111,12 +1190,16 @@ ParseExpr(
? parsePtr->end - (start + scanned) : limit-3,
start + scanned,
(start + scanned + limit > parsePtr->end) ? "" : "...");
+
+ /* Next, append any postscript message. */
if (post != NULL) {
Tcl_AppendToObj(msg, ";\n", -1);
Tcl_AppendObjToObj(msg, post);
Tcl_DecrRefCount(post);
}
Tcl_SetObjResult(interp, msg);
+
+ /* Finally, place context information in the errorInfo. */
numBytes = parsePtr->end - parsePtr->string;
Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
"\n (parsing expression \"%.*s%s\")",
@@ -1124,10 +1207,7 @@ ParseExpr(
parsePtr->string, (numBytes < limit) ? "" : "..."));
}
- if (code != TCL_OK && parsePtr->errorType == TCL_PARSE_SUCCESS) {
- parsePtr->errorType = TCL_PARSE_SYNTAX;
- }
- return code;
+ return TCL_ERROR;
}
/*
@@ -1149,8 +1229,6 @@ static int
GenerateTokensForLiteral(
const char *script,
int numBytes,
- Tcl_Obj *litList,
- int nextLiteral,
Tcl_Parse *parsePtr)
{
int scanned;
@@ -1158,10 +1236,7 @@ GenerateTokensForLiteral(
Tcl_Token *destPtr;
unsigned char lexeme;
- /*
- * Have to reparse to get pointers into source string.
- */
-
+ /* Have to reparse to get pointers into source string. */
scanned = TclParseAllWhiteSpace(start, numBytes);
start +=scanned;
scanned = ParseLexeme(start, numBytes-scanned, &lexeme, NULL);
@@ -1247,16 +1322,13 @@ CopyTokens(
static void
ConvertTreeToTokens(
- Tcl_Interp *interp,
const char *start,
int numBytes,
OpNode *nodes,
- Tcl_Obj *litList,
Tcl_Token *tokenPtr,
Tcl_Parse *parsePtr)
{
OpNode *nodePtr = nodes;
- int nextLiteral = 0;
int scanned, copied, tokenIdx;
unsigned char lexeme;
Tcl_Token *destPtr;
@@ -1298,7 +1370,7 @@ ConvertTreeToTokens(
destPtr->numComponents = 0;
parsePtr->numTokens += 2;
}
- start +=scanned;
+ start += scanned;
numBytes -= scanned;
}
switch (right) {
@@ -1306,7 +1378,7 @@ ConvertTreeToTokens(
break;
case OT_LITERAL:
scanned = GenerateTokensForLiteral(start, numBytes,
- litList, nextLiteral++, parsePtr);
+ parsePtr);
start +=scanned;
numBytes -= scanned;
break;
@@ -1346,7 +1418,7 @@ ConvertTreeToTokens(
destPtr->size = start - destPtr->start;
destPtr->numComponents = parsePtr->numTokens - tokenIdx - 1;
}
- nodePtr = nodes + nodePtr->parent;
+ nodePtr = nodes + nodePtr->p.parent;
}
break;
case BINARY:
@@ -1372,7 +1444,7 @@ ConvertTreeToTokens(
switch (left) {
case OT_LITERAL:
scanned = GenerateTokensForLiteral(start, numBytes,
- litList, nextLiteral++, parsePtr);
+ parsePtr);
start +=scanned;
numBytes -= scanned;
break;
@@ -1412,7 +1484,7 @@ ConvertTreeToTokens(
switch (right) {
case OT_LITERAL:
scanned = GenerateTokensForLiteral(start, numBytes,
- litList, nextLiteral++, parsePtr);
+ parsePtr);
start +=scanned;
numBytes -= scanned;
break;
@@ -1434,7 +1506,7 @@ ConvertTreeToTokens(
destPtr->size = start - destPtr->start;
destPtr->numComponents = parsePtr->numTokens-tokenIdx-1;
}
- nodePtr = nodes + nodePtr->parent;
+ nodePtr = nodes + nodePtr->p.parent;
}
break;
}
@@ -1478,34 +1550,34 @@ Tcl_ParseExpr(
* the parsed expression; any previous
* information in the structure is ignored. */
{
+ int code;
OpNode *opTree = NULL; /* Will point to the tree of operators */
Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */
Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/
Tcl_Parse *exprParsePtr =
(Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse));
/* Holds the Tcl_Tokens of substitutions */
- int code = ParseExpr(interp, start, numBytes, &opTree, litList,
- funcList, exprParsePtr, 1 /* parseOnly */);
- int errorType = exprParsePtr->errorType;
- const char* term = exprParsePtr->term;
if (numBytes < 0) {
numBytes = (start ? strlen(start) : 0);
}
+ code = ParseExpr(interp, start, numBytes, &opTree, litList,
+ funcList, exprParsePtr, 1 /* parseOnly */);
+ Tcl_DecrRefCount(funcList);
+ Tcl_DecrRefCount(litList);
+
TclParseInit(interp, start, numBytes, parsePtr);
if (code == TCL_OK) {
- ConvertTreeToTokens(interp, start, numBytes, opTree, litList,
- exprParsePtr->tokenPtr, parsePtr);
+ ConvertTreeToTokens(start, numBytes,
+ opTree, exprParsePtr->tokenPtr, parsePtr);
} else {
- parsePtr->term = term;
- parsePtr->errorType = errorType;
+ parsePtr->term = exprParsePtr->term;
+ parsePtr->errorType = exprParsePtr->errorType;
}
Tcl_FreeParse(exprParsePtr);
TclStackFree(interp, exprParsePtr);
- Tcl_DecrRefCount(funcList);
- Tcl_DecrRefCount(litList);
ckfree((char *) opTree);
return code;
}
@@ -1783,9 +1855,7 @@ int
TclCompileExpr(
Tcl_Interp *interp, /* Used for error reporting. */
const char *script, /* The source script to compile. */
- int numBytes, /* Number of bytes in script. If < 0, the
- * string consists of all bytes up to the
- * first null character. */
+ int numBytes, /* Number of bytes in script. */
CompileEnv *envPtr) /* Holds resulting instructions. */
{
OpNode *opTree = NULL; /* Will point to the tree of operators */
@@ -1944,7 +2014,7 @@ CompileExprTree(
TclEmitOpcode(instruction[nodePtr->lexeme], envPtr);
*convertPtr = 0;
}
- nodePtr = nodes + nodePtr->parent;
+ nodePtr = nodes + nodePtr->p.parent;
}
break;
case BINARY:
@@ -2089,7 +2159,7 @@ CompileExprTree(
jumpPtr = jumpPtr->next;
TclStackFree(interp, freePtr);
}
- nodePtr = nodes + nodePtr->parent;
+ nodePtr = nodes + nodePtr->p.parent;
}
break;
}
@@ -2149,7 +2219,7 @@ TclSingleOpCmd(
nodes[1].lexeme = lexeme;
nodes[1].left = OT_LITERAL;
nodes[1].right = OT_LITERAL;
- nodes[1].parent = 0;
+ nodes[1].p.parent = 0;
return OpCmd(interp, nodes, objv+1);
}
@@ -2188,10 +2258,10 @@ TclSortingOpCmd(
litObjv[2*(i-1)] = objv[i];
nodes[2*(i-1)].lexeme = AND;
nodes[2*(i-1)].left = lastAnd;
- nodes[lastAnd].parent = 2*(i-1);
+ nodes[lastAnd].p.parent = 2*(i-1);
nodes[2*(i-1)].right = 2*(i-1)+1;
- nodes[2*(i-1)+1].parent= 2*(i-1);
+ nodes[2*(i-1)+1].p.parent= 2*(i-1);
lastAnd = 2*(i-1);
}
@@ -2202,7 +2272,7 @@ TclSortingOpCmd(
nodes[2*(objc-2)-1].right = OT_LITERAL;
nodes[0].right = lastAnd;
- nodes[lastAnd].parent = 0;
+ nodes[lastAnd].p.parent = 0;
code = OpCmd(interp, nodes, litObjv);
@@ -2246,7 +2316,7 @@ TclVariadicOpCmd(
nodes[1].lexeme = lexeme;
nodes[1].left = OT_LITERAL;
nodes[1].right = OT_LITERAL;
- nodes[1].parent = 0;
+ nodes[1].p.parent = 0;
} else {
if (lexeme == DIVIDE) {
litObjv[0] = Tcl_NewDoubleObj(1.0);
@@ -2260,7 +2330,7 @@ TclVariadicOpCmd(
nodes[1].lexeme = lexeme;
nodes[1].left = OT_LITERAL;
nodes[1].right = OT_LITERAL;
- nodes[1].parent = 0;
+ nodes[1].p.parent = 0;
}
code = OpCmd(interp, nodes, litObjv);
@@ -2279,7 +2349,7 @@ TclVariadicOpCmd(
nodes[i].left = OT_LITERAL;
nodes[i].right = lastOp;
if (lastOp >= 0) {
- nodes[lastOp].parent = i;
+ nodes[lastOp].p.parent = i;
}
lastOp = i;
}
@@ -2288,14 +2358,14 @@ TclVariadicOpCmd(
nodes[i].lexeme = lexeme;
nodes[i].left = lastOp;
if (lastOp >= 0) {
- nodes[lastOp].parent = i;
+ nodes[lastOp].p.parent = i;
}
nodes[i].right = OT_LITERAL;
lastOp = i;
}
}
nodes[0].right = lastOp;
- nodes[lastOp].parent = 0;
+ nodes[lastOp].p.parent = 0;
code = OpCmd(interp, nodes, objv+1);
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 1732007..84ffa84 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclParse.c,v 1.52.2.3 2007/06/25 18:53:31 dgp Exp $
+ * RCS: @(#) $Id: tclParse.c,v 1.52.2.4 2007/07/19 22:52:58 dgp Exp $
*/
#include "tclInt.h"
@@ -174,8 +174,8 @@ static CONST char charTypeTable[] = {
static int CommandComplete(CONST char *script, int numBytes);
static int ParseComment(CONST char *src, int numBytes,
Tcl_Parse *parsePtr);
-static int ParseTokens(Tcl_Interp *interp, CONST char *src, int numBytes,
- int mask, int flags, Tcl_Parse *parsePtr);
+static int ParseTokens(CONST char *src, int numBytes, int mask,
+ int flags, Tcl_Parse *parsePtr);
static int ParseWhiteSpace(CONST char *src, int numBytes,
int *incompletePtr, char *typePtr);
@@ -398,7 +398,7 @@ Tcl_ParseCommand(
* the work.
*/
- if (ParseTokens(interp, src, numBytes, TYPE_SPACE|terminators,
+ if (ParseTokens(src, numBytes, TYPE_SPACE|terminators,
TCL_SUBST_ALL, parsePtr) != TCL_OK) {
goto error;
}
@@ -1028,7 +1028,6 @@ ParseComment(
static int
ParseTokens(
- Tcl_Interp *interp,
register CONST char *src, /* First character to parse. */
register int numBytes, /* Max number of bytes to scan. */
int mask, /* Specifies when to stop parsing. The parse
@@ -1121,14 +1120,15 @@ ParseTokens(
src++;
numBytes--;
- nestedPtr = (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse));
+ nestedPtr = (Tcl_Parse *)
+ TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
while (1) {
if (Tcl_ParseCommand(parsePtr->interp, src,
numBytes, 1, nestedPtr) != TCL_OK) {
parsePtr->errorType = nestedPtr->errorType;
parsePtr->term = nestedPtr->term;
parsePtr->incomplete = nestedPtr->incomplete;
- TclStackFree(interp, nestedPtr);
+ TclStackFree(parsePtr->interp, nestedPtr);
return TCL_ERROR;
}
src = nestedPtr->commandStart + nestedPtr->commandSize;
@@ -1153,11 +1153,11 @@ ParseTokens(
parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
parsePtr->term = tokenPtr->start;
parsePtr->incomplete = 1;
- TclStackFree(interp, nestedPtr);
+ TclStackFree(parsePtr->interp, nestedPtr);
return TCL_ERROR;
}
}
- TclStackFree(interp, nestedPtr);
+ TclStackFree(parsePtr->interp, nestedPtr);
tokenPtr->type = TCL_TOKEN_COMMAND;
tokenPtr->size = src - tokenPtr->start;
parsePtr->numTokens++;
@@ -1425,9 +1425,9 @@ Tcl_ParseVarName(
src++;
}
if (numBytes == 0) {
- if (interp != NULL) {
- Tcl_SetResult(interp, "missing close-brace for variable name",
- TCL_STATIC);
+ if (parsePtr->interp != NULL) {
+ Tcl_SetResult(parsePtr->interp,
+ "missing close-brace for variable name", TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;
parsePtr->term = tokenPtr->start-1;
@@ -1488,7 +1488,7 @@ Tcl_ParseVarName(
* any number of substitutions.
*/
- if (TCL_OK != ParseTokens(interp, src+1, numBytes-1, TYPE_CLOSE_PAREN,
+ if (TCL_OK != ParseTokens(src+1, numBytes-1, TYPE_CLOSE_PAREN,
TCL_SUBST_ALL, parsePtr)) {
goto error;
}
@@ -1764,7 +1764,7 @@ Tcl_ParseBraces(
parsePtr->errorType = TCL_PARSE_MISSING_BRACE;
parsePtr->term = start;
parsePtr->incomplete = 1;
- if (interp == NULL) {
+ if (parsePtr->interp == NULL) {
/*
* Skip straight to the exit code since we have no interpreter to put
* error message in.
@@ -1773,7 +1773,7 @@ Tcl_ParseBraces(
goto error;
}
- Tcl_SetResult(interp, "missing close-brace", TCL_STATIC);
+ Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC);
/*
* Guess if the problem is due to comments by searching the source string
@@ -1795,7 +1795,7 @@ Tcl_ParseBraces(
break;
case '#' :
if (openBrace && (isspace(UCHAR(src[-1])))) {
- Tcl_AppendResult(interp,
+ Tcl_AppendResult(parsePtr->interp,
": possible unbalanced brace in comment",
(char *) NULL);
goto error;
@@ -1870,12 +1870,12 @@ Tcl_ParseQuotedString(
TclParseInit(interp, start, numBytes, parsePtr);
}
- if (TCL_OK != ParseTokens(interp, start+1, numBytes-1, TYPE_QUOTE,
+ if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE,
TCL_SUBST_ALL, parsePtr)) {
goto error;
}
if (*parsePtr->term != '"') {
- if (interp != NULL) {
+ if (parsePtr->interp != NULL) {
Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;
@@ -1932,7 +1932,7 @@ Tcl_SubstObj(
* inhibit types of substitution.
*/
- if (TCL_OK != ParseTokens(interp, p, length, /* mask */ 0, flags, parsePtr)) {
+ if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {
/*
* There was a parse error. Save the error message for possible
* reporting later.
@@ -1960,7 +1960,7 @@ Tcl_SubstObj(
parsePtr->incomplete = 0;
parsePtr->errorType = TCL_PARSE_SUCCESS;
} while (TCL_OK !=
- ParseTokens(interp, p, parsePtr->end - p, 0, flags, parsePtr));
+ ParseTokens(p, parsePtr->end - p, 0, flags, parsePtr));
/*
* The good parse will have to be followed by {, (, or [.