diff options
author | dgp <dgp@users.sourceforge.net> | 2007-08-06 20:20:59 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2007-08-06 20:20:59 (GMT) |
commit | 6defebeafdc2b6272b3a90a42a80de6e53225cfc (patch) | |
tree | d4236aa1c2cb95109767a51731d7177898c49a6f | |
parent | d8bf2f02b72a097b5669a09b66a62f75d7952850 (diff) | |
download | tcl-6defebeafdc2b6272b3a90a42a80de6e53225cfc.zip tcl-6defebeafdc2b6272b3a90a42a80de6e53225cfc.tar.gz tcl-6defebeafdc2b6272b3a90a42a80de6e53225cfc.tar.bz2 |
* tests/parseExpr.test: Update source file name of expr parser code.
* generic/tclCompExpr.c: Added a "mark" field to the OpNode
struct, which is used to guide tree traversal. This field costs
nothing since alignement requirements used the memory already.
Rewrote ConvertTreeToTokens() to use the new field, which permitted
consolidation of utility routines CopyTokens() and
GenerateTokensForLiteral().
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 547 | ||||
-rw-r--r-- | tests/parseExpr.test | 6 |
3 files changed, 299 insertions, 265 deletions
@@ -1,3 +1,14 @@ +2007-08-06 Don Porter <dgp@users.sourceforge.net> + + * tests/parseExpr.test: Update source file name of expr parser code. + + * generic/tclCompExpr.c: Added a "mark" field to the OpNode + struct, which is used to guide tree traversal. This field costs + nothing since alignement requirements used the memory already. + Rewrote ConvertTreeToTokens() to use the new field, which permitted + consolidation of utility routines CopyTokens() and + GenerateTokensForLiteral(). + 2007-08-06 Kevin B. Kenny <kennykb@users.sf.net> * generic/tclGetDate.y: Added a cast to the definition of YYFREE to diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index f744974..2e4704f 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompExpr.c,v 1.72 2007/07/18 21:10:45 dgp Exp $ + * RCS: @(#) $Id: tclCompExpr.c,v 1.73 2007/08/06 20:20:59 dgp Exp $ */ #include "tclInt.h" @@ -35,6 +35,7 @@ typedef struct OpNode { } p; unsigned char lexeme; /* Code that identifies the operator. */ unsigned char precedence; /* Precedence of the operator */ + unsigned char mark; /* Mark used to control inorder traversal. */ } OpNode; /* @@ -103,6 +104,20 @@ enum OperandTypes { * binary operators get stored in an OpNode. Other lexmes get different * treatement. * + * The precedence field provides a place to store the precedence of the + * operator, so it need not be looked up again and again. + * + * The mark field is use to control the inorder traversal of the tree, so + * that it can be done non-recursively. The mark values are: + */ + +enum Marks { + MARK_LEFT, /* Next step of traversal is to visit left subtree */ + MARK_RIGHT, /* Next step of traversal is to visit right subtree */ + MARK_PARENT, /* Next step of traversal is to return to parent */ +}; + +/* * Each lexeme belongs to one of four categories, which determine * its place in the parse tree. We use the two high bits of the * (unsigned char) value to store a NODE_TYPE code. @@ -394,9 +409,6 @@ static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, static void ConvertTreeToTokens(const char *start, int numBytes, OpNode *nodes, Tcl_Token *tokenPtr, Tcl_Parse *parsePtr); -static int CopyTokens(Tcl_Token *sourcePtr, Tcl_Parse *parsePtr); -static int GenerateTokensForLiteral(const char *script, - int numBytes, Tcl_Parse *parsePtr); static int ParseExpr(Tcl_Interp *interp, const char *start, int numBytes, OpNode **opTreePtr, Tcl_Obj *litList, Tcl_Obj *funcList, @@ -516,6 +528,7 @@ ParseExpr( nodes->precedence = prec[START]; nodes->left = OT_NONE; nodes->right = OT_NONE; + nodes->mark = MARK_RIGHT; incomplete = lastParsed = nodesUsed; nodesUsed++; @@ -715,19 +728,19 @@ ParseExpr( switch (lexeme) { case QUOTED: - code = Tcl_ParseQuotedString(interp, start, numBytes, + code = Tcl_ParseQuotedString(NULL, start, numBytes, parsePtr, 1, &end); scanned = end - start; break; case BRACED: - code = Tcl_ParseBraces(interp, start, numBytes, + code = Tcl_ParseBraces(NULL, start, numBytes, parsePtr, 1, &end); scanned = end - start; break; case VARIABLE: - code = Tcl_ParseVarName(interp, start, numBytes, parsePtr, 1); + code = Tcl_ParseVarName(NULL, start, numBytes, parsePtr, 1); /* * Handle the quirk that Tcl_ParseVarName reports a successful @@ -866,6 +879,7 @@ ParseExpr( nodePtr->left = OT_NONE; /* No left operand */ nodePtr->right = OT_NONE; /* Right operand not * yet known. */ + nodePtr->mark = MARK_RIGHT; /* * This unary operator is a new incomplete tree, so push it @@ -1117,6 +1131,7 @@ ParseExpr( nodePtr->lexeme = lexeme; nodePtr->precedence = precedence; nodePtr->right = OT_NONE; + nodePtr->mark = MARK_LEFT; nodePtr->left = complete; if (IsOperator(complete)) { nodes[complete].p.parent = nodesUsed; @@ -1213,103 +1228,16 @@ ParseExpr( /* *---------------------------------------------------------------------- * - * GenerateTokensForLiteral -- - * - * Results: - * Number of bytes scanned. - * - * Side effects: - * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the - * literal. - * - *---------------------------------------------------------------------- - */ - -static int -GenerateTokensForLiteral( - const char *script, - int numBytes, - Tcl_Parse *parsePtr) -{ - int scanned; - const char *start = script; - Tcl_Token *destPtr; - unsigned char lexeme; - - /* Have to reparse to get pointers into source string. */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - scanned = ParseLexeme(start, numBytes-scanned, &lexeme, NULL); - - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 1; - destPtr++; - destPtr->type = TCL_TOKEN_TEXT; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - parsePtr->numTokens += 2; - - return (start + scanned - script); -} - -/* - *---------------------------------------------------------------------- - * - * CopyTokens -- - * - * Results: - * Number of bytes scanned. - * - * Side effects: - * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the - * literal. - * - *---------------------------------------------------------------------- - */ - -static int -CopyTokens( - Tcl_Token *sourcePtr, - Tcl_Parse *parsePtr) -{ - int toCopy = sourcePtr->numComponents + 1; - Tcl_Token *destPtr; - - if (sourcePtr->numComponents == sourcePtr[1].numComponents + 1) { - while (parsePtr->numTokens + toCopy - 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - memcpy(destPtr, sourcePtr, (size_t) toCopy * sizeof(Tcl_Token)); - destPtr->type = TCL_TOKEN_SUB_EXPR; - parsePtr->numTokens += toCopy; - } else { - while (parsePtr->numTokens + toCopy >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - *destPtr = *sourcePtr; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->numComponents++; - destPtr++; - memcpy(destPtr, sourcePtr, (size_t) toCopy * sizeof(Tcl_Token)); - parsePtr->numTokens += toCopy + 1; - } - return toCopy; -} - -/* - *---------------------------------------------------------------------- - * * ConvertTreeToTokens -- * + * Given a string, the numBytes bytes starting at start, and an OpNode + * tree and Tcl_Token array created by passing that same string to + * ParseExpr(), this function writes into *parsePtr the sequence of + * Tcl_Tokens needed so to satisfy the historical interface provided + * by Tcl_ParseExpr(). Note that this routine exists only for the sake + * of the public Tcl_ParseExpr() routine. It is not used by Tcl itself + * at all. + * * Results: * None. * @@ -1328,187 +1256,282 @@ ConvertTreeToTokens( Tcl_Token *tokenPtr, Tcl_Parse *parsePtr) { + int subExprTokenIdx = 0; OpNode *nodePtr = nodes; - int scanned, copied, tokenIdx; - unsigned char lexeme; - Tcl_Token *destPtr; + int next = nodePtr->right; while (1) { - switch (NODE_TYPE & nodePtr->lexeme) { - case UNARY: - if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; + Tcl_Token *subExprTokenPtr; + int scanned, parentIdx; + unsigned char lexeme; - nodePtr->right = OT_NONE; - if (nodePtr->lexeme != START) { - /* - * Find operator in string. - */ + /* + * Advance the mark so the next exit from this node won't retrace + * steps over ground already covered. + */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - if (lexeme != nodePtr->lexeme) { - if (lexeme != (nodePtr->lexeme & ~NODE_TYPE)) { - Tcl_Panic("lexeme mismatch"); - } - } - if (nodePtr->lexeme != OPEN_PAREN) { - if (parsePtr->numTokens + 1 - >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - nodePtr->right = OT_NONE - parsePtr->numTokens; - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start; - destPtr++; - destPtr->type = TCL_TOKEN_OPERATOR; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - parsePtr->numTokens += 2; - } - start += scanned; - numBytes -= scanned; - } - switch (right) { - case OT_EMPTY: - break; - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + right; + nodePtr->mark++; + + /* Handle next child node or leaf */ + switch (next) { + case OT_EMPTY: + /* No tokens and no characters for the OT_EMPTY leaf. */ + break; + + case OT_LITERAL: + /* Skip any white space that comes before the literal */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* Reparse the literal to get pointers into source string */ + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->start = start; + subExprTokenPtr->size = scanned; + subExprTokenPtr->numComponents = 1; + subExprTokenPtr[1].type = TCL_TOKEN_TEXT; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + subExprTokenPtr[1].numComponents = 0; + + parsePtr->numTokens += 2; + start +=scanned; + numBytes -= scanned; + break; + + case OT_TOKENS: { + /* + * tokenPtr points to a token sequence that came from parsing + * a Tcl word. A Tcl word is made up of a sequence of one or + * more elements. When the word is only a single element, it's + * been the historical practice to replace the TCL_TOKEN_WORD + * token directly with a TCL_TOKEN_SUB_EXPR token. However, + * when the word has multiple elements, a TCL_TOKEN_WORD token + * is kept as a grouping device so that TCL_TOKEN_SUB_EXPR + * always has only one element. Wise or not, these are the + * rules the Tcl expr parser has followed, and for the sake + * of those few callers of Tcl_ParseExpr() we do not change + * them now. Internally, we can do better. + */ + + int toCopy = tokenPtr->numComponents + 1; + + if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { + /* + * Single element word. Copy tokens and convert the leading + * token to TCL_TOKEN_SUB_EXPR. + */ + while (parsePtr->numTokens + toCopy - 1 + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); } + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + parsePtr->numTokens += toCopy; } else { - if (nodePtr->lexeme == START) { - /* - * We're done. - */ - - return; + /* + * Multiple element word. Create a TCL_TOKEN_SUB_EXPR + * token to lead, with fields initialized from the leading + * token, then copy entire set of word tokens. + */ + while (parsePtr->numTokens + toCopy + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); } - if (nodePtr->lexeme == OPEN_PAREN) { - /* - * Skip past matching close paren. - */ + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *subExprTokenPtr = *tokenPtr; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->numComponents++; + subExprTokenPtr++; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + parsePtr->numTokens += toCopy + 1; + } - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - start +=scanned; - numBytes -= scanned; - } else { - tokenIdx = OT_NONE - nodePtr->right; - nodePtr->right = OT_NONE; - destPtr = parsePtr->tokenPtr + tokenIdx; - destPtr->size = start - destPtr->start; - destPtr->numComponents = parsePtr->numTokens - tokenIdx - 1; + scanned = tokenPtr->start + tokenPtr->size - start; + start +=scanned; + numBytes -= scanned; + tokenPtr += toCopy; + break; + } + + default: + /* Advance to the child node, which is an operator. */ + nodePtr = nodes + next; + + /* Skip any white space that comes before the subexpression */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* Generate tokens for the operator / subexpression... */ + switch (nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + /* + * Historical practice has been to have no Tcl_Tokens for + * these operators. + */ + break; + + default: { + /* + * Remember the index of the last subexpression we were + * working on -- that of our parent. We'll stack it later. + */ + + parentIdx = subExprTokenIdx; + + /* + * Verify space for the two leading Tcl_Tokens representing + * the subexpression rooted by this operator. The first + * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second + * of type TCL_TOKEN_OPERATOR. + */ + + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); } - nodePtr = nodes + nodePtr->p.parent; + subExprTokenIdx = parsePtr->numTokens; + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + parsePtr->numTokens += 2; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr[1].type = TCL_TOKEN_OPERATOR; + + /* + * Our current position scanning the string is the starting + * point for this subexpression. + */ + + subExprTokenPtr->start = start; + + /* + * Eventually, we know that the numComponents field of the + * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means + * we can make other use of this field for now to track the + * stack of subexpressions we have pending. + */ + + subExprTokenPtr[1].numComponents = parentIdx; + break; + } } break; - case BINARY: - if (nodePtr->left > OT_NONE) { - int left = nodePtr->left; + } - nodePtr->left = OT_NONE; - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - nodePtr->left = OT_NONE - parsePtr->numTokens; - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start; - destPtr++; - destPtr->type = TCL_TOKEN_OPERATOR; - parsePtr->numTokens += 2; - } - switch (left) { - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + left; - } - } else if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; + /* Determine which way to exit the node on this pass. */ + router: + switch (nodePtr->mark) { + case MARK_LEFT: + next = nodePtr->left; + break; - nodePtr->right = OT_NONE; + case MARK_RIGHT: + next = nodePtr->right; + + /* Skip any white space that comes before the operator */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* + * Here we scan from the string the operator corresponding to + * nodePtr->lexeme. + */ + + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + switch(nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + /* No tokens for these lexemes -> nothing to do. */ + break; + + default: + /* + * Record in the TCL_TOKEN_OPERATOR token the pointers into + * the string marking where the operator is. + */ + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + break; + } + + start +=scanned; + numBytes -= scanned; + break; + + case MARK_PARENT: + switch (nodePtr->lexeme) { + case START: + /* When we get back to the START node, we're done. */ + return; + + case COMMA: + case COLON: + /* No tokens for these lexemes -> nothing to do. */ + break; + + case OPEN_PAREN: + /* Skip past matching close paren. */ scanned = TclParseAllWhiteSpace(start, numBytes); start +=scanned; numBytes -= scanned; scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - if (lexeme != nodePtr->lexeme) { - if (lexeme != (nodePtr->lexeme & ~NODE_TYPE)) { - Tcl_Panic("lexeme mismatch"); - } - } - - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - tokenIdx = OT_NONE - nodePtr->left; - destPtr = parsePtr->tokenPtr + tokenIdx + 1; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - } start +=scanned; numBytes -= scanned; - switch (right) { - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + right; - } - } else { - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - tokenIdx = OT_NONE - nodePtr->left; - nodePtr->left = OT_NONE; - destPtr = parsePtr->tokenPtr + tokenIdx; - destPtr->size = start - destPtr->start; - destPtr->numComponents = parsePtr->numTokens-tokenIdx-1; - } - nodePtr = nodes + nodePtr->p.parent; + break; + + default: { + + /* + * Before we leave this node/operator/subexpression for the + * last time, finish up its tokens.... + * + * Our current position scanning the string is where the + * substring for the subexpression ends. + */ + + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr->size = start - subExprTokenPtr->start; + + /* + * All the Tcl_Tokens allocated and filled belong to + * this subexpresion. The first token is the leading + * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) + * are its components. + */ + + subExprTokenPtr->numComponents = + (parsePtr->numTokens - subExprTokenIdx) - 1; + + /* + * Finally, as we return up the tree to our parent, pop the + * parent subexpression off our subexpression stack, and + * fill in the zero numComponents for the operator Tcl_Token. + */ + + parentIdx = subExprTokenPtr[1].numComponents; + subExprTokenPtr[1].numComponents = 0; + subExprTokenIdx = parentIdx; + break; } - break; + } + + /* Since we're returning to parent, skip child handling code. */ + nodePtr = nodes + nodePtr->p.parent; + goto router; } } } diff --git a/tests/parseExpr.test b/tests/parseExpr.test index f89727d..3a79fcb 100644 --- a/tests/parseExpr.test +++ b/tests/parseExpr.test @@ -1,5 +1,5 @@ # This file contains a collection of tests for the procedures in the -# file tclParseExpr.c. Sourcing this file into Tcl runs the tests and +# file tclCompExpr.c. Sourcing this file into Tcl runs the tests and # generates output for errors. No output means no errors were found. # # Copyright (c) 1997 Sun Microsystems, Inc. @@ -8,14 +8,14 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: parseExpr.test,v 1.26 2007/07/16 19:50:46 dgp Exp $ +# RCS: @(#) $Id: parseExpr.test,v 1.27 2007/08/06 20:21:00 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 namespace import -force ::tcltest::* } -# Note that the Tcl expression parser (tclParseExpr.c) does not check +# Note that the Tcl expression parser (tclCompExpr.c) does not check # the semantic validity of the expressions it parses. It does not check, # for example, that a math function actually exists, or that the operands # of "<<" are integers. |