diff options
author | dgp <dgp@users.sourceforge.net> | 2006-07-05 05:34:42 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2006-07-05 05:34:42 (GMT) |
commit | f8202fbf0e8d9c875afd03460d20b5b83c0aa10c (patch) | |
tree | dd2a47b4c7d1a184335dede04e8b90d2073b63cf /generic | |
parent | 78a8ff907a6cc9f17b52a310bef7c37890273c3c (diff) | |
download | tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.zip tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.gz tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.bz2 |
* generic/tclParseExpr.c: Completely new expression parser
that builds a parse tree instead of operating with deep recursion.
This corrects reports of stack-blowing crashes parsing long
expressions [Bug 906201] and replaces a fundamentally O(N^2)
algorithm with an O(N) one [RFE 903765]. The new parser is better
able to generate error messages that clearly report both the nature
and context of the syntax error [Bugs 1029267, 1381715]. For now,
the code for the old parser is still present and can be activated
with a "#define OLD_EXPR_PARSER 1". This is for the sake of a clean
implementation patch, and for ease of benchmarking. The new parser
is non-recursive, so much lighter in stack consumption, but it
does use more heap, so there may be cases where parsing of long
expressions that succeeded with the old parser will lead to out
of memory panics with the new one. There are still more improvements
possible on that point, though significant progress may require
changes to the Tcl_Token specifications documented for the public
Tcl_Parse*() routines.
***POTENTIAL INCOMPATIBILITY*** for any callers that rely on the
exact (usually terrible) error messages generated by the old parser.
This includes a large number of tests in the test suite.
* generic/tclInt.h: Replaced TclParseWhiteSpace() with
* generic/tclParse.c: TclParseAllWhiteSpace() which is what
* generic/tclParseExpr.c: all the callers really needed.
Breaking whitespace runs at newlines is useful only to the command
parsing function, and it can call the file scoped routine
ParseWhiteSpace() to do that.
* tests/expr-old.test: Removed knownBug constraints that masked
* tests/expr.test: failures due to revised error messages.
* tests/parseExpr.test:
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclInt.h | 5 | ||||
-rw-r--r-- | generic/tclParse.c | 64 | ||||
-rw-r--r-- | generic/tclParseExpr.c | 1150 |
3 files changed, 1192 insertions, 27 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h index 0ef3b96..35addc8 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.271 2006/06/16 18:35:55 dgp Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.272 2006/07/05 05:34:44 dgp Exp $ */ #ifndef _TCLINT @@ -2137,8 +2137,7 @@ MODULE_SCOPE void TclParseInit(Tcl_Interp *interp, CONST char *string, #if 0 MODULE_SCOPE int TclParseInteger(CONST char *string, int numBytes); #endif -MODULE_SCOPE int TclParseWhiteSpace(CONST char *src, - int numBytes, Tcl_Parse *parsePtr, char *typePtr); +MODULE_SCOPE int TclParseAllWhiteSpace(CONST char *src, int numBytes); MODULE_SCOPE int TclProcessReturn(Tcl_Interp *interp, int code, int level, Tcl_Obj *returnOpts); MODULE_SCOPE int TclpObjLstat(Tcl_Obj *pathPtr, Tcl_StatBuf *buf); diff --git a/generic/tclParse.c b/generic/tclParse.c index 5da1abb..9800537 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParse.c,v 1.45 2005/11/02 14:51:04 dkf Exp $ + * RCS: @(#) $Id: tclParse.c,v 1.46 2006/07/05 05:34:45 dgp Exp $ */ #include "tclInt.h" @@ -176,6 +176,9 @@ static int ParseComment(CONST char *src, int numBytes, Tcl_Parse *parsePtr); static int ParseTokens(CONST char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); +static int ParseWhiteSpace(CONST char *src, int numBytes, + Tcl_Parse *parsePtr, char *typePtr); + /* *---------------------------------------------------------------------- @@ -325,7 +328,7 @@ Tcl_ParseCommand( * sequence: it should be treated just like white space. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); src += scanned; numBytes -= scanned; if (numBytes == 0) { @@ -390,8 +393,7 @@ Tcl_ParseCommand( ) /* Is the prefix */ && (numBytes > 0) - && (TclParseWhiteSpace(termPtr, numBytes, parsePtr, - &type) == 0) + && (ParseWhiteSpace(termPtr, numBytes, parsePtr, &type) == 0) && (type != TYPE_COMMAND_END) /* Non-whitespace follows */ ) { @@ -435,7 +437,7 @@ Tcl_ParseCommand( * word), and (b) check for the end of the command. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); if (scanned) { src += scanned; numBytes -= scanned; @@ -480,10 +482,10 @@ Tcl_ParseCommand( /* *---------------------------------------------------------------------- * - * TclParseWhiteSpace -- + * ParseWhiteSpace -- * - * Scans up to numBytes bytes starting at src, consuming white space as - * defined by Tcl's parsing rules. + * Scans up to numBytes bytes starting at src, consuming white space + * between words as defined by Tcl's parsing rules. * * Results: * Returns the number of bytes recognized as white space. Records at @@ -497,8 +499,8 @@ Tcl_ParseCommand( *---------------------------------------------------------------------- */ -int -TclParseWhiteSpace( +static int +ParseWhiteSpace( CONST char *src, /* First character to parse. */ register int numBytes, /* Max number of bytes to scan. */ Tcl_Parse *parsePtr, /* Information about parse in progress. @@ -541,6 +543,38 @@ TclParseWhiteSpace( /* *---------------------------------------------------------------------- * + * TclParseAllWhiteSpace -- + * + * Scans up to numBytes bytes starting at src, consuming all white space + * including the command-terminating newline characters. + * + * Results: + * Returns the number of bytes recognized as white space. + * + *---------------------------------------------------------------------- + */ + +int +TclParseAllWhiteSpace( + CONST char *src, /* First character to parse. */ + int numBytes) /* Max number of byes to scan */ +{ + Tcl_Parse dummy; /* Since we know ParseWhiteSpace() generates + * no tokens, there's no need for a call to + * Tcl_FreeParse() in this routine */ + char type; + CONST char *p = src; + do { + int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); + p += scanned; + numBytes -= scanned; + } while (numBytes && (*p == '\n') && (p++, --numBytes)); + return (p-src); +} + +/* + *---------------------------------------------------------------------- + * * TclParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing @@ -801,11 +835,9 @@ ParseComment( char type; int scanned; - do { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); - p += scanned; - numBytes -= scanned; - } while (numBytes && (*p == '\n') && (p++,numBytes--)); + scanned = TclParseAllWhiteSpace(p, numBytes); + p += scanned; + numBytes -= scanned; if ((numBytes == 0) || (*p != '#')) { break; @@ -816,7 +848,7 @@ ParseComment( while (numBytes) { if (*p == '\\') { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(p, numBytes, parsePtr, &type); if (scanned) { p += scanned; numBytes -= scanned; diff --git a/generic/tclParseExpr.c b/generic/tclParseExpr.c index 688447b..98683cc 100644 --- a/generic/tclParseExpr.c +++ b/generic/tclParseExpr.c @@ -12,9 +12,12 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParseExpr.c,v 1.32 2005/12/19 19:03:17 dgp Exp $ + * RCS: @(#) $Id: tclParseExpr.c,v 1.33 2006/07/05 05:34:45 dgp Exp $ */ +#define OLD_EXPR_PARSER 0 +#if OLD_EXPR_PARSER + #include "tclInt.h" /* @@ -1589,13 +1592,9 @@ GetLexeme( src = infoPtr->next; numBytes = parsePtr->end - src; - do { - char type; - int scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); - - src += scanned; - numBytes -= scanned; - } while (numBytes && (*src == '\n') && (src++,numBytes--)); + length = TclParseAllWhiteSpace(src, numBytes); + src += length; + numBytes -= length; parsePtr->term = src; if (numBytes == 0) { @@ -1970,6 +1969,1141 @@ LogSyntaxError( infoPtr->parsePtr->errorType = TCL_PARSE_SYNTAX; infoPtr->parsePtr->term = infoPtr->start; } +#else + + +#include "tclInt.h" + +/* + * The ExprNode structure represents one node of the parse tree produced + * as an interim structure by the expression parser. + */ + +typedef struct ExprNode { + unsigned char lexeme; /* Code that identifies the type of this node */ + unsigned char precedence; /* Precedence for operator nodes */ + int left; /* Index of the left operand of this operator node */ + int right; /* Index of the right operand of this operator node */ + int parent; /* Index of the operator of this operand node */ + int token; /* Index of the Tcl_Tokens of this leaf node */ +} ExprNode; + +/* + * Set of lexeme codes stored in ExprNode structs to label and categorize + * the lexemes found. + */ + +#define LEAF (1<<7) +#define UNARY (1<<6) +#define BINARY (1<<5) + +#define NODE_TYPE ( LEAF | UNARY | BINARY) + +#define PLUS 1 +#define MINUS 2 +#define BAREWORD 3 +#define INCOMPLETE 4 +#define INVALID 5 + +#define NUMBER ( LEAF | 1) +#define SCRIPT ( LEAF | 2) +#define BOOLEAN ( LEAF | BAREWORD) +#define BRACED ( LEAF | 4) +#define VARIABLE ( LEAF | 5) +#define QUOTED ( LEAF | 6) +#define EMPTY ( LEAF | 7) + +#define UNARY_PLUS ( UNARY | PLUS) +#define UNARY_MINUS ( UNARY | MINUS) +#define FUNCTION ( UNARY | BAREWORD) +#define START ( UNARY | 4) +#define OPEN_PAREN ( UNARY | 5) +#define NOT ( UNARY | 6) +#define BIT_NOT ( UNARY | 7) + +#define BINARY_PLUS ( BINARY | PLUS) +#define BINARY_MINUS ( BINARY | MINUS) +#define COMMA ( BINARY | 3) +#define MULT ( BINARY | 4) +#define DIVIDE ( BINARY | 5) +#define MOD ( BINARY | 6) +#define LESS ( BINARY | 7) +#define GREATER ( BINARY | 8) +#define BIT_AND ( BINARY | 9) +#define BIT_XOR ( BINARY | 10) +#define BIT_OR ( BINARY | 11) +#define QUESTION ( BINARY | 12) +#define COLON ( BINARY | 13) +#define LEFT_SHIFT ( BINARY | 14) +#define RIGHT_SHIFT ( BINARY | 15) +#define LEQ ( BINARY | 16) +#define GEQ ( BINARY | 17) +#define EQUAL ( BINARY | 18) +#define NEQ ( BINARY | 19) +#define AND ( BINARY | 20) +#define OR ( BINARY | 21) +#define STREQ ( BINARY | 22) +#define STRNEQ ( BINARY | 23) +#define EXPON ( BINARY | 24) +#define IN_LIST ( BINARY | 25) +#define NOT_IN_LIST ( BINARY | 26) +#define CLOSE_PAREN ( BINARY | 27) +#define END ( BINARY | 28) + +/* + * Declarations for local functions to this file: + */ + +static void GenerateTokens(ExprNode *nodes, Tcl_Parse *scratchPtr, + Tcl_Parse *parsePtr); +static int ParseLexeme(CONST char *start, int numBytes, + unsigned char *lexemePtr); +static unsigned char PrecedenceOf(unsigned char operand); + + +/* + *---------------------------------------------------------------------- + * + * Tcl_ParseExpr -- + * + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and stores information about the + * structure of the expression in the Tcl_Parse struct indicated by the + * caller. + * + * Results: + * If the string is successfully parsed as a valid Tcl expression, + * TCL_OK is returned, and data about the expression structure is + * written to *parsePtr. If the string cannot be parsed as a valid + * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL, + * an error message is written to interp. + * + * Side effects: + * If there is insufficient space in parsePtr to hold all the information + * about the expression, then additional space is malloc-ed. If the + * function returns TCL_OK then the caller must eventually invoke + * Tcl_FreeParse to release any additional space that was allocated. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + CONST char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. If < 0, the + * string consists of all bytes up to the + * first null character. */ + Tcl_Parse *parsePtr) /* Structure to fill with information about + * the parsed expression; any previous + * information in the structure is ignored. */ +{ +#define NUM_STATIC_NODES 64 + ExprNode staticNodes[NUM_STATIC_NODES]; + ExprNode *lastOrphanPtr, *nodes = staticNodes; + int nodesAvailable = NUM_STATIC_NODES; + int nodesUsed = 0; + Tcl_Parse scratch; /* Parsing scratch space */ + Tcl_Obj *msg = NULL; + unsigned char precedence; + CONST char *space, *operand, *end; + int scanned, size, limit = 25, code = TCL_OK; + + if (numBytes < 0) { + numBytes = (start ? strlen(start) : 0); + } + + TclParseInit(interp, start, numBytes, &scratch); + TclParseInit(interp, start, numBytes, parsePtr); + + /* Initialize the parse tree with the special "START" node */ + + nodes->lexeme = START; + nodes->precedence = PrecedenceOf(START); + nodes->left = -1; + nodes->right = -1; + nodes->parent = -1; + nodes->token = -1; + lastOrphanPtr = nodes; + nodesUsed++; + + while ((code == TCL_OK) && (lastOrphanPtr->lexeme != END)) { + ExprNode *nodePtr, *lastNodePtr; + Tcl_Token *tokenPtr; + + /* + * Each pass through this loop adds one more ExprNode. + * Allocate space for one if required. + */ + if (nodesUsed >= nodesAvailable) { + int lastOrphanIdx = lastOrphanPtr - nodes; + ExprNode *newPtr; + + size = nodesUsed * 2; + if (nodes == staticNodes) { + nodes = NULL; + } + do { + newPtr = (ExprNode *) attemptckrealloc( (char *) nodes, + (unsigned int) (size * sizeof(ExprNode)) ); + } while ((newPtr == NULL) + && ((size -= (size - nodesUsed) / 2) > nodesUsed)); + if (newPtr == NULL) { + msg = Tcl_NewStringObj( + "not enough memory to parse expression", -1); + code = TCL_ERROR; + continue; + } + nodesAvailable = size; + if (nodes == NULL) { + memcpy((VOID *) newPtr, (VOID *) staticNodes, + (size_t) (nodesUsed * sizeof(ExprNode))); + } + nodes = newPtr; + lastOrphanPtr = nodes + lastOrphanIdx; + } + nodePtr = nodes + nodesUsed; + lastNodePtr = nodePtr - 1; + + /* Skip white space between lexemes */ + + space = start; /* Remember where last lexeme ended */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + scanned = ParseLexeme(start, numBytes, &(nodePtr->lexeme)); + + /* Use context to categorize the lexemes that are ambiguous */ + + if ((NODE_TYPE & nodePtr->lexeme) == 0) { + switch (nodePtr->lexeme) { + case INVALID: + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "invalid character \"%.*s\" in expression", + scanned, start); + code = TCL_ERROR; + continue; + case INCOMPLETE: + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "incomplete operator \"%.*s\" in expression", + scanned, start); + code = TCL_ERROR; + continue; + case BAREWORD: + if (start[scanned+TclParseAllWhiteSpace( + start+scanned, numBytes-scanned)] == '(') { + nodePtr->lexeme = FUNCTION; + } else { + Tcl_Obj *objPtr = Tcl_NewStringObj(start, scanned); + Tcl_IncrRefCount(objPtr); + code = Tcl_ConvertToType(NULL, objPtr, &tclBooleanType); + Tcl_DecrRefCount(objPtr); + if (code == TCL_OK) { + nodePtr->lexeme = BOOLEAN; + } else { + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "invalid bareword \"%.*s\" in expression", + scanned, start); + Tcl_AppendToObj(msg, + "\n (prepend $ for variable; ", -1); + Tcl_AppendToObj(msg, + "append argument list for function call)", -1); + continue; + } + } + break; + case PLUS: + case MINUS: + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + nodePtr->lexeme |= BINARY; + } else { + nodePtr->lexeme |= UNARY; + } + } + } + + /* Add node to parse tree based on category */ + + switch (NODE_TYPE & nodePtr->lexeme) { + case LEAF: + + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + msg = Tcl_NewStringObj("missing operator ", -1); + while (lastNodePtr->parent >= 0) { + lastNodePtr = nodes + lastNodePtr->parent; + } + while (lastNodePtr->left >= 0) { + lastNodePtr = nodes + lastNodePtr->left; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + switch (nodePtr->lexeme) { + case NUMBER: + case BOOLEAN: + TclObjPrintf(NULL, msg, + "between operands \"%s%.*s\" and \"%.*s%s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit, + (scanned < limit) ? scanned : limit - 3, + (scanned < limit) ? start : start+scanned+3-limit, + (scanned < limit) ? "" : "..."); + if ((operand[0] == '0') + && TclCheckBadOctal(NULL, operand)) { + Tcl_AppendToObj(msg, + "\n (looks like invalid octal number)", -1); + } + break; + default: + TclObjPrintf(NULL, msg, "following operand \"%s%.*s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit); + } + code = TCL_ERROR; + continue; + } + + if (scratch.numTokens+1 >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_SUB_EXPR; + tokenPtr->start = start; + scratch.numTokens++; + + switch (nodePtr->lexeme) { + case NUMBER: + case BOOLEAN: + tokenPtr = scratch.tokenPtr + scratch.numTokens; + tokenPtr->type = TCL_TOKEN_TEXT; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + break; + + case QUOTED: + code = Tcl_ParseQuotedString(interp, start, numBytes, + &scratch, 1, &end); + if (code != TCL_OK) { + continue; + } + scanned = end - start; + break; + + case BRACED: + code = Tcl_ParseBraces(interp, start, numBytes, + &scratch, 1, &end); + if (code != TCL_OK) { + continue; + } + scanned = end - start; + break; + + case VARIABLE: + code = Tcl_ParseVarName(interp, start, numBytes, &scratch, 1); + if (code != TCL_OK) { + continue; + } + tokenPtr = scratch.tokenPtr + nodePtr->token + 1; + if (tokenPtr->type != TCL_TOKEN_VARIABLE) { + msg = Tcl_NewStringObj( + "invalid character \"$\" in expression", -1); + code = TCL_ERROR; + continue; + } + scanned = tokenPtr->size; + break; + + case SCRIPT: + tokenPtr = scratch.tokenPtr + scratch.numTokens; + tokenPtr->type = TCL_TOKEN_COMMAND; + tokenPtr->start = start; + tokenPtr->numComponents = 0; + + end = start + numBytes; + start++; + while (1) { + Tcl_Parse nested; + code = Tcl_ParseCommand(interp, + start, (end - start), 1, &nested); + if (code != TCL_OK) { + parsePtr->term = nested.term; + parsePtr->errorType = nested.errorType; + parsePtr->incomplete = nested.incomplete; + break; + } + start = (nested.commandStart + nested.commandSize); + Tcl_FreeParse(&nested); + if ((nested.term < end) && (*nested.term == ']') + && !nested.incomplete) { + break; + } + + if (start == end) { + msg = Tcl_NewStringObj("missing close-bracket", -1); + parsePtr->term = tokenPtr->start; + parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; + parsePtr->incomplete = 1; + code = TCL_ERROR; + break; + } + } + if (code != TCL_OK) { + continue; + } + end = start; + start = tokenPtr->start; + scanned = end - start; + tokenPtr->size = scanned; + scratch.numTokens++; + break; + } + + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->size = scanned; + tokenPtr->numComponents = scratch.numTokens - nodePtr->token - 1; + + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->precedence = 0; + nodePtr->parent = -1; + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + case UNARY: + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + msg = Tcl_NewObj(); + while (lastNodePtr->parent >= 0) { + lastNodePtr = nodes + lastNodePtr->parent; + } + while (lastNodePtr->left >= 0) { + lastNodePtr = nodes + lastNodePtr->left; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + TclObjPrintf(NULL, msg, + "missing operator following operand \"%s%.*s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit); + code = TCL_ERROR; + continue; + } + nodePtr->precedence = PrecedenceOf(nodePtr->lexeme); + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->parent = -1; + + if (scratch.numTokens >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_OPERATOR; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + case BINARY: { + ExprNode *otherPtr; + + if ((nodePtr->lexeme == CLOSE_PAREN) + && (lastNodePtr->lexeme == OPEN_PAREN)) { + if (lastNodePtr[-1].lexeme == FUNCTION) { + /* Normally, "()" is a syntax error, but as a special + * case accept it as an argument list for a function */ + scanned = 0; + nodePtr->lexeme = EMPTY; + nodePtr->precedence = 0; + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->parent = -1; + nodePtr->token = -1; + + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + } + msg = Tcl_NewStringObj("empty subexpression", -1); + code = TCL_ERROR; + continue; + } + + precedence = PrecedenceOf(nodePtr->lexeme); + + if ((NODE_TYPE & lastNodePtr->lexeme) != LEAF) { + msg = Tcl_NewObj(); + if (lastNodePtr->precedence > precedence) { + if (lastNodePtr->lexeme == OPEN_PAREN) { + lastOrphanPtr = lastNodePtr; + msg = Tcl_NewStringObj("unbalanced open paren", -1); + code = TCL_ERROR; + continue; + } + if (lastNodePtr->lexeme == COMMA) { + msg = Tcl_NewStringObj("missing function argument", -1); + code = TCL_ERROR; + continue; + } + if (lastNodePtr->lexeme == START) { + msg = Tcl_NewStringObj("empty expression", -1); + code = TCL_ERROR; + continue; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + TclObjPrintf(NULL, msg, + "missing right operand following operator \"%.*s\"", + size, operand); + } else { + if (nodePtr->lexeme == CLOSE_PAREN) { + msg = Tcl_NewStringObj("unbalanced close paren", -1); + } else { + TclObjPrintf(NULL, msg, + "missing left operand before operator \"%.*s\"", + scanned, start); + } + } + code = TCL_ERROR; + continue; + } + + while (1) { + otherPtr = lastOrphanPtr; + while (otherPtr->left >= 0) { + otherPtr = nodes + otherPtr->left; + } + otherPtr--; + + if (otherPtr->precedence < precedence) { + break; + } + + /* Special association rules for the ternary operators */ + if (otherPtr->precedence == precedence) { + if ((otherPtr->lexeme == QUESTION) + && (lastOrphanPtr->lexeme != COLON)) { + break; + } + if ((otherPtr->lexeme == COLON) + && (nodePtr->lexeme == QUESTION)) { + break; + } + } + + /* Some checks before linking */ + if ((otherPtr->lexeme == OPEN_PAREN) + && (nodePtr->lexeme != CLOSE_PAREN)) { + lastOrphanPtr = otherPtr; + msg = Tcl_NewStringObj("unbalanced open paren", -1); + code = TCL_ERROR; + break; + } + if ((otherPtr->lexeme == QUESTION) + && (lastOrphanPtr->lexeme != COLON)) { + msg = Tcl_NewStringObj( + "missing operator \":\" in ternary conditional", + -1); + code = TCL_ERROR; + break; + } + if ((lastOrphanPtr->lexeme == COLON) + && (otherPtr->lexeme != QUESTION)) { + msg = Tcl_NewStringObj( + "unexpected operator \":\" without preceding \"?\"", + -1); + code = TCL_ERROR; + break; + } + if ((lastOrphanPtr->lexeme == COMMA) + && ((otherPtr->lexeme != OPEN_PAREN) + || (otherPtr[-1].lexeme != FUNCTION)) ) { + msg = Tcl_NewStringObj( + "unexpected \",\" outside function argument list", + -1); + code = TCL_ERROR; + break; + } + + /* Link orphan as right operand of otherPtr */ + otherPtr->right = lastOrphanPtr - nodes; + lastOrphanPtr->parent = otherPtr - nodes; + lastOrphanPtr = otherPtr; + + if (otherPtr->lexeme == OPEN_PAREN) { + /* CLOSE_PAREN can only close one OPEN_PAREN */ + tokenPtr = scratch.tokenPtr + otherPtr->token; + tokenPtr->size = start + scanned - tokenPtr->start; + break; + } + if (otherPtr->lexeme == START) { + /* Don't backtrack beyond the start */ + break; + } + } + if (code != TCL_OK) { + continue; + } + + if (nodePtr->lexeme == CLOSE_PAREN) { + if (otherPtr->lexeme == START) { + msg = Tcl_NewStringObj("unbalanced close paren", -1); + code = TCL_ERROR; + continue; + } + /* Create no node for a CLOSE_PAREN lexeme */ + break; + } + + if (lastOrphanPtr->lexeme == COLON) { + msg = Tcl_NewStringObj( + "unexpected operator \":\" without preceding \"?\"", + -1); + code = TCL_ERROR; + continue; + } + + /* Link orphan as left operand of new node */ + nodePtr->precedence = PrecedenceOf(nodePtr->lexeme); + nodePtr->right = -1; + nodePtr->parent = -1; + + if (scratch.numTokens >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_OPERATOR; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + nodePtr->left = lastOrphanPtr - nodes; + lastOrphanPtr->parent = nodePtr - nodes; + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + } + } + + start += scanned; + numBytes -= scanned; + } + + if (code == TCL_OK) { + /* Shift tokens from scratch space to caller space */ + GenerateTokens(nodes, &scratch, parsePtr); + } else { + if (parsePtr->errorType == TCL_PARSE_SUCCESS) { + parsePtr->errorType = TCL_PARSE_SYNTAX; + parsePtr->term = start; + } + if (interp == NULL) { + if (msg) { + Tcl_DecrRefCount(msg); + } + } else { + CONST char *subexpr = NULL; + if (msg == NULL) { + msg = Tcl_GetObjResult(interp); + } + while (lastOrphanPtr != nodes) { + if (lastOrphanPtr->lexeme == OPEN_PAREN) { + subexpr = scratch.tokenPtr[lastOrphanPtr->token].start; + lastOrphanPtr--; + } else { + precedence = lastOrphanPtr->precedence; + while (lastOrphanPtr->left >= 0) { + lastOrphanPtr = nodes + lastOrphanPtr->left; + } + subexpr = scratch.tokenPtr[lastOrphanPtr->token].start; + lastOrphanPtr--; + if (lastOrphanPtr->precedence >= precedence) { + continue; + } + } + size = start + scanned - subexpr; + if ((subexpr > scratch.string) + || ((start + scanned < scratch.end) + && (size < limit))) { + TclObjPrintf(NULL, msg, + "\n (parsing subexpression \"%.*s%s\")", + (size < limit) ? size : limit - 3, subexpr, + (size < limit) ? "" : "..."); + } + } + numBytes = scratch.end - scratch.string; + TclObjPrintf(NULL, msg, "\n (parsing expression \"%.*s%s\")", + (numBytes < limit) ? numBytes : limit - 3, scratch.string, + (numBytes < limit) ? "" : "..."); + Tcl_SetObjResult(interp, msg); + } + } + + if (nodes != staticNodes) { + ckfree((char *)nodes); + } + Tcl_FreeParse(&scratch); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * GenerateTokens -- + * + * Routine that generates Tcl_Tokens that represent a Tcl expression + * and writes them to *parsePtr. The parse tree of the expression + * is in the array of ExprNodes, nodes. Some of the Tcl_Tokens are + * copied from scratch space at *scratchPtr, where the parsing pass + * that constructed the parse tree left them. + * + *---------------------------------------------------------------------- + */ + +static void +GenerateTokens( + ExprNode *nodes, + Tcl_Parse *scratchPtr, + Tcl_Parse *parsePtr) +{ + ExprNode *nodePtr = nodes + nodes->right; + Tcl_Token *sourcePtr, *destPtr, *tokenPtr = scratchPtr->tokenPtr; + int toCopy; + CONST char *end = tokenPtr->start + tokenPtr->size; + + while (nodePtr->lexeme != START) { + switch (NODE_TYPE & nodePtr->lexeme) { + case BINARY: + if (nodePtr->left >= 0) { + if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + nodePtr->token = parsePtr->numTokens; + destPtr->type = TCL_TOKEN_SUB_EXPR; + destPtr->start = tokenPtr->start; + destPtr++; + *destPtr = *sourcePtr; + parsePtr->numTokens += 2; + } + nodePtr = nodes + nodePtr->left; + nodes[nodePtr->parent].left = -1; + } else if (nodePtr->right >= 0) { + tokenPtr += tokenPtr->numComponents + 1; + nodePtr = nodes + nodePtr->right; + nodes[nodePtr->parent].right = -1; + } else { + if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { + destPtr = parsePtr->tokenPtr + nodePtr->token; + destPtr->size = end - destPtr->start; + destPtr->numComponents = + parsePtr->numTokens - nodePtr->token - 1; + } + nodePtr = nodes + nodePtr->parent; + } + break; + + case UNARY: + if (nodePtr->right >= 0) { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + if (nodePtr->lexeme != OPEN_PAREN) { + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + nodePtr->token = parsePtr->numTokens; + destPtr->type = TCL_TOKEN_SUB_EXPR; + destPtr->start = tokenPtr->start; + destPtr++; + *destPtr = *sourcePtr; + parsePtr->numTokens += 2; + } + if (tokenPtr == sourcePtr) { + tokenPtr += tokenPtr->numComponents + 1; + } + nodePtr = nodes + nodePtr->right; + nodes[nodePtr->parent].right = -1; + } else { + if (nodePtr->lexeme != OPEN_PAREN) { + destPtr = parsePtr->tokenPtr + nodePtr->token; + destPtr->size = end - destPtr->start; + destPtr->numComponents = + parsePtr->numTokens - nodePtr->token - 1; + } else { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + } + nodePtr = nodes + nodePtr->parent; + } + break; + + case LEAF: + switch (nodePtr->lexeme) { + case EMPTY: + break; + + case BRACED: + case QUOTED: + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + if (sourcePtr->numComponents > 1) { + toCopy = sourcePtr->numComponents; + if (tokenPtr == sourcePtr) { + tokenPtr += toCopy + 1; + } + sourcePtr->numComponents++; + while (parsePtr->numTokens + toCopy + 1 + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *destPtr++ = *sourcePtr; + *destPtr = *sourcePtr++; + destPtr->type = TCL_TOKEN_WORD; + destPtr->numComponents = toCopy; + destPtr++; + memcpy((VOID *) destPtr, (VOID *) sourcePtr, + (size_t) (toCopy * sizeof(Tcl_Token))); + parsePtr->numTokens += toCopy + 2; + break; + } + + default: + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + toCopy = sourcePtr->numComponents + 1; + if (tokenPtr == sourcePtr) { + tokenPtr += toCopy; + } + while (parsePtr->numTokens + toCopy - 1 + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy((VOID *) destPtr, (VOID *) sourcePtr, + (size_t) (toCopy * sizeof(Tcl_Token))); + parsePtr->numTokens += toCopy; + break; + + } + nodePtr = nodes + nodePtr->parent; + break; + + } + } +} + +/* + *---------------------------------------------------------------------- + * + * PrecedenceOf -- + * + * Utility routine that returns the precedence level of an operator. + * + * Results: + * Returns an unsigned char value. Greater value indicates greater + * operator precedence. + * + *---------------------------------------------------------------------- + */ + +static unsigned char +PrecedenceOf( + unsigned char operator) +{ + switch (operator) { + case NOT: case BIT_NOT: case UNARY_PLUS: case UNARY_MINUS: + case FUNCTION: + return 18; + case EXPON: + return 17; + case MULT: case DIVIDE: case MOD: + return 16; + case BINARY_PLUS: case BINARY_MINUS: + return 15; + case LEFT_SHIFT: case RIGHT_SHIFT: + return 14; + case LESS: case LEQ: case GREATER: case GEQ: + return 13; + case EQUAL: case NEQ: case IN_LIST: case NOT_IN_LIST: + case STREQ: case STRNEQ: + return 12; + case BIT_AND: + return 11; + case BIT_XOR: + return 10; + case BIT_OR: + return 9; + case AND: + return 8; + case OR: + return 7; + case COLON: case QUESTION: + return 6; + case COMMA: + return 5; + case OPEN_PAREN: + return 4; + case CLOSE_PAREN: + return 3; + case START: + return 2; + case END: + return 1; + } + /* NOT REACHED */ + return 0; +} + +/* + *---------------------------------------------------------------------- + * + * ParseLexeme -- + * + * Parse a single lexeme from the start of a string, scanning no + * more than numBytes bytes. + * + * Results: + * Returns the number of bytes scanned to produce the lexeme. + * + * Side effects: + * Code identifying lexeme parsed is writen to *lexemePtr. + * + *---------------------------------------------------------------------- + */ + +static int +ParseLexeme( + CONST char *start, /* Start of lexeme to parse. */ + int numBytes, /* Number of bytes in string. */ + unsigned char *lexemePtr) /* Write code of parsed lexeme to this + * storage. */ +{ + CONST char *end; + int scanned; + Tcl_UniChar ch; + + if (numBytes == 0) { + *lexemePtr = END; + return 0; + } + switch (*start) { + case '[': + *lexemePtr = SCRIPT; + return 1; + + case '{': + *lexemePtr = BRACED; + return 1; + + case '(': + *lexemePtr = OPEN_PAREN; + return 1; + + case ')': + *lexemePtr = CLOSE_PAREN; + return 1; + + case '$': + *lexemePtr = VARIABLE; + return 1; + + case '\"': + *lexemePtr = QUOTED; + return 1; + + case ',': + *lexemePtr = COMMA; + return 1; + + case '/': + *lexemePtr = DIVIDE; + return 1; + + case '%': + *lexemePtr = MOD; + return 1; + + case '+': + *lexemePtr = PLUS; + return 1; + + case '-': + *lexemePtr = MINUS; + return 1; + + case '?': + *lexemePtr = QUESTION; + return 1; + + case ':': + *lexemePtr = COLON; + return 1; + + case '^': + *lexemePtr = BIT_XOR; + return 1; + + case '~': + *lexemePtr = BIT_NOT; + return 1; + + case '*': + if ((numBytes > 1) && (start[1] == '*')) { + *lexemePtr = EXPON; + return 2; + } + *lexemePtr = MULT; + return 1; + + case '=': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = EQUAL; + return 2; + } + *lexemePtr = INCOMPLETE; + return 1; + + case '!': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = NEQ; + return 2; + } + *lexemePtr = NOT; + return 1; + + case '&': + if ((numBytes > 1) && (start[1] == '&')) { + *lexemePtr = AND; + return 2; + } + *lexemePtr = BIT_AND; + return 1; + + case '|': + if ((numBytes > 1) && (start[1] == '|')) { + *lexemePtr = OR; + return 2; + } + *lexemePtr = BIT_OR; + return 1; + + case '<': + if (numBytes > 1) { + switch (start[1]) { + case '<': + *lexemePtr = LEFT_SHIFT; + return 2; + case '=': + *lexemePtr = LEQ; + return 2; + } + } + *lexemePtr = LESS; + return 1; + + case '>': + if (numBytes > 1) { + switch (start[1]) { + case '>': + *lexemePtr = RIGHT_SHIFT; + return 2; + case '=': + *lexemePtr = GEQ; + return 2; + } + } + *lexemePtr = GREATER; + return 1; + + case 'i': + if ((numBytes > 1) && (start[1] == 'n') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + /* + * Must make this check so we can tell the difference between + * the "in" operator and the "int" function name and the + * "infinity" numeric value. + */ + *lexemePtr = IN_LIST; + return 2; + } + break; + + case 'e': + if ((numBytes > 1) && (start[1] == 'q') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + *lexemePtr = STREQ; + return 2; + } + break; + + case 'n': + if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + switch (start[1]) { + case 'e': + *lexemePtr = STRNEQ; + return 2; + case 'i': + *lexemePtr = NOT_IN_LIST; + return 2; + } + } + } + + if (TclParseNumber(NULL, NULL, NULL, start, numBytes, &end, + TCL_PARSE_NO_WHITESPACE) == TCL_OK) { + *lexemePtr = NUMBER; + return (end-start); + } + + if (Tcl_UtfCharComplete(start, numBytes)) { + scanned = Tcl_UtfToUniChar(start, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, start, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + if (!isalpha(UCHAR(ch))) { + *lexemePtr = INVALID; + return scanned; + } + end = start; + while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { + end += scanned; + numBytes -= scanned; + if (Tcl_UtfCharComplete(end, numBytes)) { + scanned = Tcl_UtfToUniChar(end, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, end, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + } + *lexemePtr = BAREWORD; + return (end-start); +} +#endif /* * Local Variables: |