diff options
-rw-r--r-- | ChangeLog | 34 | ||||
-rw-r--r-- | generic/tclInt.h | 5 | ||||
-rw-r--r-- | generic/tclParse.c | 64 | ||||
-rw-r--r-- | generic/tclParseExpr.c | 1150 | ||||
-rw-r--r-- | tests/expr-old.test | 6 | ||||
-rw-r--r-- | tests/expr.test | 4 | ||||
-rw-r--r-- | tests/parseExpr.test | 4 |
7 files changed, 1233 insertions, 34 deletions
@@ -1,3 +1,37 @@ +2006-07-05 Don Porter <dgp@users.sourceforge.net> + + * generic/tclParseExpr.c: Completely new expression parser + that builds a parse tree instead of operating with deep recursion. + This corrects reports of stack-blowing crashes parsing long + expressions [Bug 906201] and replaces a fundamentally O(N^2) + algorithm with an O(N) one [RFE 903765]. The new parser is better + able to generate error messages that clearly report both the nature + and context of the syntax error [Bugs 1029267, 1381715]. For now, + the code for the old parser is still present and can be activated + with a "#define OLD_EXPR_PARSER 1". This is for the sake of a clean + implementation patch, and for ease of benchmarking. The new parser + is non-recursive, so much lighter in stack consumption, but it + does use more heap, so there may be cases where parsing of long + expressions that succeeded with the old parser will lead to out + of memory panics with the new one. There are still more improvements + possible on that point, though significant progress may require + changes to the Tcl_Token specifications documented for the public + Tcl_Parse*() routines. + ***POTENTIAL INCOMPATIBILITY*** for any callers that rely on the + exact (usually terrible) error messages generated by the old parser. + This includes a large number of tests in the test suite. + + * generic/tclInt.h: Replaced TclParseWhiteSpace() with + * generic/tclParse.c: TclParseAllWhiteSpace() which is what + * generic/tclParseExpr.c: all the callers really needed. + Breaking whitespace runs at newlines is useful only to the command + parsing function, and it can call the file scoped routine + ParseWhiteSpace() to do that. + + * tests/expr-old.test: Removed knownBug constraints that masked + * tests/expr.test: failures due to revised error messages. + * tests/parseExpr.test: + 2006-06-20 Don Porter <dgp@users.sourceforge.net> * generic/tclIOUtil.c: Changed default configuration to diff --git a/generic/tclInt.h b/generic/tclInt.h index 0ef3b96..35addc8 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.271 2006/06/16 18:35:55 dgp Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.272 2006/07/05 05:34:44 dgp Exp $ */ #ifndef _TCLINT @@ -2137,8 +2137,7 @@ MODULE_SCOPE void TclParseInit(Tcl_Interp *interp, CONST char *string, #if 0 MODULE_SCOPE int TclParseInteger(CONST char *string, int numBytes); #endif -MODULE_SCOPE int TclParseWhiteSpace(CONST char *src, - int numBytes, Tcl_Parse *parsePtr, char *typePtr); +MODULE_SCOPE int TclParseAllWhiteSpace(CONST char *src, int numBytes); MODULE_SCOPE int TclProcessReturn(Tcl_Interp *interp, int code, int level, Tcl_Obj *returnOpts); MODULE_SCOPE int TclpObjLstat(Tcl_Obj *pathPtr, Tcl_StatBuf *buf); diff --git a/generic/tclParse.c b/generic/tclParse.c index 5da1abb..9800537 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParse.c,v 1.45 2005/11/02 14:51:04 dkf Exp $ + * RCS: @(#) $Id: tclParse.c,v 1.46 2006/07/05 05:34:45 dgp Exp $ */ #include "tclInt.h" @@ -176,6 +176,9 @@ static int ParseComment(CONST char *src, int numBytes, Tcl_Parse *parsePtr); static int ParseTokens(CONST char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); +static int ParseWhiteSpace(CONST char *src, int numBytes, + Tcl_Parse *parsePtr, char *typePtr); + /* *---------------------------------------------------------------------- @@ -325,7 +328,7 @@ Tcl_ParseCommand( * sequence: it should be treated just like white space. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); src += scanned; numBytes -= scanned; if (numBytes == 0) { @@ -390,8 +393,7 @@ Tcl_ParseCommand( ) /* Is the prefix */ && (numBytes > 0) - && (TclParseWhiteSpace(termPtr, numBytes, parsePtr, - &type) == 0) + && (ParseWhiteSpace(termPtr, numBytes, parsePtr, &type) == 0) && (type != TYPE_COMMAND_END) /* Non-whitespace follows */ ) { @@ -435,7 +437,7 @@ Tcl_ParseCommand( * word), and (b) check for the end of the command. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); if (scanned) { src += scanned; numBytes -= scanned; @@ -480,10 +482,10 @@ Tcl_ParseCommand( /* *---------------------------------------------------------------------- * - * TclParseWhiteSpace -- + * ParseWhiteSpace -- * - * Scans up to numBytes bytes starting at src, consuming white space as - * defined by Tcl's parsing rules. + * Scans up to numBytes bytes starting at src, consuming white space + * between words as defined by Tcl's parsing rules. * * Results: * Returns the number of bytes recognized as white space. Records at @@ -497,8 +499,8 @@ Tcl_ParseCommand( *---------------------------------------------------------------------- */ -int -TclParseWhiteSpace( +static int +ParseWhiteSpace( CONST char *src, /* First character to parse. */ register int numBytes, /* Max number of bytes to scan. */ Tcl_Parse *parsePtr, /* Information about parse in progress. @@ -541,6 +543,38 @@ TclParseWhiteSpace( /* *---------------------------------------------------------------------- * + * TclParseAllWhiteSpace -- + * + * Scans up to numBytes bytes starting at src, consuming all white space + * including the command-terminating newline characters. + * + * Results: + * Returns the number of bytes recognized as white space. + * + *---------------------------------------------------------------------- + */ + +int +TclParseAllWhiteSpace( + CONST char *src, /* First character to parse. */ + int numBytes) /* Max number of byes to scan */ +{ + Tcl_Parse dummy; /* Since we know ParseWhiteSpace() generates + * no tokens, there's no need for a call to + * Tcl_FreeParse() in this routine */ + char type; + CONST char *p = src; + do { + int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); + p += scanned; + numBytes -= scanned; + } while (numBytes && (*p == '\n') && (p++, --numBytes)); + return (p-src); +} + +/* + *---------------------------------------------------------------------- + * * TclParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing @@ -801,11 +835,9 @@ ParseComment( char type; int scanned; - do { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); - p += scanned; - numBytes -= scanned; - } while (numBytes && (*p == '\n') && (p++,numBytes--)); + scanned = TclParseAllWhiteSpace(p, numBytes); + p += scanned; + numBytes -= scanned; if ((numBytes == 0) || (*p != '#')) { break; @@ -816,7 +848,7 @@ ParseComment( while (numBytes) { if (*p == '\\') { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(p, numBytes, parsePtr, &type); if (scanned) { p += scanned; numBytes -= scanned; diff --git a/generic/tclParseExpr.c b/generic/tclParseExpr.c index 688447b..98683cc 100644 --- a/generic/tclParseExpr.c +++ b/generic/tclParseExpr.c @@ -12,9 +12,12 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParseExpr.c,v 1.32 2005/12/19 19:03:17 dgp Exp $ + * RCS: @(#) $Id: tclParseExpr.c,v 1.33 2006/07/05 05:34:45 dgp Exp $ */ +#define OLD_EXPR_PARSER 0 +#if OLD_EXPR_PARSER + #include "tclInt.h" /* @@ -1589,13 +1592,9 @@ GetLexeme( src = infoPtr->next; numBytes = parsePtr->end - src; - do { - char type; - int scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); - - src += scanned; - numBytes -= scanned; - } while (numBytes && (*src == '\n') && (src++,numBytes--)); + length = TclParseAllWhiteSpace(src, numBytes); + src += length; + numBytes -= length; parsePtr->term = src; if (numBytes == 0) { @@ -1970,6 +1969,1141 @@ LogSyntaxError( infoPtr->parsePtr->errorType = TCL_PARSE_SYNTAX; infoPtr->parsePtr->term = infoPtr->start; } +#else + + +#include "tclInt.h" + +/* + * The ExprNode structure represents one node of the parse tree produced + * as an interim structure by the expression parser. + */ + +typedef struct ExprNode { + unsigned char lexeme; /* Code that identifies the type of this node */ + unsigned char precedence; /* Precedence for operator nodes */ + int left; /* Index of the left operand of this operator node */ + int right; /* Index of the right operand of this operator node */ + int parent; /* Index of the operator of this operand node */ + int token; /* Index of the Tcl_Tokens of this leaf node */ +} ExprNode; + +/* + * Set of lexeme codes stored in ExprNode structs to label and categorize + * the lexemes found. + */ + +#define LEAF (1<<7) +#define UNARY (1<<6) +#define BINARY (1<<5) + +#define NODE_TYPE ( LEAF | UNARY | BINARY) + +#define PLUS 1 +#define MINUS 2 +#define BAREWORD 3 +#define INCOMPLETE 4 +#define INVALID 5 + +#define NUMBER ( LEAF | 1) +#define SCRIPT ( LEAF | 2) +#define BOOLEAN ( LEAF | BAREWORD) +#define BRACED ( LEAF | 4) +#define VARIABLE ( LEAF | 5) +#define QUOTED ( LEAF | 6) +#define EMPTY ( LEAF | 7) + +#define UNARY_PLUS ( UNARY | PLUS) +#define UNARY_MINUS ( UNARY | MINUS) +#define FUNCTION ( UNARY | BAREWORD) +#define START ( UNARY | 4) +#define OPEN_PAREN ( UNARY | 5) +#define NOT ( UNARY | 6) +#define BIT_NOT ( UNARY | 7) + +#define BINARY_PLUS ( BINARY | PLUS) +#define BINARY_MINUS ( BINARY | MINUS) +#define COMMA ( BINARY | 3) +#define MULT ( BINARY | 4) +#define DIVIDE ( BINARY | 5) +#define MOD ( BINARY | 6) +#define LESS ( BINARY | 7) +#define GREATER ( BINARY | 8) +#define BIT_AND ( BINARY | 9) +#define BIT_XOR ( BINARY | 10) +#define BIT_OR ( BINARY | 11) +#define QUESTION ( BINARY | 12) +#define COLON ( BINARY | 13) +#define LEFT_SHIFT ( BINARY | 14) +#define RIGHT_SHIFT ( BINARY | 15) +#define LEQ ( BINARY | 16) +#define GEQ ( BINARY | 17) +#define EQUAL ( BINARY | 18) +#define NEQ ( BINARY | 19) +#define AND ( BINARY | 20) +#define OR ( BINARY | 21) +#define STREQ ( BINARY | 22) +#define STRNEQ ( BINARY | 23) +#define EXPON ( BINARY | 24) +#define IN_LIST ( BINARY | 25) +#define NOT_IN_LIST ( BINARY | 26) +#define CLOSE_PAREN ( BINARY | 27) +#define END ( BINARY | 28) + +/* + * Declarations for local functions to this file: + */ + +static void GenerateTokens(ExprNode *nodes, Tcl_Parse *scratchPtr, + Tcl_Parse *parsePtr); +static int ParseLexeme(CONST char *start, int numBytes, + unsigned char *lexemePtr); +static unsigned char PrecedenceOf(unsigned char operand); + + +/* + *---------------------------------------------------------------------- + * + * Tcl_ParseExpr -- + * + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and stores information about the + * structure of the expression in the Tcl_Parse struct indicated by the + * caller. + * + * Results: + * If the string is successfully parsed as a valid Tcl expression, + * TCL_OK is returned, and data about the expression structure is + * written to *parsePtr. If the string cannot be parsed as a valid + * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL, + * an error message is written to interp. + * + * Side effects: + * If there is insufficient space in parsePtr to hold all the information + * about the expression, then additional space is malloc-ed. If the + * function returns TCL_OK then the caller must eventually invoke + * Tcl_FreeParse to release any additional space that was allocated. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + CONST char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. If < 0, the + * string consists of all bytes up to the + * first null character. */ + Tcl_Parse *parsePtr) /* Structure to fill with information about + * the parsed expression; any previous + * information in the structure is ignored. */ +{ +#define NUM_STATIC_NODES 64 + ExprNode staticNodes[NUM_STATIC_NODES]; + ExprNode *lastOrphanPtr, *nodes = staticNodes; + int nodesAvailable = NUM_STATIC_NODES; + int nodesUsed = 0; + Tcl_Parse scratch; /* Parsing scratch space */ + Tcl_Obj *msg = NULL; + unsigned char precedence; + CONST char *space, *operand, *end; + int scanned, size, limit = 25, code = TCL_OK; + + if (numBytes < 0) { + numBytes = (start ? strlen(start) : 0); + } + + TclParseInit(interp, start, numBytes, &scratch); + TclParseInit(interp, start, numBytes, parsePtr); + + /* Initialize the parse tree with the special "START" node */ + + nodes->lexeme = START; + nodes->precedence = PrecedenceOf(START); + nodes->left = -1; + nodes->right = -1; + nodes->parent = -1; + nodes->token = -1; + lastOrphanPtr = nodes; + nodesUsed++; + + while ((code == TCL_OK) && (lastOrphanPtr->lexeme != END)) { + ExprNode *nodePtr, *lastNodePtr; + Tcl_Token *tokenPtr; + + /* + * Each pass through this loop adds one more ExprNode. + * Allocate space for one if required. + */ + if (nodesUsed >= nodesAvailable) { + int lastOrphanIdx = lastOrphanPtr - nodes; + ExprNode *newPtr; + + size = nodesUsed * 2; + if (nodes == staticNodes) { + nodes = NULL; + } + do { + newPtr = (ExprNode *) attemptckrealloc( (char *) nodes, + (unsigned int) (size * sizeof(ExprNode)) ); + } while ((newPtr == NULL) + && ((size -= (size - nodesUsed) / 2) > nodesUsed)); + if (newPtr == NULL) { + msg = Tcl_NewStringObj( + "not enough memory to parse expression", -1); + code = TCL_ERROR; + continue; + } + nodesAvailable = size; + if (nodes == NULL) { + memcpy((VOID *) newPtr, (VOID *) staticNodes, + (size_t) (nodesUsed * sizeof(ExprNode))); + } + nodes = newPtr; + lastOrphanPtr = nodes + lastOrphanIdx; + } + nodePtr = nodes + nodesUsed; + lastNodePtr = nodePtr - 1; + + /* Skip white space between lexemes */ + + space = start; /* Remember where last lexeme ended */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + scanned = ParseLexeme(start, numBytes, &(nodePtr->lexeme)); + + /* Use context to categorize the lexemes that are ambiguous */ + + if ((NODE_TYPE & nodePtr->lexeme) == 0) { + switch (nodePtr->lexeme) { + case INVALID: + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "invalid character \"%.*s\" in expression", + scanned, start); + code = TCL_ERROR; + continue; + case INCOMPLETE: + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "incomplete operator \"%.*s\" in expression", + scanned, start); + code = TCL_ERROR; + continue; + case BAREWORD: + if (start[scanned+TclParseAllWhiteSpace( + start+scanned, numBytes-scanned)] == '(') { + nodePtr->lexeme = FUNCTION; + } else { + Tcl_Obj *objPtr = Tcl_NewStringObj(start, scanned); + Tcl_IncrRefCount(objPtr); + code = Tcl_ConvertToType(NULL, objPtr, &tclBooleanType); + Tcl_DecrRefCount(objPtr); + if (code == TCL_OK) { + nodePtr->lexeme = BOOLEAN; + } else { + msg = Tcl_NewObj(); + TclObjPrintf(NULL, msg, + "invalid bareword \"%.*s\" in expression", + scanned, start); + Tcl_AppendToObj(msg, + "\n (prepend $ for variable; ", -1); + Tcl_AppendToObj(msg, + "append argument list for function call)", -1); + continue; + } + } + break; + case PLUS: + case MINUS: + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + nodePtr->lexeme |= BINARY; + } else { + nodePtr->lexeme |= UNARY; + } + } + } + + /* Add node to parse tree based on category */ + + switch (NODE_TYPE & nodePtr->lexeme) { + case LEAF: + + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + msg = Tcl_NewStringObj("missing operator ", -1); + while (lastNodePtr->parent >= 0) { + lastNodePtr = nodes + lastNodePtr->parent; + } + while (lastNodePtr->left >= 0) { + lastNodePtr = nodes + lastNodePtr->left; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + switch (nodePtr->lexeme) { + case NUMBER: + case BOOLEAN: + TclObjPrintf(NULL, msg, + "between operands \"%s%.*s\" and \"%.*s%s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit, + (scanned < limit) ? scanned : limit - 3, + (scanned < limit) ? start : start+scanned+3-limit, + (scanned < limit) ? "" : "..."); + if ((operand[0] == '0') + && TclCheckBadOctal(NULL, operand)) { + Tcl_AppendToObj(msg, + "\n (looks like invalid octal number)", -1); + } + break; + default: + TclObjPrintf(NULL, msg, "following operand \"%s%.*s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit); + } + code = TCL_ERROR; + continue; + } + + if (scratch.numTokens+1 >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_SUB_EXPR; + tokenPtr->start = start; + scratch.numTokens++; + + switch (nodePtr->lexeme) { + case NUMBER: + case BOOLEAN: + tokenPtr = scratch.tokenPtr + scratch.numTokens; + tokenPtr->type = TCL_TOKEN_TEXT; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + break; + + case QUOTED: + code = Tcl_ParseQuotedString(interp, start, numBytes, + &scratch, 1, &end); + if (code != TCL_OK) { + continue; + } + scanned = end - start; + break; + + case BRACED: + code = Tcl_ParseBraces(interp, start, numBytes, + &scratch, 1, &end); + if (code != TCL_OK) { + continue; + } + scanned = end - start; + break; + + case VARIABLE: + code = Tcl_ParseVarName(interp, start, numBytes, &scratch, 1); + if (code != TCL_OK) { + continue; + } + tokenPtr = scratch.tokenPtr + nodePtr->token + 1; + if (tokenPtr->type != TCL_TOKEN_VARIABLE) { + msg = Tcl_NewStringObj( + "invalid character \"$\" in expression", -1); + code = TCL_ERROR; + continue; + } + scanned = tokenPtr->size; + break; + + case SCRIPT: + tokenPtr = scratch.tokenPtr + scratch.numTokens; + tokenPtr->type = TCL_TOKEN_COMMAND; + tokenPtr->start = start; + tokenPtr->numComponents = 0; + + end = start + numBytes; + start++; + while (1) { + Tcl_Parse nested; + code = Tcl_ParseCommand(interp, + start, (end - start), 1, &nested); + if (code != TCL_OK) { + parsePtr->term = nested.term; + parsePtr->errorType = nested.errorType; + parsePtr->incomplete = nested.incomplete; + break; + } + start = (nested.commandStart + nested.commandSize); + Tcl_FreeParse(&nested); + if ((nested.term < end) && (*nested.term == ']') + && !nested.incomplete) { + break; + } + + if (start == end) { + msg = Tcl_NewStringObj("missing close-bracket", -1); + parsePtr->term = tokenPtr->start; + parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; + parsePtr->incomplete = 1; + code = TCL_ERROR; + break; + } + } + if (code != TCL_OK) { + continue; + } + end = start; + start = tokenPtr->start; + scanned = end - start; + tokenPtr->size = scanned; + scratch.numTokens++; + break; + } + + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->size = scanned; + tokenPtr->numComponents = scratch.numTokens - nodePtr->token - 1; + + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->precedence = 0; + nodePtr->parent = -1; + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + case UNARY: + if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { + msg = Tcl_NewObj(); + while (lastNodePtr->parent >= 0) { + lastNodePtr = nodes + lastNodePtr->parent; + } + while (lastNodePtr->left >= 0) { + lastNodePtr = nodes + lastNodePtr->left; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + TclObjPrintf(NULL, msg, + "missing operator following operand \"%s%.*s\"", + (size < limit) ? "" : "...", + (size < limit) ? size : limit - 3, + (size < limit) ? operand : operand+size+3-limit); + code = TCL_ERROR; + continue; + } + nodePtr->precedence = PrecedenceOf(nodePtr->lexeme); + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->parent = -1; + + if (scratch.numTokens >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_OPERATOR; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + case BINARY: { + ExprNode *otherPtr; + + if ((nodePtr->lexeme == CLOSE_PAREN) + && (lastNodePtr->lexeme == OPEN_PAREN)) { + if (lastNodePtr[-1].lexeme == FUNCTION) { + /* Normally, "()" is a syntax error, but as a special + * case accept it as an argument list for a function */ + scanned = 0; + nodePtr->lexeme = EMPTY; + nodePtr->precedence = 0; + nodePtr->left = -1; + nodePtr->right = -1; + nodePtr->parent = -1; + nodePtr->token = -1; + + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + + } + msg = Tcl_NewStringObj("empty subexpression", -1); + code = TCL_ERROR; + continue; + } + + precedence = PrecedenceOf(nodePtr->lexeme); + + if ((NODE_TYPE & lastNodePtr->lexeme) != LEAF) { + msg = Tcl_NewObj(); + if (lastNodePtr->precedence > precedence) { + if (lastNodePtr->lexeme == OPEN_PAREN) { + lastOrphanPtr = lastNodePtr; + msg = Tcl_NewStringObj("unbalanced open paren", -1); + code = TCL_ERROR; + continue; + } + if (lastNodePtr->lexeme == COMMA) { + msg = Tcl_NewStringObj("missing function argument", -1); + code = TCL_ERROR; + continue; + } + if (lastNodePtr->lexeme == START) { + msg = Tcl_NewStringObj("empty expression", -1); + code = TCL_ERROR; + continue; + } + operand = scratch.tokenPtr[lastNodePtr->token].start; + size = space - operand; + TclObjPrintf(NULL, msg, + "missing right operand following operator \"%.*s\"", + size, operand); + } else { + if (nodePtr->lexeme == CLOSE_PAREN) { + msg = Tcl_NewStringObj("unbalanced close paren", -1); + } else { + TclObjPrintf(NULL, msg, + "missing left operand before operator \"%.*s\"", + scanned, start); + } + } + code = TCL_ERROR; + continue; + } + + while (1) { + otherPtr = lastOrphanPtr; + while (otherPtr->left >= 0) { + otherPtr = nodes + otherPtr->left; + } + otherPtr--; + + if (otherPtr->precedence < precedence) { + break; + } + + /* Special association rules for the ternary operators */ + if (otherPtr->precedence == precedence) { + if ((otherPtr->lexeme == QUESTION) + && (lastOrphanPtr->lexeme != COLON)) { + break; + } + if ((otherPtr->lexeme == COLON) + && (nodePtr->lexeme == QUESTION)) { + break; + } + } + + /* Some checks before linking */ + if ((otherPtr->lexeme == OPEN_PAREN) + && (nodePtr->lexeme != CLOSE_PAREN)) { + lastOrphanPtr = otherPtr; + msg = Tcl_NewStringObj("unbalanced open paren", -1); + code = TCL_ERROR; + break; + } + if ((otherPtr->lexeme == QUESTION) + && (lastOrphanPtr->lexeme != COLON)) { + msg = Tcl_NewStringObj( + "missing operator \":\" in ternary conditional", + -1); + code = TCL_ERROR; + break; + } + if ((lastOrphanPtr->lexeme == COLON) + && (otherPtr->lexeme != QUESTION)) { + msg = Tcl_NewStringObj( + "unexpected operator \":\" without preceding \"?\"", + -1); + code = TCL_ERROR; + break; + } + if ((lastOrphanPtr->lexeme == COMMA) + && ((otherPtr->lexeme != OPEN_PAREN) + || (otherPtr[-1].lexeme != FUNCTION)) ) { + msg = Tcl_NewStringObj( + "unexpected \",\" outside function argument list", + -1); + code = TCL_ERROR; + break; + } + + /* Link orphan as right operand of otherPtr */ + otherPtr->right = lastOrphanPtr - nodes; + lastOrphanPtr->parent = otherPtr - nodes; + lastOrphanPtr = otherPtr; + + if (otherPtr->lexeme == OPEN_PAREN) { + /* CLOSE_PAREN can only close one OPEN_PAREN */ + tokenPtr = scratch.tokenPtr + otherPtr->token; + tokenPtr->size = start + scanned - tokenPtr->start; + break; + } + if (otherPtr->lexeme == START) { + /* Don't backtrack beyond the start */ + break; + } + } + if (code != TCL_OK) { + continue; + } + + if (nodePtr->lexeme == CLOSE_PAREN) { + if (otherPtr->lexeme == START) { + msg = Tcl_NewStringObj("unbalanced close paren", -1); + code = TCL_ERROR; + continue; + } + /* Create no node for a CLOSE_PAREN lexeme */ + break; + } + + if (lastOrphanPtr->lexeme == COLON) { + msg = Tcl_NewStringObj( + "unexpected operator \":\" without preceding \"?\"", + -1); + code = TCL_ERROR; + continue; + } + + /* Link orphan as left operand of new node */ + nodePtr->precedence = PrecedenceOf(nodePtr->lexeme); + nodePtr->right = -1; + nodePtr->parent = -1; + + if (scratch.numTokens >= scratch.tokensAvailable) { + TclExpandTokenArray(&scratch); + } + nodePtr->token = scratch.numTokens; + tokenPtr = scratch.tokenPtr + nodePtr->token; + tokenPtr->type = TCL_TOKEN_OPERATOR; + tokenPtr->start = start; + tokenPtr->size = scanned; + tokenPtr->numComponents = 0; + scratch.numTokens++; + + nodePtr->left = lastOrphanPtr - nodes; + lastOrphanPtr->parent = nodePtr - nodes; + lastOrphanPtr = nodePtr; + nodesUsed++; + break; + } + } + + start += scanned; + numBytes -= scanned; + } + + if (code == TCL_OK) { + /* Shift tokens from scratch space to caller space */ + GenerateTokens(nodes, &scratch, parsePtr); + } else { + if (parsePtr->errorType == TCL_PARSE_SUCCESS) { + parsePtr->errorType = TCL_PARSE_SYNTAX; + parsePtr->term = start; + } + if (interp == NULL) { + if (msg) { + Tcl_DecrRefCount(msg); + } + } else { + CONST char *subexpr = NULL; + if (msg == NULL) { + msg = Tcl_GetObjResult(interp); + } + while (lastOrphanPtr != nodes) { + if (lastOrphanPtr->lexeme == OPEN_PAREN) { + subexpr = scratch.tokenPtr[lastOrphanPtr->token].start; + lastOrphanPtr--; + } else { + precedence = lastOrphanPtr->precedence; + while (lastOrphanPtr->left >= 0) { + lastOrphanPtr = nodes + lastOrphanPtr->left; + } + subexpr = scratch.tokenPtr[lastOrphanPtr->token].start; + lastOrphanPtr--; + if (lastOrphanPtr->precedence >= precedence) { + continue; + } + } + size = start + scanned - subexpr; + if ((subexpr > scratch.string) + || ((start + scanned < scratch.end) + && (size < limit))) { + TclObjPrintf(NULL, msg, + "\n (parsing subexpression \"%.*s%s\")", + (size < limit) ? size : limit - 3, subexpr, + (size < limit) ? "" : "..."); + } + } + numBytes = scratch.end - scratch.string; + TclObjPrintf(NULL, msg, "\n (parsing expression \"%.*s%s\")", + (numBytes < limit) ? numBytes : limit - 3, scratch.string, + (numBytes < limit) ? "" : "..."); + Tcl_SetObjResult(interp, msg); + } + } + + if (nodes != staticNodes) { + ckfree((char *)nodes); + } + Tcl_FreeParse(&scratch); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * GenerateTokens -- + * + * Routine that generates Tcl_Tokens that represent a Tcl expression + * and writes them to *parsePtr. The parse tree of the expression + * is in the array of ExprNodes, nodes. Some of the Tcl_Tokens are + * copied from scratch space at *scratchPtr, where the parsing pass + * that constructed the parse tree left them. + * + *---------------------------------------------------------------------- + */ + +static void +GenerateTokens( + ExprNode *nodes, + Tcl_Parse *scratchPtr, + Tcl_Parse *parsePtr) +{ + ExprNode *nodePtr = nodes + nodes->right; + Tcl_Token *sourcePtr, *destPtr, *tokenPtr = scratchPtr->tokenPtr; + int toCopy; + CONST char *end = tokenPtr->start + tokenPtr->size; + + while (nodePtr->lexeme != START) { + switch (NODE_TYPE & nodePtr->lexeme) { + case BINARY: + if (nodePtr->left >= 0) { + if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + nodePtr->token = parsePtr->numTokens; + destPtr->type = TCL_TOKEN_SUB_EXPR; + destPtr->start = tokenPtr->start; + destPtr++; + *destPtr = *sourcePtr; + parsePtr->numTokens += 2; + } + nodePtr = nodes + nodePtr->left; + nodes[nodePtr->parent].left = -1; + } else if (nodePtr->right >= 0) { + tokenPtr += tokenPtr->numComponents + 1; + nodePtr = nodes + nodePtr->right; + nodes[nodePtr->parent].right = -1; + } else { + if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { + destPtr = parsePtr->tokenPtr + nodePtr->token; + destPtr->size = end - destPtr->start; + destPtr->numComponents = + parsePtr->numTokens - nodePtr->token - 1; + } + nodePtr = nodes + nodePtr->parent; + } + break; + + case UNARY: + if (nodePtr->right >= 0) { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + if (nodePtr->lexeme != OPEN_PAREN) { + if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + nodePtr->token = parsePtr->numTokens; + destPtr->type = TCL_TOKEN_SUB_EXPR; + destPtr->start = tokenPtr->start; + destPtr++; + *destPtr = *sourcePtr; + parsePtr->numTokens += 2; + } + if (tokenPtr == sourcePtr) { + tokenPtr += tokenPtr->numComponents + 1; + } + nodePtr = nodes + nodePtr->right; + nodes[nodePtr->parent].right = -1; + } else { + if (nodePtr->lexeme != OPEN_PAREN) { + destPtr = parsePtr->tokenPtr + nodePtr->token; + destPtr->size = end - destPtr->start; + destPtr->numComponents = + parsePtr->numTokens - nodePtr->token - 1; + } else { + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + } + nodePtr = nodes + nodePtr->parent; + } + break; + + case LEAF: + switch (nodePtr->lexeme) { + case EMPTY: + break; + + case BRACED: + case QUOTED: + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + if (sourcePtr->numComponents > 1) { + toCopy = sourcePtr->numComponents; + if (tokenPtr == sourcePtr) { + tokenPtr += toCopy + 1; + } + sourcePtr->numComponents++; + while (parsePtr->numTokens + toCopy + 1 + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *destPtr++ = *sourcePtr; + *destPtr = *sourcePtr++; + destPtr->type = TCL_TOKEN_WORD; + destPtr->numComponents = toCopy; + destPtr++; + memcpy((VOID *) destPtr, (VOID *) sourcePtr, + (size_t) (toCopy * sizeof(Tcl_Token))); + parsePtr->numTokens += toCopy + 2; + break; + } + + default: + sourcePtr = scratchPtr->tokenPtr + nodePtr->token; + end = sourcePtr->start + sourcePtr->size; + toCopy = sourcePtr->numComponents + 1; + if (tokenPtr == sourcePtr) { + tokenPtr += toCopy; + } + while (parsePtr->numTokens + toCopy - 1 + >= parsePtr->tokensAvailable) { + TclExpandTokenArray(parsePtr); + } + destPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy((VOID *) destPtr, (VOID *) sourcePtr, + (size_t) (toCopy * sizeof(Tcl_Token))); + parsePtr->numTokens += toCopy; + break; + + } + nodePtr = nodes + nodePtr->parent; + break; + + } + } +} + +/* + *---------------------------------------------------------------------- + * + * PrecedenceOf -- + * + * Utility routine that returns the precedence level of an operator. + * + * Results: + * Returns an unsigned char value. Greater value indicates greater + * operator precedence. + * + *---------------------------------------------------------------------- + */ + +static unsigned char +PrecedenceOf( + unsigned char operator) +{ + switch (operator) { + case NOT: case BIT_NOT: case UNARY_PLUS: case UNARY_MINUS: + case FUNCTION: + return 18; + case EXPON: + return 17; + case MULT: case DIVIDE: case MOD: + return 16; + case BINARY_PLUS: case BINARY_MINUS: + return 15; + case LEFT_SHIFT: case RIGHT_SHIFT: + return 14; + case LESS: case LEQ: case GREATER: case GEQ: + return 13; + case EQUAL: case NEQ: case IN_LIST: case NOT_IN_LIST: + case STREQ: case STRNEQ: + return 12; + case BIT_AND: + return 11; + case BIT_XOR: + return 10; + case BIT_OR: + return 9; + case AND: + return 8; + case OR: + return 7; + case COLON: case QUESTION: + return 6; + case COMMA: + return 5; + case OPEN_PAREN: + return 4; + case CLOSE_PAREN: + return 3; + case START: + return 2; + case END: + return 1; + } + /* NOT REACHED */ + return 0; +} + +/* + *---------------------------------------------------------------------- + * + * ParseLexeme -- + * + * Parse a single lexeme from the start of a string, scanning no + * more than numBytes bytes. + * + * Results: + * Returns the number of bytes scanned to produce the lexeme. + * + * Side effects: + * Code identifying lexeme parsed is writen to *lexemePtr. + * + *---------------------------------------------------------------------- + */ + +static int +ParseLexeme( + CONST char *start, /* Start of lexeme to parse. */ + int numBytes, /* Number of bytes in string. */ + unsigned char *lexemePtr) /* Write code of parsed lexeme to this + * storage. */ +{ + CONST char *end; + int scanned; + Tcl_UniChar ch; + + if (numBytes == 0) { + *lexemePtr = END; + return 0; + } + switch (*start) { + case '[': + *lexemePtr = SCRIPT; + return 1; + + case '{': + *lexemePtr = BRACED; + return 1; + + case '(': + *lexemePtr = OPEN_PAREN; + return 1; + + case ')': + *lexemePtr = CLOSE_PAREN; + return 1; + + case '$': + *lexemePtr = VARIABLE; + return 1; + + case '\"': + *lexemePtr = QUOTED; + return 1; + + case ',': + *lexemePtr = COMMA; + return 1; + + case '/': + *lexemePtr = DIVIDE; + return 1; + + case '%': + *lexemePtr = MOD; + return 1; + + case '+': + *lexemePtr = PLUS; + return 1; + + case '-': + *lexemePtr = MINUS; + return 1; + + case '?': + *lexemePtr = QUESTION; + return 1; + + case ':': + *lexemePtr = COLON; + return 1; + + case '^': + *lexemePtr = BIT_XOR; + return 1; + + case '~': + *lexemePtr = BIT_NOT; + return 1; + + case '*': + if ((numBytes > 1) && (start[1] == '*')) { + *lexemePtr = EXPON; + return 2; + } + *lexemePtr = MULT; + return 1; + + case '=': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = EQUAL; + return 2; + } + *lexemePtr = INCOMPLETE; + return 1; + + case '!': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = NEQ; + return 2; + } + *lexemePtr = NOT; + return 1; + + case '&': + if ((numBytes > 1) && (start[1] == '&')) { + *lexemePtr = AND; + return 2; + } + *lexemePtr = BIT_AND; + return 1; + + case '|': + if ((numBytes > 1) && (start[1] == '|')) { + *lexemePtr = OR; + return 2; + } + *lexemePtr = BIT_OR; + return 1; + + case '<': + if (numBytes > 1) { + switch (start[1]) { + case '<': + *lexemePtr = LEFT_SHIFT; + return 2; + case '=': + *lexemePtr = LEQ; + return 2; + } + } + *lexemePtr = LESS; + return 1; + + case '>': + if (numBytes > 1) { + switch (start[1]) { + case '>': + *lexemePtr = RIGHT_SHIFT; + return 2; + case '=': + *lexemePtr = GEQ; + return 2; + } + } + *lexemePtr = GREATER; + return 1; + + case 'i': + if ((numBytes > 1) && (start[1] == 'n') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + /* + * Must make this check so we can tell the difference between + * the "in" operator and the "int" function name and the + * "infinity" numeric value. + */ + *lexemePtr = IN_LIST; + return 2; + } + break; + + case 'e': + if ((numBytes > 1) && (start[1] == 'q') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + *lexemePtr = STREQ; + return 2; + } + break; + + case 'n': + if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + switch (start[1]) { + case 'e': + *lexemePtr = STRNEQ; + return 2; + case 'i': + *lexemePtr = NOT_IN_LIST; + return 2; + } + } + } + + if (TclParseNumber(NULL, NULL, NULL, start, numBytes, &end, + TCL_PARSE_NO_WHITESPACE) == TCL_OK) { + *lexemePtr = NUMBER; + return (end-start); + } + + if (Tcl_UtfCharComplete(start, numBytes)) { + scanned = Tcl_UtfToUniChar(start, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, start, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + if (!isalpha(UCHAR(ch))) { + *lexemePtr = INVALID; + return scanned; + } + end = start; + while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { + end += scanned; + numBytes -= scanned; + if (Tcl_UtfCharComplete(end, numBytes)) { + scanned = Tcl_UtfToUniChar(end, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, end, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + } + *lexemePtr = BAREWORD; + return (end-start); +} +#endif /* * Local Variables: diff --git a/tests/expr-old.test b/tests/expr-old.test index 578e697..f4e2a4c 100644 --- a/tests/expr-old.test +++ b/tests/expr-old.test @@ -13,7 +13,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: expr-old.test,v 1.34 2006/04/06 18:19:25 dgp Exp $ +# RCS: @(#) $Id: expr-old.test,v 1.35 2006/07/05 05:34:45 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2.1 @@ -958,7 +958,7 @@ test expr-old-34.17 {errors in math functions} -constraints testmathfunctions \ list [catch {expr T1(4)} msg] $msg } -match glob -result {1 {too many arguments for math function*}} -test expr-old-36.1 {ExprLooksLikeInt procedure} -constraints knownBug -body { +test expr-old-36.1 {ExprLooksLikeInt procedure} -body { expr 0289 } -returnCodes error -match glob -result {*invalid octal number*} test expr-old-36.2 {ExprLooksLikeInt procedure} { @@ -1118,7 +1118,7 @@ test expr-old-37.24 {Tcl_ExprDouble handles overflows that look like int} \ testexprdouble 17976931348623165[string repeat 0 292] } {This is a result: Inf} test expr-old-37.25 {Tcl_ExprDouble and NaN} \ - {knownBug ieeeFloatingPoint testexprdouble} { + {ieeeFloatingPoint testexprdouble} { list [catch {testexprdouble 0.0/0.0} result] $result } {1 {floating point value is Not a Number}} diff --git a/tests/expr.test b/tests/expr.test index 1f84e08..980f0b9 100644 --- a/tests/expr.test +++ b/tests/expr.test @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: expr.test,v 1.54 2006/04/06 18:19:25 dgp Exp $ +# RCS: @(#) $Id: expr.test,v 1.55 2006/07/05 05:34:45 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2.1 @@ -6365,7 +6365,7 @@ test expr-39.24 {Tcl_ExprDoubleObj handles overflows that look like int} \ testexprdoubleobj 17976931348623165[string repeat 0 292] } {This is a result: Inf} test expr-39.25 {Tcl_ExprDoubleObj and NaN} \ - {knownBug testexprdoubleobj ieeeFloatingPoint} { + {testexprdoubleobj ieeeFloatingPoint} { list [catch {testexprdoubleobj 0.0/0.0} result] $result } {1 {floating point value is Not a Number}} diff --git a/tests/parseExpr.test b/tests/parseExpr.test index 94c429c..7deb551 100644 --- a/tests/parseExpr.test +++ b/tests/parseExpr.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: parseExpr.test,v 1.21 2006/03/21 18:51:52 dgp Exp $ +# RCS: @(#) $Id: parseExpr.test,v 1.22 2006/07/05 05:34:45 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -620,7 +620,7 @@ test parseExpr-16.4 {GetLexeme procedure, integer lexeme} testexprparser { test parseExpr-16.5 {GetLexeme procedure, integer lexeme too big} {testexprparser wideIs32bit} { list [catch {testexprparser {12345678901234567890} -1} msg] $msg } {1 {integer value too large to represent}} -test parseExpr-16.6 {GetLexeme procedure, bad integer lexeme} -constraints {testexprparser knownBug} -body { +test parseExpr-16.6 {GetLexeme procedure, bad integer lexeme} -constraints testexprparser -body { testexprparser {0999} -1 } -returnCodes error -match glob -result {*invalid octal number*} test parseExpr-16.7 {GetLexeme procedure, double lexeme} testexprparser { |