summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/tclCompExpr.c1052
-rw-r--r--generic/tclParseExpr.c1075
2 files changed, 1051 insertions, 1076 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 63d8be3..c4d2829 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -5,17 +5,1067 @@
*
* Copyright (c) 1997 Sun Microsystems, Inc.
* Copyright (c) 1998-2000 by Scriptics Corporation.
+ * Contributions from Don Porter, NIST, 2006. (not subject to US copyright)
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompExpr.c,v 1.34 2006/09/05 02:44:38 dgp Exp $
+ * RCS: @(#) $Id: tclCompExpr.c,v 1.35 2006/11/09 16:52:30 dgp Exp $
*/
#include "tclInt.h"
#include "tclCompile.h"
/*
+ * The ExprNode structure represents one node of the parse tree produced
+ * as an interim structure by the expression parser.
+ */
+
+typedef struct ExprNode {
+ unsigned char lexeme; /* Code that identifies the type of this node */
+ int left; /* Index of the left operand of this operator node */
+ int right; /* Index of the right operand of this operator node */
+ int parent; /* Index of the operator of this operand node */
+ int token; /* Index of the Tcl_Tokens of this leaf node */
+} ExprNode;
+
+/*
+ * Set of lexeme codes stored in ExprNode structs to label and categorize
+ * the lexemes found.
+ */
+
+#define LEAF (1<<7)
+#define UNARY (1<<6)
+#define BINARY (1<<5)
+
+#define NODE_TYPE ( LEAF | UNARY | BINARY)
+
+#define PLUS 1
+#define MINUS 2
+#define BAREWORD 3
+#define INCOMPLETE 4
+#define INVALID 5
+
+#define NUMBER ( LEAF | 1)
+#define SCRIPT ( LEAF | 2)
+#define BOOLEAN ( LEAF | BAREWORD)
+#define BRACED ( LEAF | 4)
+#define VARIABLE ( LEAF | 5)
+#define QUOTED ( LEAF | 6)
+#define EMPTY ( LEAF | 7)
+
+#define UNARY_PLUS ( UNARY | PLUS)
+#define UNARY_MINUS ( UNARY | MINUS)
+#define FUNCTION ( UNARY | BAREWORD)
+#define START ( UNARY | 4)
+#define OPEN_PAREN ( UNARY | 5)
+#define NOT ( UNARY | 6)
+#define BIT_NOT ( UNARY | 7)
+
+#define BINARY_PLUS ( BINARY | PLUS)
+#define BINARY_MINUS ( BINARY | MINUS)
+#define COMMA ( BINARY | 3)
+#define MULT ( BINARY | 4)
+#define DIVIDE ( BINARY | 5)
+#define MOD ( BINARY | 6)
+#define LESS ( BINARY | 7)
+#define GREATER ( BINARY | 8)
+#define BIT_AND ( BINARY | 9)
+#define BIT_XOR ( BINARY | 10)
+#define BIT_OR ( BINARY | 11)
+#define QUESTION ( BINARY | 12)
+#define COLON ( BINARY | 13)
+#define LEFT_SHIFT ( BINARY | 14)
+#define RIGHT_SHIFT ( BINARY | 15)
+#define LEQ ( BINARY | 16)
+#define GEQ ( BINARY | 17)
+#define EQUAL ( BINARY | 18)
+#define NEQ ( BINARY | 19)
+#define AND ( BINARY | 20)
+#define OR ( BINARY | 21)
+#define STREQ ( BINARY | 22)
+#define STRNEQ ( BINARY | 23)
+#define EXPON ( BINARY | 24)
+#define IN_LIST ( BINARY | 25)
+#define NOT_IN_LIST ( BINARY | 26)
+#define CLOSE_PAREN ( BINARY | 27)
+#define END ( BINARY | 28)
+
+/*
+ * Declarations for local functions to this file:
+ */
+
+static void GenerateTokens(ExprNode *nodes, Tcl_Parse *scratchPtr,
+ Tcl_Parse *parsePtr);
+static int ParseLexeme(CONST char *start, int numBytes,
+ unsigned char *lexemePtr);
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_ParseExpr --
+ *
+ * Given a string, the numBytes bytes starting at start, this function
+ * parses it as a Tcl expression and stores information about the
+ * structure of the expression in the Tcl_Parse struct indicated by the
+ * caller.
+ *
+ * Results:
+ * If the string is successfully parsed as a valid Tcl expression,
+ * TCL_OK is returned, and data about the expression structure is
+ * written to *parsePtr. If the string cannot be parsed as a valid
+ * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL,
+ * an error message is written to interp.
+ *
+ * Side effects:
+ * If there is insufficient space in parsePtr to hold all the information
+ * about the expression, then additional space is malloc-ed. If the
+ * function returns TCL_OK then the caller must eventually invoke
+ * Tcl_FreeParse to release any additional space that was allocated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_ParseExpr(
+ Tcl_Interp *interp, /* Used for error reporting. */
+ CONST char *start, /* Start of source string to parse. */
+ int numBytes, /* Number of bytes in string. If < 0, the
+ * string consists of all bytes up to the
+ * first null character. */
+ Tcl_Parse *parsePtr) /* Structure to fill with information about
+ * the parsed expression; any previous
+ * information in the structure is ignored. */
+{
+#define NUM_STATIC_NODES 64
+ ExprNode staticNodes[NUM_STATIC_NODES];
+ ExprNode *lastOrphanPtr, *nodes = staticNodes;
+ int nodesAvailable = NUM_STATIC_NODES;
+ int nodesUsed = 0;
+ Tcl_Parse scratch; /* Parsing scratch space */
+ Tcl_Obj *msg = NULL, *post = NULL;
+ int scanned = 0, code = TCL_OK, insertMark = 0;
+ CONST char *mark = "_@_";
+ CONST int limit = 25;
+ static CONST unsigned char prec[80] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 15, 15, 5, 16, 16, 16, 13, 13, 11, 10, 9, 6, 6, 14, 14,
+ 13, 13, 12, 12, 8, 7, 12, 12, 17, 12, 12, 3, 1, 0, 0, 0,
+ 0, 18, 18, 18, 2, 4, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ if (numBytes < 0) {
+ numBytes = (start ? strlen(start) : 0);
+ }
+
+ TclParseInit(interp, start, numBytes, &scratch);
+ TclParseInit(interp, start, numBytes, parsePtr);
+
+ /* Initialize the parse tree with the special "START" node */
+
+ nodes->lexeme = START;
+ nodes->left = -1;
+ nodes->right = -1;
+ nodes->parent = -1;
+ nodes->token = -1;
+ lastOrphanPtr = nodes;
+ nodesUsed++;
+
+ while ((code == TCL_OK) && (lastOrphanPtr->lexeme != END)) {
+ ExprNode *nodePtr, *lastNodePtr;
+ Tcl_Token *tokenPtr;
+
+ /*
+ * Each pass through this loop adds one more ExprNode.
+ * Allocate space for one if required.
+ */
+ if (nodesUsed >= nodesAvailable) {
+ int lastOrphanIdx = lastOrphanPtr - nodes;
+ int size = nodesUsed * 2;
+ ExprNode *newPtr;
+
+ if (nodes == staticNodes) {
+ nodes = NULL;
+ }
+ do {
+ newPtr = (ExprNode *) attemptckrealloc( (char *) nodes,
+ (unsigned int) (size * sizeof(ExprNode)) );
+ } while ((newPtr == NULL)
+ && ((size -= (size - nodesUsed) / 2) > nodesUsed));
+ if (newPtr == NULL) {
+ msg = Tcl_NewStringObj(
+ "not enough memory to parse expression", -1);
+ code = TCL_ERROR;
+ continue;
+ }
+ nodesAvailable = size;
+ if (nodes == NULL) {
+ memcpy((VOID *) newPtr, (VOID *) staticNodes,
+ (size_t) (nodesUsed * sizeof(ExprNode)));
+ }
+ nodes = newPtr;
+ lastOrphanPtr = nodes + lastOrphanIdx;
+ }
+ nodePtr = nodes + nodesUsed;
+ lastNodePtr = nodePtr - 1;
+
+ /* Skip white space between lexemes */
+
+ scanned = TclParseAllWhiteSpace(start, numBytes);
+ start += scanned;
+ numBytes -= scanned;
+
+ scanned = ParseLexeme(start, numBytes, &(nodePtr->lexeme));
+
+ /* Use context to categorize the lexemes that are ambiguous */
+
+ if ((NODE_TYPE & nodePtr->lexeme) == 0) {
+ switch (nodePtr->lexeme) {
+ case INVALID:
+ msg = TclObjPrintf(
+ "invalid character \"%.*s\"", scanned, start);
+ code = TCL_ERROR;
+ continue;
+ case INCOMPLETE:
+ msg = TclObjPrintf(
+ "incomplete operator \"%.*s\"", scanned, start);
+ code = TCL_ERROR;
+ continue;
+ case BAREWORD:
+ if (start[scanned+TclParseAllWhiteSpace(
+ start+scanned, numBytes-scanned)] == '(') {
+ nodePtr->lexeme = FUNCTION;
+ } else {
+ Tcl_Obj *objPtr = Tcl_NewStringObj(start, scanned);
+ Tcl_IncrRefCount(objPtr);
+ code = Tcl_ConvertToType(NULL, objPtr, &tclBooleanType);
+ Tcl_DecrRefCount(objPtr);
+ if (code == TCL_OK) {
+ nodePtr->lexeme = BOOLEAN;
+ } else {
+ msg = TclObjPrintf(
+ "invalid bareword \"%.*s%s\"",
+ (scanned < limit) ? scanned : limit - 3, start,
+ (scanned < limit) ? "" : "...");
+ post = TclObjPrintf(
+ "should be \"$%.*s%s\" or \"{%.*s%s}\"",
+ (scanned < limit) ? scanned : limit - 3,
+ start, (scanned < limit) ? "" : "...",
+ (scanned < limit) ? scanned : limit - 3,
+ start, (scanned < limit) ? "" : "...");
+ TclAppendPrintfToObj(post,
+ " or \"%.*s%s(...)\" or ...",
+ (scanned < limit) ? scanned : limit - 3,
+ start, (scanned < limit) ? "" : "...");
+ continue;
+ }
+ }
+ break;
+ case PLUS:
+ case MINUS:
+ if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
+ nodePtr->lexeme |= BINARY;
+ } else {
+ nodePtr->lexeme |= UNARY;
+ }
+ }
+ }
+
+ /* Add node to parse tree based on category */
+
+ switch (NODE_TYPE & nodePtr->lexeme) {
+ case LEAF: {
+ CONST char *end;
+
+ if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
+ CONST char *operand =
+ scratch.tokenPtr[lastNodePtr->token].start;
+
+ msg = TclObjPrintf("missing operator at %s", mark);
+ if (operand[0] == '0') {
+ Tcl_Obj *copy = Tcl_NewStringObj(operand,
+ start + scanned - operand);
+ if (TclCheckBadOctal(NULL, Tcl_GetString(copy))) {
+ post = Tcl_NewStringObj(
+ "looks like invalid octal number", -1);
+ }
+ Tcl_DecrRefCount(copy);
+ }
+ scanned = 0;
+ insertMark = 1;
+ code = TCL_ERROR;
+ continue;
+ }
+
+ if (scratch.numTokens+1 >= scratch.tokensAvailable) {
+ TclExpandTokenArray(&scratch);
+ }
+ nodePtr->token = scratch.numTokens;
+ tokenPtr = scratch.tokenPtr + nodePtr->token;
+ tokenPtr->type = TCL_TOKEN_SUB_EXPR;
+ tokenPtr->start = start;
+ scratch.numTokens++;
+
+ switch (nodePtr->lexeme) {
+ case NUMBER:
+ case BOOLEAN:
+ tokenPtr = scratch.tokenPtr + scratch.numTokens;
+ tokenPtr->type = TCL_TOKEN_TEXT;
+ tokenPtr->start = start;
+ tokenPtr->size = scanned;
+ tokenPtr->numComponents = 0;
+ scratch.numTokens++;
+
+ break;
+
+ case QUOTED:
+ code = Tcl_ParseQuotedString(interp, start, numBytes,
+ &scratch, 1, &end);
+ if (code != TCL_OK) {
+ scanned = scratch.term - start;
+ scanned += (scanned < numBytes);
+ continue;
+ }
+ scanned = end - start;
+ break;
+
+ case BRACED:
+ code = Tcl_ParseBraces(interp, start, numBytes,
+ &scratch, 1, &end);
+ if (code != TCL_OK) {
+ continue;
+ }
+ scanned = end - start;
+ break;
+
+ case VARIABLE:
+ code = Tcl_ParseVarName(interp, start, numBytes, &scratch, 1);
+ if (code != TCL_OK) {
+ scanned = scratch.term - start;
+ scanned += (scanned < numBytes);
+ continue;
+ }
+ tokenPtr = scratch.tokenPtr + nodePtr->token + 1;
+ if (tokenPtr->type != TCL_TOKEN_VARIABLE) {
+ msg = Tcl_NewStringObj("invalid character \"$\"", -1);
+ code = TCL_ERROR;
+ continue;
+ }
+ scanned = tokenPtr->size;
+ break;
+
+ case SCRIPT:
+ tokenPtr = scratch.tokenPtr + scratch.numTokens;
+ tokenPtr->type = TCL_TOKEN_COMMAND;
+ tokenPtr->start = start;
+ tokenPtr->numComponents = 0;
+
+ end = start + numBytes;
+ start++;
+ while (1) {
+ Tcl_Parse nested;
+ code = Tcl_ParseCommand(interp,
+ start, (end - start), 1, &nested);
+ if (code != TCL_OK) {
+ parsePtr->term = nested.term;
+ parsePtr->errorType = nested.errorType;
+ parsePtr->incomplete = nested.incomplete;
+ break;
+ }
+ start = (nested.commandStart + nested.commandSize);
+ Tcl_FreeParse(&nested);
+ if ((nested.term < end) && (*nested.term == ']')
+ && !nested.incomplete) {
+ break;
+ }
+
+ if (start == end) {
+ msg = Tcl_NewStringObj("missing close-bracket", -1);
+ parsePtr->term = tokenPtr->start;
+ parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
+ parsePtr->incomplete = 1;
+ code = TCL_ERROR;
+ break;
+ }
+ }
+ end = start;
+ start = tokenPtr->start;
+ if (code != TCL_OK) {
+ scanned = parsePtr->term - start;
+ scanned += (scanned < numBytes);
+ continue;
+ }
+ scanned = end - start;
+ tokenPtr->size = scanned;
+ scratch.numTokens++;
+ break;
+ }
+
+ tokenPtr = scratch.tokenPtr + nodePtr->token;
+ tokenPtr->size = scanned;
+ tokenPtr->numComponents = scratch.numTokens - nodePtr->token - 1;
+
+ nodePtr->left = -1;
+ nodePtr->right = -1;
+ nodePtr->parent = -1;
+ lastOrphanPtr = nodePtr;
+ nodesUsed++;
+ break;
+ }
+
+ case UNARY:
+ if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
+ msg = TclObjPrintf("missing operator at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ code = TCL_ERROR;
+ continue;
+ }
+ nodePtr->left = -1;
+ nodePtr->right = -1;
+ nodePtr->parent = -1;
+
+ if (scratch.numTokens >= scratch.tokensAvailable) {
+ TclExpandTokenArray(&scratch);
+ }
+ nodePtr->token = scratch.numTokens;
+ tokenPtr = scratch.tokenPtr + nodePtr->token;
+ tokenPtr->type = TCL_TOKEN_OPERATOR;
+ tokenPtr->start = start;
+ tokenPtr->size = scanned;
+ tokenPtr->numComponents = 0;
+ scratch.numTokens++;
+
+ lastOrphanPtr = nodePtr;
+ nodesUsed++;
+ break;
+
+ case BINARY: {
+ ExprNode *otherPtr = NULL;
+ unsigned char precedence = prec[nodePtr->lexeme];
+
+ if ((nodePtr->lexeme == CLOSE_PAREN)
+ && (lastNodePtr->lexeme == OPEN_PAREN)) {
+ if (lastNodePtr[-1].lexeme == FUNCTION) {
+ /* Normally, "()" is a syntax error, but as a special
+ * case accept it as an argument list for a function */
+ scanned = 0;
+ nodePtr->lexeme = EMPTY;
+ nodePtr->left = -1;
+ nodePtr->right = -1;
+ nodePtr->parent = -1;
+ nodePtr->token = -1;
+
+ lastOrphanPtr = nodePtr;
+ nodesUsed++;
+ break;
+
+ }
+ msg = TclObjPrintf("empty subexpression at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ code = TCL_ERROR;
+ continue;
+ }
+
+
+ if ((NODE_TYPE & lastNodePtr->lexeme) != LEAF) {
+ if (prec[lastNodePtr->lexeme] > precedence) {
+ if (lastNodePtr->lexeme == OPEN_PAREN) {
+ msg = Tcl_NewStringObj("unbalanced open paren", -1);
+ } else if (lastNodePtr->lexeme == COMMA) {
+ msg = TclObjPrintf(
+ "missing function argument at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ } else if (lastNodePtr->lexeme == START) {
+ msg = Tcl_NewStringObj("empty expression", -1);
+ }
+ } else {
+ if (nodePtr->lexeme == CLOSE_PAREN) {
+ msg = Tcl_NewStringObj("unbalanced close paren", -1);
+ } else if ((nodePtr->lexeme == COMMA)
+ && (lastNodePtr->lexeme == OPEN_PAREN)
+ && (lastNodePtr[-1].lexeme == FUNCTION)) {
+ msg = TclObjPrintf(
+ "missing function argument at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ }
+ }
+ if (msg == NULL) {
+ msg = TclObjPrintf("missing operand at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ }
+ code = TCL_ERROR;
+ continue;
+ }
+
+ while (1) {
+
+ if (lastOrphanPtr->parent >= 0) {
+ otherPtr = nodes + lastOrphanPtr->parent;
+ } else if (lastOrphanPtr->left >= 0) {
+ Tcl_Panic("Tcl_ParseExpr: left closure programming error");
+ } else {
+ lastOrphanPtr->parent = lastOrphanPtr - nodes;
+ otherPtr = lastOrphanPtr;
+ }
+ otherPtr--;
+
+ if (prec[otherPtr->lexeme] < precedence) {
+ break;
+ }
+
+ if (prec[otherPtr->lexeme] == precedence) {
+ /* Special association rules for the ternary operators. */
+ if ((otherPtr->lexeme == QUESTION)
+ && (lastOrphanPtr->lexeme != COLON)) {
+ break;
+ }
+ if ((otherPtr->lexeme == COLON)
+ && (nodePtr->lexeme == QUESTION)) {
+ break;
+ }
+ /* Right association rules for exponentiation. */
+ if (nodePtr->lexeme == EXPON) {
+ break;
+ }
+ }
+
+ /* Some checks before linking */
+ if ((otherPtr->lexeme == OPEN_PAREN)
+ && (nodePtr->lexeme != CLOSE_PAREN)) {
+ lastOrphanPtr = otherPtr;
+ msg = Tcl_NewStringObj("unbalanced open paren", -1);
+ code = TCL_ERROR;
+ break;
+ }
+ if ((otherPtr->lexeme == QUESTION)
+ && (lastOrphanPtr->lexeme != COLON)) {
+ msg = TclObjPrintf(
+ "missing operator \":\" at %s", mark);
+ scanned = 0;
+ insertMark = 1;
+ code = TCL_ERROR;
+ break;
+ }
+ if ((lastOrphanPtr->lexeme == COLON)
+ && (otherPtr->lexeme != QUESTION)) {
+ msg = Tcl_NewStringObj(
+ "unexpected operator \":\" without preceding \"?\"",
+ -1);
+ code = TCL_ERROR;
+ break;
+ }
+
+ /* Link orphan as right operand of otherPtr */
+ otherPtr->right = lastOrphanPtr - nodes;
+ lastOrphanPtr->parent = otherPtr - nodes;
+ lastOrphanPtr = otherPtr;
+
+ if (otherPtr->lexeme == OPEN_PAREN) {
+ /* CLOSE_PAREN can only close one OPEN_PAREN */
+ tokenPtr = scratch.tokenPtr + otherPtr->token;
+ tokenPtr->size = start + scanned - tokenPtr->start;
+ break;
+ }
+ if (otherPtr->lexeme == START) {
+ /* Don't backtrack beyond the start */
+ break;
+ }
+ }
+ if (code != TCL_OK) {
+ continue;
+ }
+
+ if (nodePtr->lexeme == CLOSE_PAREN) {
+ if (otherPtr->lexeme == START) {
+ msg = Tcl_NewStringObj("unbalanced close paren", -1);
+ code = TCL_ERROR;
+ continue;
+ }
+ /* Create no node for a CLOSE_PAREN lexeme */
+ break;
+ }
+
+ if ((nodePtr->lexeme == COMMA) && ((otherPtr->lexeme != OPEN_PAREN)
+ || (otherPtr[-1].lexeme != FUNCTION))) {
+ msg = Tcl_NewStringObj(
+ "unexpected \",\" outside function argument list", -1);
+ code = TCL_ERROR;
+ continue;
+ }
+
+ if (lastOrphanPtr->lexeme == COLON) {
+ msg = Tcl_NewStringObj(
+ "unexpected operator \":\" without preceding \"?\"",
+ -1);
+ code = TCL_ERROR;
+ continue;
+ }
+
+ /* Link orphan as left operand of new node */
+ nodePtr->right = -1;
+
+ if (scratch.numTokens >= scratch.tokensAvailable) {
+ TclExpandTokenArray(&scratch);
+ }
+ nodePtr->token = scratch.numTokens;
+ tokenPtr = scratch.tokenPtr + nodePtr->token;
+ tokenPtr->type = TCL_TOKEN_OPERATOR;
+ tokenPtr->start = start;
+ tokenPtr->size = scanned;
+ tokenPtr->numComponents = 0;
+ scratch.numTokens++;
+
+ nodePtr->left = lastOrphanPtr - nodes;
+ nodePtr->parent = lastOrphanPtr->parent;
+ lastOrphanPtr->parent = nodePtr - nodes;
+ lastOrphanPtr = nodePtr;
+ nodesUsed++;
+ break;
+ }
+ }
+
+ start += scanned;
+ numBytes -= scanned;
+ }
+
+ if (code == TCL_OK) {
+ /* Shift tokens from scratch space to caller space */
+ GenerateTokens(nodes, &scratch, parsePtr);
+ } else {
+ if (parsePtr->errorType == TCL_PARSE_SUCCESS) {
+ parsePtr->errorType = TCL_PARSE_SYNTAX;
+ parsePtr->term = start;
+ }
+ if (interp == NULL) {
+ if (msg) {
+ Tcl_DecrRefCount(msg);
+ }
+ } else {
+ if (msg == NULL) {
+ msg = Tcl_GetObjResult(interp);
+ }
+ TclAppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"",
+ ((start - limit) < scratch.string) ? "" : "...",
+ ((start - limit) < scratch.string)
+ ? (start - scratch.string) : limit - 3,
+ ((start - limit) < scratch.string)
+ ? scratch.string : start - limit + 3,
+ (scanned < limit) ? scanned : limit - 3, start,
+ (scanned < limit) ? "" : "...",
+ insertMark ? mark : "",
+ (start + scanned + limit > scratch.end)
+ ? scratch.end - (start + scanned) : limit-3,
+ start + scanned,
+ (start + scanned + limit > scratch.end) ? "" : "..."
+ );
+ if (post != NULL) {
+ Tcl_AppendToObj(msg, ";\n", -1);
+ Tcl_AppendObjToObj(msg, post);
+ Tcl_DecrRefCount(post);
+ }
+ Tcl_SetObjResult(interp, msg);
+ numBytes = scratch.end - scratch.string;
+ TclAppendObjToErrorInfo(interp, TclObjPrintf(
+ "\n (parsing expression \"%.*s%s\")",
+ (numBytes < limit) ? numBytes : limit - 3,
+ scratch.string, (numBytes < limit) ? "" : "..."));
+ }
+ }
+
+ if (nodes != staticNodes) {
+ ckfree((char *)nodes);
+ }
+ Tcl_FreeParse(&scratch);
+ return code;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * GenerateTokens --
+ *
+ * Routine that generates Tcl_Tokens that represent a Tcl expression
+ * and writes them to *parsePtr. The parse tree of the expression
+ * is in the array of ExprNodes, nodes. Some of the Tcl_Tokens are
+ * copied from scratch space at *scratchPtr, where the parsing pass
+ * that constructed the parse tree left them.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+GenerateTokens(
+ ExprNode *nodes,
+ Tcl_Parse *scratchPtr,
+ Tcl_Parse *parsePtr)
+{
+ ExprNode *nodePtr = nodes + nodes->right;
+ Tcl_Token *sourcePtr, *destPtr, *tokenPtr = scratchPtr->tokenPtr;
+ int toCopy;
+ CONST char *end = tokenPtr->start + tokenPtr->size;
+
+ while (nodePtr->lexeme != START) {
+ switch (NODE_TYPE & nodePtr->lexeme) {
+ case BINARY:
+ if (nodePtr->left >= 0) {
+ if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) {
+ sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
+ if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
+ TclExpandTokenArray(parsePtr);
+ }
+ destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
+ nodePtr->token = parsePtr->numTokens;
+ destPtr->type = TCL_TOKEN_SUB_EXPR;
+ destPtr->start = tokenPtr->start;
+ destPtr++;
+ *destPtr = *sourcePtr;
+ parsePtr->numTokens += 2;
+ }
+ nodePtr = nodes + nodePtr->left;
+ nodes[nodePtr->parent].left = -1;
+ } else if (nodePtr->right >= 0) {
+ tokenPtr += tokenPtr->numComponents + 1;
+ nodePtr = nodes + nodePtr->right;
+ nodes[nodePtr->parent].right = -1;
+ } else {
+ if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) {
+ destPtr = parsePtr->tokenPtr + nodePtr->token;
+ destPtr->size = end - destPtr->start;
+ destPtr->numComponents =
+ parsePtr->numTokens - nodePtr->token - 1;
+ }
+ nodePtr = nodes + nodePtr->parent;
+ }
+ break;
+
+ case UNARY:
+ if (nodePtr->right >= 0) {
+ sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
+ if (nodePtr->lexeme != OPEN_PAREN) {
+ if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
+ TclExpandTokenArray(parsePtr);
+ }
+ destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
+ nodePtr->token = parsePtr->numTokens;
+ destPtr->type = TCL_TOKEN_SUB_EXPR;
+ destPtr->start = tokenPtr->start;
+ destPtr++;
+ *destPtr = *sourcePtr;
+ parsePtr->numTokens += 2;
+ }
+ if (tokenPtr == sourcePtr) {
+ tokenPtr += tokenPtr->numComponents + 1;
+ }
+ nodePtr = nodes + nodePtr->right;
+ nodes[nodePtr->parent].right = -1;
+ } else {
+ if (nodePtr->lexeme != OPEN_PAREN) {
+ destPtr = parsePtr->tokenPtr + nodePtr->token;
+ destPtr->size = end - destPtr->start;
+ destPtr->numComponents =
+ parsePtr->numTokens - nodePtr->token - 1;
+ } else {
+ sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
+ end = sourcePtr->start + sourcePtr->size;
+ }
+ nodePtr = nodes + nodePtr->parent;
+ }
+ break;
+
+ case LEAF:
+ switch (nodePtr->lexeme) {
+ case EMPTY:
+ break;
+
+ case BRACED:
+ case QUOTED:
+ sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
+ end = sourcePtr->start + sourcePtr->size;
+ if (sourcePtr->numComponents > 1) {
+ toCopy = sourcePtr->numComponents;
+ if (tokenPtr == sourcePtr) {
+ tokenPtr += toCopy + 1;
+ }
+ sourcePtr->numComponents++;
+ while (parsePtr->numTokens + toCopy + 1
+ >= parsePtr->tokensAvailable) {
+ TclExpandTokenArray(parsePtr);
+ }
+ destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
+ *destPtr++ = *sourcePtr;
+ *destPtr = *sourcePtr++;
+ destPtr->type = TCL_TOKEN_WORD;
+ destPtr->numComponents = toCopy;
+ destPtr++;
+ memcpy((VOID *) destPtr, (VOID *) sourcePtr,
+ (size_t) (toCopy * sizeof(Tcl_Token)));
+ parsePtr->numTokens += toCopy + 2;
+ break;
+ }
+
+ default:
+ sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
+ end = sourcePtr->start + sourcePtr->size;
+ toCopy = sourcePtr->numComponents + 1;
+ if (tokenPtr == sourcePtr) {
+ tokenPtr += toCopy;
+ }
+ while (parsePtr->numTokens + toCopy - 1
+ >= parsePtr->tokensAvailable) {
+ TclExpandTokenArray(parsePtr);
+ }
+ destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
+ memcpy((VOID *) destPtr, (VOID *) sourcePtr,
+ (size_t) (toCopy * sizeof(Tcl_Token)));
+ parsePtr->numTokens += toCopy;
+ break;
+
+ }
+ nodePtr = nodes + nodePtr->parent;
+ break;
+
+ }
+ }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ParseLexeme --
+ *
+ * Parse a single lexeme from the start of a string, scanning no
+ * more than numBytes bytes.
+ *
+ * Results:
+ * Returns the number of bytes scanned to produce the lexeme.
+ *
+ * Side effects:
+ * Code identifying lexeme parsed is writen to *lexemePtr.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+ParseLexeme(
+ CONST char *start, /* Start of lexeme to parse. */
+ int numBytes, /* Number of bytes in string. */
+ unsigned char *lexemePtr) /* Write code of parsed lexeme to this
+ * storage. */
+{
+ CONST char *end;
+ int scanned;
+ Tcl_UniChar ch;
+
+ if (numBytes == 0) {
+ *lexemePtr = END;
+ return 0;
+ }
+ switch (*start) {
+ case '[':
+ *lexemePtr = SCRIPT;
+ return 1;
+
+ case '{':
+ *lexemePtr = BRACED;
+ return 1;
+
+ case '(':
+ *lexemePtr = OPEN_PAREN;
+ return 1;
+
+ case ')':
+ *lexemePtr = CLOSE_PAREN;
+ return 1;
+
+ case '$':
+ *lexemePtr = VARIABLE;
+ return 1;
+
+ case '\"':
+ *lexemePtr = QUOTED;
+ return 1;
+
+ case ',':
+ *lexemePtr = COMMA;
+ return 1;
+
+ case '/':
+ *lexemePtr = DIVIDE;
+ return 1;
+
+ case '%':
+ *lexemePtr = MOD;
+ return 1;
+
+ case '+':
+ *lexemePtr = PLUS;
+ return 1;
+
+ case '-':
+ *lexemePtr = MINUS;
+ return 1;
+
+ case '?':
+ *lexemePtr = QUESTION;
+ return 1;
+
+ case ':':
+ *lexemePtr = COLON;
+ return 1;
+
+ case '^':
+ *lexemePtr = BIT_XOR;
+ return 1;
+
+ case '~':
+ *lexemePtr = BIT_NOT;
+ return 1;
+
+ case '*':
+ if ((numBytes > 1) && (start[1] == '*')) {
+ *lexemePtr = EXPON;
+ return 2;
+ }
+ *lexemePtr = MULT;
+ return 1;
+
+ case '=':
+ if ((numBytes > 1) && (start[1] == '=')) {
+ *lexemePtr = EQUAL;
+ return 2;
+ }
+ *lexemePtr = INCOMPLETE;
+ return 1;
+
+ case '!':
+ if ((numBytes > 1) && (start[1] == '=')) {
+ *lexemePtr = NEQ;
+ return 2;
+ }
+ *lexemePtr = NOT;
+ return 1;
+
+ case '&':
+ if ((numBytes > 1) && (start[1] == '&')) {
+ *lexemePtr = AND;
+ return 2;
+ }
+ *lexemePtr = BIT_AND;
+ return 1;
+
+ case '|':
+ if ((numBytes > 1) && (start[1] == '|')) {
+ *lexemePtr = OR;
+ return 2;
+ }
+ *lexemePtr = BIT_OR;
+ return 1;
+
+ case '<':
+ if (numBytes > 1) {
+ switch (start[1]) {
+ case '<':
+ *lexemePtr = LEFT_SHIFT;
+ return 2;
+ case '=':
+ *lexemePtr = LEQ;
+ return 2;
+ }
+ }
+ *lexemePtr = LESS;
+ return 1;
+
+ case '>':
+ if (numBytes > 1) {
+ switch (start[1]) {
+ case '>':
+ *lexemePtr = RIGHT_SHIFT;
+ return 2;
+ case '=':
+ *lexemePtr = GEQ;
+ return 2;
+ }
+ }
+ *lexemePtr = GREATER;
+ return 1;
+
+ case 'i':
+ if ((numBytes > 1) && (start[1] == 'n')
+ && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ /*
+ * Must make this check so we can tell the difference between
+ * the "in" operator and the "int" function name and the
+ * "infinity" numeric value.
+ */
+ *lexemePtr = IN_LIST;
+ return 2;
+ }
+ break;
+
+ case 'e':
+ if ((numBytes > 1) && (start[1] == 'q')
+ && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ *lexemePtr = STREQ;
+ return 2;
+ }
+ break;
+
+ case 'n':
+ if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ switch (start[1]) {
+ case 'e':
+ *lexemePtr = STRNEQ;
+ return 2;
+ case 'i':
+ *lexemePtr = NOT_IN_LIST;
+ return 2;
+ }
+ }
+ }
+
+ if (TclParseNumber(NULL, NULL, NULL, start, numBytes, &end,
+ TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
+ *lexemePtr = NUMBER;
+ return (end-start);
+ }
+
+ if (Tcl_UtfCharComplete(start, numBytes)) {
+ scanned = Tcl_UtfToUniChar(start, &ch);
+ } else {
+ char utfBytes[TCL_UTF_MAX];
+ memcpy(utfBytes, start, (size_t) numBytes);
+ utfBytes[numBytes] = '\0';
+ scanned = Tcl_UtfToUniChar(utfBytes, &ch);
+ }
+ if (!isalpha(UCHAR(ch))) {
+ *lexemePtr = INVALID;
+ return scanned;
+ }
+ end = start;
+ while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) {
+ end += scanned;
+ numBytes -= scanned;
+ if (Tcl_UtfCharComplete(end, numBytes)) {
+ scanned = Tcl_UtfToUniChar(end, &ch);
+ } else {
+ char utfBytes[TCL_UTF_MAX];
+ memcpy(utfBytes, end, (size_t) numBytes);
+ utfBytes[numBytes] = '\0';
+ scanned = Tcl_UtfToUniChar(utfBytes, &ch);
+ }
+ }
+ *lexemePtr = BAREWORD;
+ return (end-start);
+}
+
+/*
* Boolean variable that controls whether expression compilation tracing is
* enabled.
*/
diff --git a/generic/tclParseExpr.c b/generic/tclParseExpr.c
deleted file mode 100644
index 6f9103b..0000000
--- a/generic/tclParseExpr.c
+++ /dev/null
@@ -1,1075 +0,0 @@
-/*
- * tclParseExpr.c --
- *
- * This file contains functions that parse Tcl expressions. They do so in
- * a general-purpose fashion that can be used for many different
- * purposes, including compilation, direct execution, code analysis, etc.
- *
- * Copyright (c) 1997 Sun Microsystems, Inc.
- * Copyright (c) 1998-2000 by Scriptics Corporation.
- * Contributions from Don Porter, NIST, 2006. (not subject to US copyright)
- *
- * See the file "license.terms" for information on usage and redistribution of
- * this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclParseExpr.c,v 1.47 2006/11/05 03:33:56 dgp Exp $
- */
-
-#include "tclInt.h"
-
-/*
- * The ExprNode structure represents one node of the parse tree produced
- * as an interim structure by the expression parser.
- */
-
-typedef struct ExprNode {
- unsigned char lexeme; /* Code that identifies the type of this node */
- int left; /* Index of the left operand of this operator node */
- int right; /* Index of the right operand of this operator node */
- int parent; /* Index of the operator of this operand node */
- int token; /* Index of the Tcl_Tokens of this leaf node */
-} ExprNode;
-
-/*
- * Set of lexeme codes stored in ExprNode structs to label and categorize
- * the lexemes found.
- */
-
-#define LEAF (1<<7)
-#define UNARY (1<<6)
-#define BINARY (1<<5)
-
-#define NODE_TYPE ( LEAF | UNARY | BINARY)
-
-#define PLUS 1
-#define MINUS 2
-#define BAREWORD 3
-#define INCOMPLETE 4
-#define INVALID 5
-
-#define NUMBER ( LEAF | 1)
-#define SCRIPT ( LEAF | 2)
-#define BOOLEAN ( LEAF | BAREWORD)
-#define BRACED ( LEAF | 4)
-#define VARIABLE ( LEAF | 5)
-#define QUOTED ( LEAF | 6)
-#define EMPTY ( LEAF | 7)
-
-#define UNARY_PLUS ( UNARY | PLUS)
-#define UNARY_MINUS ( UNARY | MINUS)
-#define FUNCTION ( UNARY | BAREWORD)
-#define START ( UNARY | 4)
-#define OPEN_PAREN ( UNARY | 5)
-#define NOT ( UNARY | 6)
-#define BIT_NOT ( UNARY | 7)
-
-#define BINARY_PLUS ( BINARY | PLUS)
-#define BINARY_MINUS ( BINARY | MINUS)
-#define COMMA ( BINARY | 3)
-#define MULT ( BINARY | 4)
-#define DIVIDE ( BINARY | 5)
-#define MOD ( BINARY | 6)
-#define LESS ( BINARY | 7)
-#define GREATER ( BINARY | 8)
-#define BIT_AND ( BINARY | 9)
-#define BIT_XOR ( BINARY | 10)
-#define BIT_OR ( BINARY | 11)
-#define QUESTION ( BINARY | 12)
-#define COLON ( BINARY | 13)
-#define LEFT_SHIFT ( BINARY | 14)
-#define RIGHT_SHIFT ( BINARY | 15)
-#define LEQ ( BINARY | 16)
-#define GEQ ( BINARY | 17)
-#define EQUAL ( BINARY | 18)
-#define NEQ ( BINARY | 19)
-#define AND ( BINARY | 20)
-#define OR ( BINARY | 21)
-#define STREQ ( BINARY | 22)
-#define STRNEQ ( BINARY | 23)
-#define EXPON ( BINARY | 24)
-#define IN_LIST ( BINARY | 25)
-#define NOT_IN_LIST ( BINARY | 26)
-#define CLOSE_PAREN ( BINARY | 27)
-#define END ( BINARY | 28)
-
-/*
- * Declarations for local functions to this file:
- */
-
-static void GenerateTokens(ExprNode *nodes, Tcl_Parse *scratchPtr,
- Tcl_Parse *parsePtr);
-static int ParseLexeme(CONST char *start, int numBytes,
- unsigned char *lexemePtr);
-
-
-/*
- *----------------------------------------------------------------------
- *
- * Tcl_ParseExpr --
- *
- * Given a string, the numBytes bytes starting at start, this function
- * parses it as a Tcl expression and stores information about the
- * structure of the expression in the Tcl_Parse struct indicated by the
- * caller.
- *
- * Results:
- * If the string is successfully parsed as a valid Tcl expression,
- * TCL_OK is returned, and data about the expression structure is
- * written to *parsePtr. If the string cannot be parsed as a valid
- * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL,
- * an error message is written to interp.
- *
- * Side effects:
- * If there is insufficient space in parsePtr to hold all the information
- * about the expression, then additional space is malloc-ed. If the
- * function returns TCL_OK then the caller must eventually invoke
- * Tcl_FreeParse to release any additional space that was allocated.
- *
- *----------------------------------------------------------------------
- */
-
-int
-Tcl_ParseExpr(
- Tcl_Interp *interp, /* Used for error reporting. */
- CONST char *start, /* Start of source string to parse. */
- int numBytes, /* Number of bytes in string. If < 0, the
- * string consists of all bytes up to the
- * first null character. */
- Tcl_Parse *parsePtr) /* Structure to fill with information about
- * the parsed expression; any previous
- * information in the structure is ignored. */
-{
-#define NUM_STATIC_NODES 64
- ExprNode staticNodes[NUM_STATIC_NODES];
- ExprNode *lastOrphanPtr, *nodes = staticNodes;
- int nodesAvailable = NUM_STATIC_NODES;
- int nodesUsed = 0;
- Tcl_Parse scratch; /* Parsing scratch space */
- Tcl_Obj *msg = NULL, *post = NULL;
- int scanned = 0, code = TCL_OK, insertMark = 0;
- CONST char *mark = "_@_";
- CONST int limit = 25;
- static CONST unsigned char prec[80] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 15, 15, 5, 16, 16, 16, 13, 13, 11, 10, 9, 6, 6, 14, 14,
- 13, 13, 12, 12, 8, 7, 12, 12, 17, 12, 12, 3, 1, 0, 0, 0,
- 0, 18, 18, 18, 2, 4, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0,
- };
-
- if (numBytes < 0) {
- numBytes = (start ? strlen(start) : 0);
- }
-
- TclParseInit(interp, start, numBytes, &scratch);
- TclParseInit(interp, start, numBytes, parsePtr);
-
- /* Initialize the parse tree with the special "START" node */
-
- nodes->lexeme = START;
- nodes->left = -1;
- nodes->right = -1;
- nodes->parent = -1;
- nodes->token = -1;
- lastOrphanPtr = nodes;
- nodesUsed++;
-
- while ((code == TCL_OK) && (lastOrphanPtr->lexeme != END)) {
- ExprNode *nodePtr, *lastNodePtr;
- Tcl_Token *tokenPtr;
-
- /*
- * Each pass through this loop adds one more ExprNode.
- * Allocate space for one if required.
- */
- if (nodesUsed >= nodesAvailable) {
- int lastOrphanIdx = lastOrphanPtr - nodes;
- int size = nodesUsed * 2;
- ExprNode *newPtr;
-
- if (nodes == staticNodes) {
- nodes = NULL;
- }
- do {
- newPtr = (ExprNode *) attemptckrealloc( (char *) nodes,
- (unsigned int) (size * sizeof(ExprNode)) );
- } while ((newPtr == NULL)
- && ((size -= (size - nodesUsed) / 2) > nodesUsed));
- if (newPtr == NULL) {
- msg = Tcl_NewStringObj(
- "not enough memory to parse expression", -1);
- code = TCL_ERROR;
- continue;
- }
- nodesAvailable = size;
- if (nodes == NULL) {
- memcpy((VOID *) newPtr, (VOID *) staticNodes,
- (size_t) (nodesUsed * sizeof(ExprNode)));
- }
- nodes = newPtr;
- lastOrphanPtr = nodes + lastOrphanIdx;
- }
- nodePtr = nodes + nodesUsed;
- lastNodePtr = nodePtr - 1;
-
- /* Skip white space between lexemes */
-
- scanned = TclParseAllWhiteSpace(start, numBytes);
- start += scanned;
- numBytes -= scanned;
-
- scanned = ParseLexeme(start, numBytes, &(nodePtr->lexeme));
-
- /* Use context to categorize the lexemes that are ambiguous */
-
- if ((NODE_TYPE & nodePtr->lexeme) == 0) {
- switch (nodePtr->lexeme) {
- case INVALID:
- msg = TclObjPrintf(
- "invalid character \"%.*s\"", scanned, start);
- code = TCL_ERROR;
- continue;
- case INCOMPLETE:
- msg = TclObjPrintf(
- "incomplete operator \"%.*s\"", scanned, start);
- code = TCL_ERROR;
- continue;
- case BAREWORD:
- if (start[scanned+TclParseAllWhiteSpace(
- start+scanned, numBytes-scanned)] == '(') {
- nodePtr->lexeme = FUNCTION;
- } else {
- Tcl_Obj *objPtr = Tcl_NewStringObj(start, scanned);
- Tcl_IncrRefCount(objPtr);
- code = Tcl_ConvertToType(NULL, objPtr, &tclBooleanType);
- Tcl_DecrRefCount(objPtr);
- if (code == TCL_OK) {
- nodePtr->lexeme = BOOLEAN;
- } else {
- msg = TclObjPrintf(
- "invalid bareword \"%.*s%s\"",
- (scanned < limit) ? scanned : limit - 3, start,
- (scanned < limit) ? "" : "...");
- post = TclObjPrintf(
- "should be \"$%.*s%s\" or \"{%.*s%s}\"",
- (scanned < limit) ? scanned : limit - 3,
- start, (scanned < limit) ? "" : "...",
- (scanned < limit) ? scanned : limit - 3,
- start, (scanned < limit) ? "" : "...");
- TclAppendPrintfToObj(post,
- " or \"%.*s%s(...)\" or ...",
- (scanned < limit) ? scanned : limit - 3,
- start, (scanned < limit) ? "" : "...");
- continue;
- }
- }
- break;
- case PLUS:
- case MINUS:
- if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
- nodePtr->lexeme |= BINARY;
- } else {
- nodePtr->lexeme |= UNARY;
- }
- }
- }
-
- /* Add node to parse tree based on category */
-
- switch (NODE_TYPE & nodePtr->lexeme) {
- case LEAF: {
- CONST char *end;
-
- if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
- CONST char *operand =
- scratch.tokenPtr[lastNodePtr->token].start;
-
- msg = TclObjPrintf("missing operator at %s", mark);
- if (operand[0] == '0') {
- Tcl_Obj *copy = Tcl_NewStringObj(operand,
- start + scanned - operand);
- if (TclCheckBadOctal(NULL, Tcl_GetString(copy))) {
- post = Tcl_NewStringObj(
- "looks like invalid octal number", -1);
- }
- Tcl_DecrRefCount(copy);
- }
- scanned = 0;
- insertMark = 1;
- code = TCL_ERROR;
- continue;
- }
-
- if (scratch.numTokens+1 >= scratch.tokensAvailable) {
- TclExpandTokenArray(&scratch);
- }
- nodePtr->token = scratch.numTokens;
- tokenPtr = scratch.tokenPtr + nodePtr->token;
- tokenPtr->type = TCL_TOKEN_SUB_EXPR;
- tokenPtr->start = start;
- scratch.numTokens++;
-
- switch (nodePtr->lexeme) {
- case NUMBER:
- case BOOLEAN:
- tokenPtr = scratch.tokenPtr + scratch.numTokens;
- tokenPtr->type = TCL_TOKEN_TEXT;
- tokenPtr->start = start;
- tokenPtr->size = scanned;
- tokenPtr->numComponents = 0;
- scratch.numTokens++;
-
- break;
-
- case QUOTED:
- code = Tcl_ParseQuotedString(interp, start, numBytes,
- &scratch, 1, &end);
- if (code != TCL_OK) {
- scanned = scratch.term - start;
- scanned += (scanned < numBytes);
- continue;
- }
- scanned = end - start;
- break;
-
- case BRACED:
- code = Tcl_ParseBraces(interp, start, numBytes,
- &scratch, 1, &end);
- if (code != TCL_OK) {
- continue;
- }
- scanned = end - start;
- break;
-
- case VARIABLE:
- code = Tcl_ParseVarName(interp, start, numBytes, &scratch, 1);
- if (code != TCL_OK) {
- scanned = scratch.term - start;
- scanned += (scanned < numBytes);
- continue;
- }
- tokenPtr = scratch.tokenPtr + nodePtr->token + 1;
- if (tokenPtr->type != TCL_TOKEN_VARIABLE) {
- msg = Tcl_NewStringObj("invalid character \"$\"", -1);
- code = TCL_ERROR;
- continue;
- }
- scanned = tokenPtr->size;
- break;
-
- case SCRIPT:
- tokenPtr = scratch.tokenPtr + scratch.numTokens;
- tokenPtr->type = TCL_TOKEN_COMMAND;
- tokenPtr->start = start;
- tokenPtr->numComponents = 0;
-
- end = start + numBytes;
- start++;
- while (1) {
- Tcl_Parse nested;
- code = Tcl_ParseCommand(interp,
- start, (end - start), 1, &nested);
- if (code != TCL_OK) {
- parsePtr->term = nested.term;
- parsePtr->errorType = nested.errorType;
- parsePtr->incomplete = nested.incomplete;
- break;
- }
- start = (nested.commandStart + nested.commandSize);
- Tcl_FreeParse(&nested);
- if ((nested.term < end) && (*nested.term == ']')
- && !nested.incomplete) {
- break;
- }
-
- if (start == end) {
- msg = Tcl_NewStringObj("missing close-bracket", -1);
- parsePtr->term = tokenPtr->start;
- parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
- parsePtr->incomplete = 1;
- code = TCL_ERROR;
- break;
- }
- }
- end = start;
- start = tokenPtr->start;
- if (code != TCL_OK) {
- scanned = parsePtr->term - start;
- scanned += (scanned < numBytes);
- continue;
- }
- scanned = end - start;
- tokenPtr->size = scanned;
- scratch.numTokens++;
- break;
- }
-
- tokenPtr = scratch.tokenPtr + nodePtr->token;
- tokenPtr->size = scanned;
- tokenPtr->numComponents = scratch.numTokens - nodePtr->token - 1;
-
- nodePtr->left = -1;
- nodePtr->right = -1;
- nodePtr->parent = -1;
- lastOrphanPtr = nodePtr;
- nodesUsed++;
- break;
- }
-
- case UNARY:
- if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) {
- msg = TclObjPrintf("missing operator at %s", mark);
- scanned = 0;
- insertMark = 1;
- code = TCL_ERROR;
- continue;
- }
- nodePtr->left = -1;
- nodePtr->right = -1;
- nodePtr->parent = -1;
-
- if (scratch.numTokens >= scratch.tokensAvailable) {
- TclExpandTokenArray(&scratch);
- }
- nodePtr->token = scratch.numTokens;
- tokenPtr = scratch.tokenPtr + nodePtr->token;
- tokenPtr->type = TCL_TOKEN_OPERATOR;
- tokenPtr->start = start;
- tokenPtr->size = scanned;
- tokenPtr->numComponents = 0;
- scratch.numTokens++;
-
- lastOrphanPtr = nodePtr;
- nodesUsed++;
- break;
-
- case BINARY: {
- ExprNode *otherPtr = NULL;
- unsigned char precedence = prec[nodePtr->lexeme];
-
- if ((nodePtr->lexeme == CLOSE_PAREN)
- && (lastNodePtr->lexeme == OPEN_PAREN)) {
- if (lastNodePtr[-1].lexeme == FUNCTION) {
- /* Normally, "()" is a syntax error, but as a special
- * case accept it as an argument list for a function */
- scanned = 0;
- nodePtr->lexeme = EMPTY;
- nodePtr->left = -1;
- nodePtr->right = -1;
- nodePtr->parent = -1;
- nodePtr->token = -1;
-
- lastOrphanPtr = nodePtr;
- nodesUsed++;
- break;
-
- }
- msg = TclObjPrintf("empty subexpression at %s", mark);
- scanned = 0;
- insertMark = 1;
- code = TCL_ERROR;
- continue;
- }
-
-
- if ((NODE_TYPE & lastNodePtr->lexeme) != LEAF) {
- if (prec[lastNodePtr->lexeme] > precedence) {
- if (lastNodePtr->lexeme == OPEN_PAREN) {
- msg = Tcl_NewStringObj("unbalanced open paren", -1);
- } else if (lastNodePtr->lexeme == COMMA) {
- msg = TclObjPrintf(
- "missing function argument at %s", mark);
- scanned = 0;
- insertMark = 1;
- } else if (lastNodePtr->lexeme == START) {
- msg = Tcl_NewStringObj("empty expression", -1);
- }
- } else {
- if (nodePtr->lexeme == CLOSE_PAREN) {
- msg = Tcl_NewStringObj("unbalanced close paren", -1);
- } else if ((nodePtr->lexeme == COMMA)
- && (lastNodePtr->lexeme == OPEN_PAREN)
- && (lastNodePtr[-1].lexeme == FUNCTION)) {
- msg = TclObjPrintf(
- "missing function argument at %s", mark);
- scanned = 0;
- insertMark = 1;
- }
- }
- if (msg == NULL) {
- msg = TclObjPrintf("missing operand at %s", mark);
- scanned = 0;
- insertMark = 1;
- }
- code = TCL_ERROR;
- continue;
- }
-
- while (1) {
-
- if (lastOrphanPtr->parent >= 0) {
- otherPtr = nodes + lastOrphanPtr->parent;
- } else if (lastOrphanPtr->left >= 0) {
- Tcl_Panic("Tcl_ParseExpr: left closure programming error");
- } else {
- lastOrphanPtr->parent = lastOrphanPtr - nodes;
- otherPtr = lastOrphanPtr;
- }
- otherPtr--;
-
- if (prec[otherPtr->lexeme] < precedence) {
- break;
- }
-
- if (prec[otherPtr->lexeme] == precedence) {
- /* Special association rules for the ternary operators. */
- if ((otherPtr->lexeme == QUESTION)
- && (lastOrphanPtr->lexeme != COLON)) {
- break;
- }
- if ((otherPtr->lexeme == COLON)
- && (nodePtr->lexeme == QUESTION)) {
- break;
- }
- /* Right association rules for exponentiation. */
- if (nodePtr->lexeme == EXPON) {
- break;
- }
- }
-
- /* Some checks before linking */
- if ((otherPtr->lexeme == OPEN_PAREN)
- && (nodePtr->lexeme != CLOSE_PAREN)) {
- lastOrphanPtr = otherPtr;
- msg = Tcl_NewStringObj("unbalanced open paren", -1);
- code = TCL_ERROR;
- break;
- }
- if ((otherPtr->lexeme == QUESTION)
- && (lastOrphanPtr->lexeme != COLON)) {
- msg = TclObjPrintf(
- "missing operator \":\" at %s", mark);
- scanned = 0;
- insertMark = 1;
- code = TCL_ERROR;
- break;
- }
- if ((lastOrphanPtr->lexeme == COLON)
- && (otherPtr->lexeme != QUESTION)) {
- msg = Tcl_NewStringObj(
- "unexpected operator \":\" without preceding \"?\"",
- -1);
- code = TCL_ERROR;
- break;
- }
-
- /* Link orphan as right operand of otherPtr */
- otherPtr->right = lastOrphanPtr - nodes;
- lastOrphanPtr->parent = otherPtr - nodes;
- lastOrphanPtr = otherPtr;
-
- if (otherPtr->lexeme == OPEN_PAREN) {
- /* CLOSE_PAREN can only close one OPEN_PAREN */
- tokenPtr = scratch.tokenPtr + otherPtr->token;
- tokenPtr->size = start + scanned - tokenPtr->start;
- break;
- }
- if (otherPtr->lexeme == START) {
- /* Don't backtrack beyond the start */
- break;
- }
- }
- if (code != TCL_OK) {
- continue;
- }
-
- if (nodePtr->lexeme == CLOSE_PAREN) {
- if (otherPtr->lexeme == START) {
- msg = Tcl_NewStringObj("unbalanced close paren", -1);
- code = TCL_ERROR;
- continue;
- }
- /* Create no node for a CLOSE_PAREN lexeme */
- break;
- }
-
- if ((nodePtr->lexeme == COMMA) && ((otherPtr->lexeme != OPEN_PAREN)
- || (otherPtr[-1].lexeme != FUNCTION))) {
- msg = Tcl_NewStringObj(
- "unexpected \",\" outside function argument list", -1);
- code = TCL_ERROR;
- continue;
- }
-
- if (lastOrphanPtr->lexeme == COLON) {
- msg = Tcl_NewStringObj(
- "unexpected operator \":\" without preceding \"?\"",
- -1);
- code = TCL_ERROR;
- continue;
- }
-
- /* Link orphan as left operand of new node */
- nodePtr->right = -1;
-
- if (scratch.numTokens >= scratch.tokensAvailable) {
- TclExpandTokenArray(&scratch);
- }
- nodePtr->token = scratch.numTokens;
- tokenPtr = scratch.tokenPtr + nodePtr->token;
- tokenPtr->type = TCL_TOKEN_OPERATOR;
- tokenPtr->start = start;
- tokenPtr->size = scanned;
- tokenPtr->numComponents = 0;
- scratch.numTokens++;
-
- nodePtr->left = lastOrphanPtr - nodes;
- nodePtr->parent = lastOrphanPtr->parent;
- lastOrphanPtr->parent = nodePtr - nodes;
- lastOrphanPtr = nodePtr;
- nodesUsed++;
- break;
- }
- }
-
- start += scanned;
- numBytes -= scanned;
- }
-
- if (code == TCL_OK) {
- /* Shift tokens from scratch space to caller space */
- GenerateTokens(nodes, &scratch, parsePtr);
- } else {
- if (parsePtr->errorType == TCL_PARSE_SUCCESS) {
- parsePtr->errorType = TCL_PARSE_SYNTAX;
- parsePtr->term = start;
- }
- if (interp == NULL) {
- if (msg) {
- Tcl_DecrRefCount(msg);
- }
- } else {
- if (msg == NULL) {
- msg = Tcl_GetObjResult(interp);
- }
- TclAppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"",
- ((start - limit) < scratch.string) ? "" : "...",
- ((start - limit) < scratch.string)
- ? (start - scratch.string) : limit - 3,
- ((start - limit) < scratch.string)
- ? scratch.string : start - limit + 3,
- (scanned < limit) ? scanned : limit - 3, start,
- (scanned < limit) ? "" : "...",
- insertMark ? mark : "",
- (start + scanned + limit > scratch.end)
- ? scratch.end - (start + scanned) : limit-3,
- start + scanned,
- (start + scanned + limit > scratch.end) ? "" : "..."
- );
- if (post != NULL) {
- Tcl_AppendToObj(msg, ";\n", -1);
- Tcl_AppendObjToObj(msg, post);
- Tcl_DecrRefCount(post);
- }
- Tcl_SetObjResult(interp, msg);
- numBytes = scratch.end - scratch.string;
- TclAppendObjToErrorInfo(interp, TclObjPrintf(
- "\n (parsing expression \"%.*s%s\")",
- (numBytes < limit) ? numBytes : limit - 3,
- scratch.string, (numBytes < limit) ? "" : "..."));
- }
- }
-
- if (nodes != staticNodes) {
- ckfree((char *)nodes);
- }
- Tcl_FreeParse(&scratch);
- return code;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * GenerateTokens --
- *
- * Routine that generates Tcl_Tokens that represent a Tcl expression
- * and writes them to *parsePtr. The parse tree of the expression
- * is in the array of ExprNodes, nodes. Some of the Tcl_Tokens are
- * copied from scratch space at *scratchPtr, where the parsing pass
- * that constructed the parse tree left them.
- *
- *----------------------------------------------------------------------
- */
-
-static void
-GenerateTokens(
- ExprNode *nodes,
- Tcl_Parse *scratchPtr,
- Tcl_Parse *parsePtr)
-{
- ExprNode *nodePtr = nodes + nodes->right;
- Tcl_Token *sourcePtr, *destPtr, *tokenPtr = scratchPtr->tokenPtr;
- int toCopy;
- CONST char *end = tokenPtr->start + tokenPtr->size;
-
- while (nodePtr->lexeme != START) {
- switch (NODE_TYPE & nodePtr->lexeme) {
- case BINARY:
- if (nodePtr->left >= 0) {
- if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) {
- sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
- if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
- TclExpandTokenArray(parsePtr);
- }
- destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
- nodePtr->token = parsePtr->numTokens;
- destPtr->type = TCL_TOKEN_SUB_EXPR;
- destPtr->start = tokenPtr->start;
- destPtr++;
- *destPtr = *sourcePtr;
- parsePtr->numTokens += 2;
- }
- nodePtr = nodes + nodePtr->left;
- nodes[nodePtr->parent].left = -1;
- } else if (nodePtr->right >= 0) {
- tokenPtr += tokenPtr->numComponents + 1;
- nodePtr = nodes + nodePtr->right;
- nodes[nodePtr->parent].right = -1;
- } else {
- if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) {
- destPtr = parsePtr->tokenPtr + nodePtr->token;
- destPtr->size = end - destPtr->start;
- destPtr->numComponents =
- parsePtr->numTokens - nodePtr->token - 1;
- }
- nodePtr = nodes + nodePtr->parent;
- }
- break;
-
- case UNARY:
- if (nodePtr->right >= 0) {
- sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
- if (nodePtr->lexeme != OPEN_PAREN) {
- if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) {
- TclExpandTokenArray(parsePtr);
- }
- destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
- nodePtr->token = parsePtr->numTokens;
- destPtr->type = TCL_TOKEN_SUB_EXPR;
- destPtr->start = tokenPtr->start;
- destPtr++;
- *destPtr = *sourcePtr;
- parsePtr->numTokens += 2;
- }
- if (tokenPtr == sourcePtr) {
- tokenPtr += tokenPtr->numComponents + 1;
- }
- nodePtr = nodes + nodePtr->right;
- nodes[nodePtr->parent].right = -1;
- } else {
- if (nodePtr->lexeme != OPEN_PAREN) {
- destPtr = parsePtr->tokenPtr + nodePtr->token;
- destPtr->size = end - destPtr->start;
- destPtr->numComponents =
- parsePtr->numTokens - nodePtr->token - 1;
- } else {
- sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
- end = sourcePtr->start + sourcePtr->size;
- }
- nodePtr = nodes + nodePtr->parent;
- }
- break;
-
- case LEAF:
- switch (nodePtr->lexeme) {
- case EMPTY:
- break;
-
- case BRACED:
- case QUOTED:
- sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
- end = sourcePtr->start + sourcePtr->size;
- if (sourcePtr->numComponents > 1) {
- toCopy = sourcePtr->numComponents;
- if (tokenPtr == sourcePtr) {
- tokenPtr += toCopy + 1;
- }
- sourcePtr->numComponents++;
- while (parsePtr->numTokens + toCopy + 1
- >= parsePtr->tokensAvailable) {
- TclExpandTokenArray(parsePtr);
- }
- destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
- *destPtr++ = *sourcePtr;
- *destPtr = *sourcePtr++;
- destPtr->type = TCL_TOKEN_WORD;
- destPtr->numComponents = toCopy;
- destPtr++;
- memcpy((VOID *) destPtr, (VOID *) sourcePtr,
- (size_t) (toCopy * sizeof(Tcl_Token)));
- parsePtr->numTokens += toCopy + 2;
- break;
- }
-
- default:
- sourcePtr = scratchPtr->tokenPtr + nodePtr->token;
- end = sourcePtr->start + sourcePtr->size;
- toCopy = sourcePtr->numComponents + 1;
- if (tokenPtr == sourcePtr) {
- tokenPtr += toCopy;
- }
- while (parsePtr->numTokens + toCopy - 1
- >= parsePtr->tokensAvailable) {
- TclExpandTokenArray(parsePtr);
- }
- destPtr = parsePtr->tokenPtr + parsePtr->numTokens;
- memcpy((VOID *) destPtr, (VOID *) sourcePtr,
- (size_t) (toCopy * sizeof(Tcl_Token)));
- parsePtr->numTokens += toCopy;
- break;
-
- }
- nodePtr = nodes + nodePtr->parent;
- break;
-
- }
- }
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * ParseLexeme --
- *
- * Parse a single lexeme from the start of a string, scanning no
- * more than numBytes bytes.
- *
- * Results:
- * Returns the number of bytes scanned to produce the lexeme.
- *
- * Side effects:
- * Code identifying lexeme parsed is writen to *lexemePtr.
- *
- *----------------------------------------------------------------------
- */
-
-static int
-ParseLexeme(
- CONST char *start, /* Start of lexeme to parse. */
- int numBytes, /* Number of bytes in string. */
- unsigned char *lexemePtr) /* Write code of parsed lexeme to this
- * storage. */
-{
- CONST char *end;
- int scanned;
- Tcl_UniChar ch;
-
- if (numBytes == 0) {
- *lexemePtr = END;
- return 0;
- }
- switch (*start) {
- case '[':
- *lexemePtr = SCRIPT;
- return 1;
-
- case '{':
- *lexemePtr = BRACED;
- return 1;
-
- case '(':
- *lexemePtr = OPEN_PAREN;
- return 1;
-
- case ')':
- *lexemePtr = CLOSE_PAREN;
- return 1;
-
- case '$':
- *lexemePtr = VARIABLE;
- return 1;
-
- case '\"':
- *lexemePtr = QUOTED;
- return 1;
-
- case ',':
- *lexemePtr = COMMA;
- return 1;
-
- case '/':
- *lexemePtr = DIVIDE;
- return 1;
-
- case '%':
- *lexemePtr = MOD;
- return 1;
-
- case '+':
- *lexemePtr = PLUS;
- return 1;
-
- case '-':
- *lexemePtr = MINUS;
- return 1;
-
- case '?':
- *lexemePtr = QUESTION;
- return 1;
-
- case ':':
- *lexemePtr = COLON;
- return 1;
-
- case '^':
- *lexemePtr = BIT_XOR;
- return 1;
-
- case '~':
- *lexemePtr = BIT_NOT;
- return 1;
-
- case '*':
- if ((numBytes > 1) && (start[1] == '*')) {
- *lexemePtr = EXPON;
- return 2;
- }
- *lexemePtr = MULT;
- return 1;
-
- case '=':
- if ((numBytes > 1) && (start[1] == '=')) {
- *lexemePtr = EQUAL;
- return 2;
- }
- *lexemePtr = INCOMPLETE;
- return 1;
-
- case '!':
- if ((numBytes > 1) && (start[1] == '=')) {
- *lexemePtr = NEQ;
- return 2;
- }
- *lexemePtr = NOT;
- return 1;
-
- case '&':
- if ((numBytes > 1) && (start[1] == '&')) {
- *lexemePtr = AND;
- return 2;
- }
- *lexemePtr = BIT_AND;
- return 1;
-
- case '|':
- if ((numBytes > 1) && (start[1] == '|')) {
- *lexemePtr = OR;
- return 2;
- }
- *lexemePtr = BIT_OR;
- return 1;
-
- case '<':
- if (numBytes > 1) {
- switch (start[1]) {
- case '<':
- *lexemePtr = LEFT_SHIFT;
- return 2;
- case '=':
- *lexemePtr = LEQ;
- return 2;
- }
- }
- *lexemePtr = LESS;
- return 1;
-
- case '>':
- if (numBytes > 1) {
- switch (start[1]) {
- case '>':
- *lexemePtr = RIGHT_SHIFT;
- return 2;
- case '=':
- *lexemePtr = GEQ;
- return 2;
- }
- }
- *lexemePtr = GREATER;
- return 1;
-
- case 'i':
- if ((numBytes > 1) && (start[1] == 'n')
- && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
- /*
- * Must make this check so we can tell the difference between
- * the "in" operator and the "int" function name and the
- * "infinity" numeric value.
- */
- *lexemePtr = IN_LIST;
- return 2;
- }
- break;
-
- case 'e':
- if ((numBytes > 1) && (start[1] == 'q')
- && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
- *lexemePtr = STREQ;
- return 2;
- }
- break;
-
- case 'n':
- if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
- switch (start[1]) {
- case 'e':
- *lexemePtr = STRNEQ;
- return 2;
- case 'i':
- *lexemePtr = NOT_IN_LIST;
- return 2;
- }
- }
- }
-
- if (TclParseNumber(NULL, NULL, NULL, start, numBytes, &end,
- TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
- *lexemePtr = NUMBER;
- return (end-start);
- }
-
- if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = Tcl_UtfToUniChar(start, &ch);
- } else {
- char utfBytes[TCL_UTF_MAX];
- memcpy(utfBytes, start, (size_t) numBytes);
- utfBytes[numBytes] = '\0';
- scanned = Tcl_UtfToUniChar(utfBytes, &ch);
- }
- if (!isalpha(UCHAR(ch))) {
- *lexemePtr = INVALID;
- return scanned;
- }
- end = start;
- while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) {
- end += scanned;
- numBytes -= scanned;
- if (Tcl_UtfCharComplete(end, numBytes)) {
- scanned = Tcl_UtfToUniChar(end, &ch);
- } else {
- char utfBytes[TCL_UTF_MAX];
- memcpy(utfBytes, end, (size_t) numBytes);
- utfBytes[numBytes] = '\0';
- scanned = Tcl_UtfToUniChar(utfBytes, &ch);
- }
- }
- *lexemePtr = BAREWORD;
- return (end-start);
-}
-
-/*
- * Local Variables:
- * mode: c
- * c-basic-offset: 4
- * fill-column: 78
- * End:
- */