diff options
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r-- | generic/tclCompExpr.c | 4388 |
1 files changed, 1916 insertions, 2472 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index d98061c..94c1bd6 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1,188 +1,545 @@ /* * tclCompExpr.c -- * - * This file contains the code to compile Tcl expressions. + * This file contains the code to parse and compile Tcl expressions and + * implementations of the Tcl commands corresponding to expression + * operators, such as the command ::tcl::mathop::+ . * - * Copyright (c) 1997 Sun Microsystems, Inc. - * Copyright (c) 1998-2000 by Scriptics Corporation. - * Contributions from Don Porter, NIST, 2006. (not subject to US copyright) + * Contributions from Don Porter, NIST, 2006-2007. (not subject to US copyright) * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclCompExpr.c,v 1.46 2006/12/13 16:28:06 dkf Exp $ */ #include "tclInt.h" -#include "tclCompile.h" +#include "tclCompile.h" /* CompileEnv */ -#undef USE_EXPR_TOKENS -#undef PARSE_DIRECT_EXPR_TOKENS +/* + * Expression parsing takes place in the routine ParseExpr(). It takes a + * string as input, parses that string, and generates a representation of the + * expression in the form of a tree of operators, a list of literals, a list + * of function names, and an array of Tcl_Token's within a Tcl_Parse struct. + * The tree is composed of OpNodes. + */ -#ifdef PARSE_DIRECT_EXPR_TOKENS +typedef struct OpNode { + int left; /* "Pointer" to the left operand. */ + int right; /* "Pointer" to the right operand. */ + union { + int parent; /* "Pointer" to the parent operand. */ + int prev; /* "Pointer" joining incomplete tree stack */ + } p; + unsigned char lexeme; /* Code that identifies the operator. */ + unsigned char precedence; /* Precedence of the operator */ + unsigned char mark; /* Mark used to control traversal. */ + unsigned char constant; /* Flag marking constant subexpressions. */ +} OpNode; /* - * The ExprNode structure represents one node of the parse tree produced as an - * interim structure by the expression parser. + * The storage for the tree is dynamically allocated array of OpNodes. The + * array is grown as parsing needs dictate according to a scheme similar to + * Tcl's string growth algorithm, so that the resizing costs are O(N) and so + * that we use at least half the memory allocated as expressions get large. + * + * Each OpNode in the tree represents an operator in the expression, either + * unary or binary. When parsing is completed successfully, a binary operator + * OpNode will have its left and right fields filled with "pointers" to its + * left and right operands. A unary operator OpNode will have its right field + * filled with a pointer to its single operand. When an operand is a + * subexpression the "pointer" takes the form of the index -- a non-negative + * integer -- into the OpNode storage array where the root of that + * subexpression parse tree is found. + * + * Non-operator elements of the expression do not get stored in the OpNode + * tree. They are stored in the other structures according to their type. + * Literal values get appended to the literal list. Elements that denote forms + * of quoting or substitution known to the Tcl parser get stored as + * Tcl_Tokens. These non-operator elements of the expression are the leaves of + * the completed parse tree. When an operand of an OpNode is one of these leaf + * elements, the following negative integer codes are used to indicate which + * kind of elements it is. */ -typedef struct ExprNode { - unsigned char lexeme; /* Code that identifies the type of this - * node. */ - int left; /* Index of the left operand of this operator - * node. */ - int right; /* Index of the right operand of this operator - * node. */ - int parent; /* Index of the operator of this operand - * node. */ - int token; /* Index of the Tcl_Tokens of this leaf - * node. */ -} ExprNode; - -#endif +enum OperandTypes { + OT_LITERAL = -3, /* Operand is a literal in the literal list */ + OT_TOKENS = -2, /* Operand is sequence of Tcl_Tokens */ + OT_EMPTY = -1 /* "Operand" is an empty string. This is a special + * case used only to represent the EMPTY lexeme. See + * below. */ +}; /* - * Integer codes indicating the form of an operand of an operator. + * Readable macros to test whether a "pointer" value points to an operator. + * They operate on the "non-negative integer -> operator; negative integer -> + * a non-operator OperandType" distinction. */ -enum OperandTypes { - OT_NONE = -4, OT_LITERAL = -3, OT_TOKENS = -2, OT_EMPTY = -1 +#define IsOperator(l) ((l) >= 0) +#define NotOperator(l) ((l) < 0) + +/* + * Note that it is sufficient to store in the tree just the type of leaf + * operand, without any explicit pointer to which leaf. This is true because + * the traversals of the completed tree we perform are known to visit the + * leaves in the same order as the original parse. + * + * In a completed parse tree, those OpNodes that are themselves (roots of + * subexpression trees that are) operands of some operator store in their + * p.parent field a "pointer" to the OpNode of that operator. The p.parent + * field permits a traversal of the tree within a non-recursive routine + * (ConvertTreeToTokens() and CompileExprTree()). This means that even + * expression trees of great depth pose no risk of blowing the C stack. + * + * While the parse tree is being constructed, the same memory space is used to + * hold the p.prev field which chains together a stack of incomplete trees + * awaiting their right operands. + * + * The lexeme field is filled in with the lexeme of the operator that is + * returned by the ParseLexeme() routine. Only lexemes for unary and binary + * operators get stored in an OpNode. Other lexmes get different treatement. + * + * The precedence field provides a place to store the precedence of the + * operator, so it need not be looked up again and again. + * + * The mark field is use to control the traversal of the tree, so that it can + * be done non-recursively. The mark values are: + */ + +enum Marks { + MARK_LEFT, /* Next step of traversal is to visit left subtree */ + MARK_RIGHT, /* Next step of traversal is to visit right subtree */ + MARK_PARENT /* Next step of traversal is to return to parent */ }; /* - * The OpNode structure represents one operator node in the parse tree - * produced as an interim structure by the expression parser. + * The constant field is a boolean flag marking which subexpressions are + * completely known at compile time, and are eligible for computing then + * rather than waiting until run time. */ -typedef struct OpNode { - unsigned char lexeme; /* Code that identifies the operator. */ - int left; /* Index of the left operand. Non-negative - * integer is an index into the parse tree, - * pointing to another operator. Value - * OT_LITERAL indicates operand is the next - * entry in the literal list. Value OT_TOKENS - * indicates the operand is the next word in - * the Tcl_Parse struct. Value OT_NONE - * indicates we haven't yet parsed the operand - * for this operator. */ - int right; /* Index of the right operand. Same - * interpretation as left, with addition of - * OT_EMPTY meaning zero arguments. */ - int parent; /* Index of the operator of this operand - * node. */ -} OpNode; +/* + * Each lexeme belongs to one of four categories, which determine its place in + * the parse tree. We use the two high bits of the (unsigned char) value to + * store a NODE_TYPE code. + */ + +#define NODE_TYPE 0xC0 /* - * Set of lexeme codes stored in ExprNode structs to label and categorize the - * lexemes found. + * The four category values are LEAF, UNARY, and BINARY, explained below, and + * "uncategorized", which is used either temporarily, until context determines + * which of the other three categories is correct, or for lexemes like + * INVALID, which aren't really lexemes at all, but indicators of a parsing + * error. Note that the codes must be distinct to distinguish categories, but + * need not take the form of a bit array. */ -#define LEAF (1<<7) -#define UNARY (1<<6) -#define BINARY (1<<5) - -#define NODE_TYPE ( LEAF | UNARY | BINARY) - -#define PLUS 1 -#define MINUS 2 -#define BAREWORD 3 -#define INCOMPLETE 4 -#define INVALID 5 - -#define NUMBER ( LEAF | 1) -#define SCRIPT ( LEAF | 2) -#define BOOLEAN ( LEAF | BAREWORD) -#define BRACED ( LEAF | 4) -#define VARIABLE ( LEAF | 5) -#define QUOTED ( LEAF | 6) -#define EMPTY ( LEAF | 7) - -#define UNARY_PLUS ( UNARY | PLUS) -#define UNARY_MINUS ( UNARY | MINUS) -#define FUNCTION ( UNARY | BAREWORD) -#define START ( UNARY | 4) -#define OPEN_PAREN ( UNARY | 5) -#define NOT ( UNARY | 6) -#define BIT_NOT ( UNARY | 7) - -#define BINARY_PLUS ( BINARY | PLUS) -#define BINARY_MINUS ( BINARY | MINUS) -#define COMMA ( BINARY | 3) -#define MULT ( BINARY | 4) -#define DIVIDE ( BINARY | 5) -#define MOD ( BINARY | 6) -#define LESS ( BINARY | 7) -#define GREATER ( BINARY | 8) -#define BIT_AND ( BINARY | 9) -#define BIT_XOR ( BINARY | 10) -#define BIT_OR ( BINARY | 11) -#define QUESTION ( BINARY | 12) -#define COLON ( BINARY | 13) -#define LEFT_SHIFT ( BINARY | 14) -#define RIGHT_SHIFT ( BINARY | 15) -#define LEQ ( BINARY | 16) -#define GEQ ( BINARY | 17) -#define EQUAL ( BINARY | 18) -#define NEQ ( BINARY | 19) -#define AND ( BINARY | 20) -#define OR ( BINARY | 21) -#define STREQ ( BINARY | 22) -#define STRNEQ ( BINARY | 23) -#define EXPON ( BINARY | 24) -#define IN_LIST ( BINARY | 25) -#define NOT_IN_LIST ( BINARY | 26) -#define CLOSE_PAREN ( BINARY | 27) -#define END ( BINARY | 28) +#define BINARY 0x40 /* This lexeme is a binary operator. An OpNode + * representing it should go into the parse + * tree, and two operands should be parsed for + * it in the expression. */ +#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode + * representing it should go into the parse + * tree, and one operand should be parsed for + * it in the expression. */ +#define LEAF 0xC0 /* This lexeme is a leaf operand in the parse + * tree. No OpNode will be placed in the tree + * for it. Either a literal value will be + * appended to the list of literals in this + * expression, or appropriate Tcl_Tokens will + * be appended in a Tcl_Parse struct to + * represent those leaves that require some + * form of substitution. */ + +/* Uncategorized lexemes */ + +#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or + * BINARY_PLUS according to context. */ +#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or + * BINARY_MINUS according to context. */ +#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to + * FUNCTION or a parse error according to + * context and value. */ +#define INCOMPLETE 4 /* A parse error. Used only when the single + * "=" is encountered. */ +#define INVALID 5 /* A parse error. Used when any punctuation + * appears that's not a supported operator. */ + +/* Leaf lexemes */ + +#define NUMBER (LEAF | 1) + /* For literal numbers */ +#define SCRIPT (LEAF | 2) + /* Script substitution; [foo] */ +#define BOOLEAN (LEAF | BAREWORD) + /* For literal booleans */ +#define BRACED (LEAF | 4) + /* Braced string; {foo bar} */ +#define VARIABLE (LEAF | 5) + /* Variable substitution; $x */ +#define QUOTED (LEAF | 6) + /* Quoted string; "foo $bar [soom]" */ +#define EMPTY (LEAF | 7) + /* Used only for an empty argument list to a + * function. Represents the empty string + * within parens in the expression: rand() */ + +/* Unary operator lexemes */ + +#define UNARY_PLUS (UNARY | PLUS) +#define UNARY_MINUS (UNARY | MINUS) +#define FUNCTION (UNARY | BAREWORD) + /* This is a bit of "creative interpretation" + * on the part of the parser. A function call + * is parsed into the parse tree according to + * the perspective that the function name is a + * unary operator and its argument list, + * enclosed in parens, is its operand. The + * additional requirements not implied + * generally by treatment as a unary operator + * -- for example, the requirement that the + * operand be enclosed in parens -- are hard + * coded in the relevant portions of + * ParseExpr(). We trade off the need to + * include such exceptional handling in the + * code against the need we would otherwise + * have for more lexeme categories. */ +#define START (UNARY | 4) + /* This lexeme isn't parsed from the + * expression text at all. It represents the + * start of the expression and sits at the + * root of the parse tree where it serves as + * the start/end point of traversals. */ +#define OPEN_PAREN (UNARY | 5) + /* Another bit of creative interpretation, + * where we treat "(" as a unary operator with + * the sub-expression between it and its + * matching ")" as its operand. See + * CLOSE_PAREN below. */ +#define NOT (UNARY | 6) +#define BIT_NOT (UNARY | 7) + +/* Binary operator lexemes */ + +#define BINARY_PLUS (BINARY | PLUS) +#define BINARY_MINUS (BINARY | MINUS) +#define COMMA (BINARY | 3) + /* The "," operator is a low precedence binary + * operator that separates the arguments in a + * function call. The additional constraint + * that this operator can only legally appear + * at the right places within a function call + * argument list are hard coded within + * ParseExpr(). */ +#define MULT (BINARY | 4) +#define DIVIDE (BINARY | 5) +#define MOD (BINARY | 6) +#define LESS (BINARY | 7) +#define GREATER (BINARY | 8) +#define BIT_AND (BINARY | 9) +#define BIT_XOR (BINARY | 10) +#define BIT_OR (BINARY | 11) +#define QUESTION (BINARY | 12) + /* These two lexemes make up the */ +#define COLON (BINARY | 13) + /* ternary conditional operator, $x ? $y : $z. + * We treat them as two binary operators to + * avoid another lexeme category, and code the + * additional constraints directly in + * ParseExpr(). For instance, the right + * operand of a "?" operator must be a ":" + * operator. */ +#define LEFT_SHIFT (BINARY | 14) +#define RIGHT_SHIFT (BINARY | 15) +#define LEQ (BINARY | 16) +#define GEQ (BINARY | 17) +#define EQUAL (BINARY | 18) +#define NEQ (BINARY | 19) +#define AND (BINARY | 20) +#define OR (BINARY | 21) +#define STREQ (BINARY | 22) +#define STRNEQ (BINARY | 23) +#define EXPON (BINARY | 24) + /* Unlike the other binary operators, EXPON is + * right associative and this distinction is + * coded directly in ParseExpr(). */ +#define IN_LIST (BINARY | 25) +#define NOT_IN_LIST (BINARY | 26) +#define CLOSE_PAREN (BINARY | 27) + /* By categorizing the CLOSE_PAREN lexeme as a + * BINARY operator, the normal parsing rules + * for binary operators assure that a close + * paren will not directly follow another + * operator, and the machinery already in + * place to connect operands to operators + * according to precedence performs most of + * the work of matching open and close parens + * for us. In the end though, a close paren is + * not really a binary operator, and some + * special coding in ParseExpr() make sure we + * never put an actual CLOSE_PAREN node in the + * parse tree. The sub-expression between + * parens becomes the single argument of the + * matching OPEN_PAREN unary operator. */ +#define END (BINARY | 28) + /* This lexeme represents the end of the + * string being parsed. Treating it as a + * binary operator follows the same logic as + * the CLOSE_PAREN lexeme and END pairs with + * START, in the same way that CLOSE_PAREN + * pairs with OPEN_PAREN. */ + +/* + * When ParseExpr() builds the parse tree it must choose which operands to + * connect to which operators. This is done according to operator precedence. + * The greater an operator's precedence the greater claim it has to link to an + * available operand. The Precedence enumeration lists the precedence values + * used by Tcl expression operators, from lowest to highest claim. Each + * precedence level is commented with the operators that hold that precedence. + */ + +enum Precedence { + PREC_END = 1, /* END */ + PREC_START, /* START */ + PREC_CLOSE_PAREN, /* ")" */ + PREC_OPEN_PAREN, /* "(" */ + PREC_COMMA, /* "," */ + PREC_CONDITIONAL, /* "?", ":" */ + PREC_OR, /* "||" */ + PREC_AND, /* "&&" */ + PREC_BIT_OR, /* "|" */ + PREC_BIT_XOR, /* "^" */ + PREC_BIT_AND, /* "&" */ + PREC_EQUAL, /* "==", "!=", "eq", "ne", "in", "ni" */ + PREC_COMPARE, /* "<", ">", "<=", ">=" */ + PREC_SHIFT, /* "<<", ">>" */ + PREC_ADD, /* "+", "-" */ + PREC_MULT, /* "*", "/", "%" */ + PREC_EXPON, /* "**" */ + PREC_UNARY /* "+", "-", FUNCTION, "!", "~" */ +}; + +/* + * Here the same information contained in the comments above is stored in + * inverted form, so that given a lexeme, one can quickly look up its + * precedence value. + */ + +static const unsigned char prec[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + PREC_ADD, /* BINARY_PLUS */ + PREC_ADD, /* BINARY_MINUS */ + PREC_COMMA, /* COMMA */ + PREC_MULT, /* MULT */ + PREC_MULT, /* DIVIDE */ + PREC_MULT, /* MOD */ + PREC_COMPARE, /* LESS */ + PREC_COMPARE, /* GREATER */ + PREC_BIT_AND, /* BIT_AND */ + PREC_BIT_XOR, /* BIT_XOR */ + PREC_BIT_OR, /* BIT_OR */ + PREC_CONDITIONAL, /* QUESTION */ + PREC_CONDITIONAL, /* COLON */ + PREC_SHIFT, /* LEFT_SHIFT */ + PREC_SHIFT, /* RIGHT_SHIFT */ + PREC_COMPARE, /* LEQ */ + PREC_COMPARE, /* GEQ */ + PREC_EQUAL, /* EQUAL */ + PREC_EQUAL, /* NEQ */ + PREC_AND, /* AND */ + PREC_OR, /* OR */ + PREC_EQUAL, /* STREQ */ + PREC_EQUAL, /* STRNEQ */ + PREC_EXPON, /* EXPON */ + PREC_EQUAL, /* IN_LIST */ + PREC_EQUAL, /* NOT_IN_LIST */ + PREC_CLOSE_PAREN, /* CLOSE_PAREN */ + PREC_END, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + PREC_UNARY, /* UNARY_PLUS */ + PREC_UNARY, /* UNARY_MINUS */ + PREC_UNARY, /* FUNCTION */ + PREC_START, /* START */ + PREC_OPEN_PAREN, /* OPEN_PAREN */ + PREC_UNARY, /* NOT*/ + PREC_UNARY, /* BIT_NOT*/ +}; + +/* + * A table mapping lexemes to bytecode instructions, used by CompileExprTree(). + */ + +static const unsigned char instruction[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + INST_ADD, /* BINARY_PLUS */ + INST_SUB, /* BINARY_MINUS */ + 0, /* COMMA */ + INST_MULT, /* MULT */ + INST_DIV, /* DIVIDE */ + INST_MOD, /* MOD */ + INST_LT, /* LESS */ + INST_GT, /* GREATER */ + INST_BITAND, /* BIT_AND */ + INST_BITXOR, /* BIT_XOR */ + INST_BITOR, /* BIT_OR */ + 0, /* QUESTION */ + 0, /* COLON */ + INST_LSHIFT, /* LEFT_SHIFT */ + INST_RSHIFT, /* RIGHT_SHIFT */ + INST_LE, /* LEQ */ + INST_GE, /* GEQ */ + INST_EQ, /* EQUAL */ + INST_NEQ, /* NEQ */ + 0, /* AND */ + 0, /* OR */ + INST_STR_EQ, /* STREQ */ + INST_STR_NEQ, /* STRNEQ */ + INST_EXPON, /* EXPON */ + INST_LIST_IN, /* IN_LIST */ + INST_LIST_NOT_IN, /* NOT_IN_LIST */ + 0, /* CLOSE_PAREN */ + 0, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + INST_UPLUS, /* UNARY_PLUS */ + INST_UMINUS, /* UNARY_MINUS */ + 0, /* FUNCTION */ + 0, /* START */ + 0, /* OPEN_PAREN */ + INST_LNOT, /* NOT*/ + INST_BITNOT, /* BIT_NOT*/ +}; + +/* + * A table mapping a byte value to the corresponding lexeme for use by + * ParseLexeme(). + */ + +static const unsigned char Lexeme[] = { + INVALID /* NUL */, INVALID /* SOH */, + INVALID /* STX */, INVALID /* ETX */, + INVALID /* EOT */, INVALID /* ENQ */, + INVALID /* ACK */, INVALID /* BEL */, + INVALID /* BS */, INVALID /* HT */, + INVALID /* LF */, INVALID /* VT */, + INVALID /* FF */, INVALID /* CR */, + INVALID /* SO */, INVALID /* SI */, + INVALID /* DLE */, INVALID /* DC1 */, + INVALID /* DC2 */, INVALID /* DC3 */, + INVALID /* DC4 */, INVALID /* NAK */, + INVALID /* SYN */, INVALID /* ETB */, + INVALID /* CAN */, INVALID /* EM */, + INVALID /* SUB */, INVALID /* ESC */, + INVALID /* FS */, INVALID /* GS */, + INVALID /* RS */, INVALID /* US */, + INVALID /* SPACE */, 0 /* ! or != */, + QUOTED /* " */, INVALID /* # */, + VARIABLE /* $ */, MOD /* % */, + 0 /* & or && */, INVALID /* ' */, + OPEN_PAREN /* ( */, CLOSE_PAREN /* ) */, + 0 /* * or ** */, PLUS /* + */, + COMMA /* , */, MINUS /* - */, + 0 /* . */, DIVIDE /* / */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-9 */ + COLON /* : */, INVALID /* ; */, + 0 /* < or << or <= */, + 0 /* == or INVALID */, + 0 /* > or >> or >= */, + QUESTION /* ? */, INVALID /* @ */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A-M */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* N-Z */ + SCRIPT /* [ */, INVALID /* \ */, + INVALID /* ] */, BIT_XOR /* ^ */, + INVALID /* _ */, INVALID /* ` */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a-m */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* n-z */ + BRACED /* { */, 0 /* | or || */, + INVALID /* } */, BIT_NOT /* ~ */, + INVALID /* DEL */ +}; + +/* + * The JumpList struct is used to create a stack of data needed for the + * TclEmitForwardJump() and TclFixupForwardJump() calls that are performed + * when compiling the short-circuiting operators QUESTION/COLON, AND, and OR. + * Keeping a stack permits the CompileExprTree() routine to be non-recursive. + */ + +typedef struct JumpList { + JumpFixup jump; /* Pass this argument to matching calls of + * TclEmitForwardJump() and + * TclFixupForwardJump(). */ + struct JumpList *next; /* Point to next item on the stack */ +} JumpList; /* * Declarations for local functions to this file: */ -static int ParseLexeme(CONST char *start, int numBytes, - unsigned char *lexemePtr, Tcl_Obj **literalPtr); -#if (!defined(PARSE_DIRECT_EXPR_TOKENS) || !defined(USE_EXPR_TOKENS)) -static int ParseExpr(Tcl_Interp *interp, CONST char *start, +static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, Tcl_Token *tokenPtr, + CompileEnv *envPtr, int optimize); +static void ConvertTreeToTokens(const char *start, int numBytes, + OpNode *nodes, Tcl_Token *tokenPtr, + Tcl_Parse *parsePtr); +static int ExecConstantExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj * const **litObjvPtr); +static int ParseExpr(Tcl_Interp *interp, const char *start, int numBytes, OpNode **opTreePtr, Tcl_Obj *litList, Tcl_Obj *funcList, - Tcl_Parse *parsePtr); -#endif -#ifdef PARSE_DIRECT_EXPR_TOKENS -static void GenerateTokens(ExprNode *nodes, Tcl_Parse *scratchPtr, - Tcl_Parse *parsePtr); -#else -static void ConvertTreeToTokens(Tcl_Interp *interp, - CONST char *start, int numBytes, OpNode *nodes, - Tcl_Obj *litList, Tcl_Token *tokenPtr, - Tcl_Parse *parsePtr); -static int GenerateTokensForLiteral(CONST char *script, - int numBytes, Tcl_Obj *litList, int nextLiteral, - Tcl_Parse *parsePtr); -static int CopyTokens(Tcl_Token *sourcePtr, Tcl_Parse *parsePtr); -#endif + Tcl_Parse *parsePtr, int parseOnly); +static int ParseLexeme(const char *start, int numBytes, + unsigned char *lexemePtr, Tcl_Obj **literalPtr); -#if (!defined(PARSE_DIRECT_EXPR_TOKENS) || !defined(USE_EXPR_TOKENS)) /* *---------------------------------------------------------------------- * * ParseExpr -- * * Given a string, the numBytes bytes starting at start, this function - * parses it as a Tcl expression and stores information about the - * structure of the expression in the Tcl_Parse struct indicated by the - * caller. + * parses it as a Tcl expression and constructs a tree representing the + * structure of the expression. The caller must pass in empty lists as + * the funcList and litList arguments. The elements of the parsed + * expression are returned to the caller as that tree, a list of literal + * values, a list of function names, and in Tcl_Tokens added to a + * Tcl_Parse struct passed in by the caller. * * Results: * If the string is successfully parsed as a valid Tcl expression, TCL_OK - * is returned, and data about the expression structure is written to - * *parsePtr. If the string cannot be parsed as a valid Tcl expression, - * TCL_ERROR is returned, and if interp is non-NULL, an error message is - * written to interp. + * is returned, and data about the expression structure is written to the + * last four arguments. If the string cannot be parsed as a valid Tcl + * expression, TCL_ERROR is returned, and if interp is non-NULL, an error + * message is written to interp. * * Side effects: - * If there is insufficient space in parsePtr to hold all the information - * about the expression, then additional space is malloc-ed. If the - * function returns TCL_OK then the caller must eventually invoke - * Tcl_FreeParse to release any additional space that was allocated. + * Memory will be allocated. If TCL_OK is returned, the caller must clean + * up the returned data structures. The (OpNode *) value written to + * opTreePtr should be passed to ckfree() and the parsePtr argument + * should be passed to Tcl_FreeParse(). The elements appended to the + * litList and funcList will automatically be freed whenever the refcount + * on those lists indicates they can be freed. * *---------------------------------------------------------------------- */ @@ -190,68 +547,118 @@ static int CopyTokens(Tcl_Token *sourcePtr, Tcl_Parse *parsePtr); static int ParseExpr( Tcl_Interp *interp, /* Used for error reporting. */ - CONST char *start, /* Start of source string to parse. */ - int numBytes, /* Number of bytes in string. If < 0, the - * string consists of all bytes up to the - * first null character. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. */ OpNode **opTreePtr, /* Points to space where a pointer to the * allocated OpNode tree should go. */ Tcl_Obj *litList, /* List to append literals to. */ Tcl_Obj *funcList, /* List to append function names to. */ - Tcl_Parse *parsePtr) /* Structure to fill with tokens representing + Tcl_Parse *parsePtr, /* Structure to fill with tokens representing * those operands that require run time * substitutions. */ + int parseOnly) /* A boolean indicating whether the caller's + * aim is just a parse, or whether it will go + * on to compile the expression. Different + * optimizations are appropriate for the two + * scenarios. */ { - OpNode *nodes; - int nodesAvailable = 64, nodesUsed = 0; - int code = TCL_OK; - int numLiterals = 0, numFuncs = 0; - int scanned = 0, insertMark = 0; - int lastOpen = 0, lastWas = 0; - unsigned char lexeme = START; - Tcl_Obj *msg = NULL, *post = NULL; - CONST int limit = 25; - CONST char *mark = "_@_"; - static CONST unsigned char prec[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 15, 15, 5, 16, 16, 16, 13, 13, 11, 10, 9, 6, 6, 14, 14, - 13, 13, 12, 12, 8, 7, 12, 12, 17, 12, 12, 3, 1, 0, 0, 0, - 0, 18, 18, 18, 2, 4, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, - }; + OpNode *nodes = NULL; /* Pointer to the OpNode storage array where + * we build the parse tree. */ + int nodesAvailable = 64; /* Initial size of the storage array. This + * value establishes a minimum tree memory + * cost of only about 1 kibyte, and is large + * enough for most expressions to parse with + * no need for array growth and + * reallocation. */ + int nodesUsed = 0; /* Number of OpNodes filled. */ + int scanned = 0; /* Capture number of byte scanned by parsing + * routines. */ + int lastParsed; /* Stores info about what the lexeme parsed + * the previous pass through the parsing loop + * was. If it was an operator, lastParsed is + * the index of the OpNode for that operator. + * If it was not an operator, lastParsed holds + * an OperandTypes value encoding what we need + * to know about it. */ + int incomplete; /* Index of the most recent incomplete tree in + * the OpNode array. Heads a stack of + * incomplete trees linked by p.prev. */ + int complete = OT_EMPTY; /* "Index" of the complete tree (that is, a + * complete subexpression) determined at the + * moment. OT_EMPTY is a nonsense value used + * only to silence compiler warnings. During a + * parse, complete will always hold an index + * or an OperandTypes value pointing to an + * actual leaf at the time the complete tree + * is needed. */ - if (numBytes < 0) { - numBytes = (start ? strlen(start) : 0); - } + /* + * These variables control generation of the error message. + */ + + Tcl_Obj *msg = NULL; /* The error message. */ + Tcl_Obj *post = NULL; /* In a few cases, an additional postscript + * for the error message, supplying more + * information after the error msg and + * location have been reported. */ + const char *errCode = NULL; /* The detail word of the errorCode list, or + * NULL to indicate that no changes to the + * errorCode are to be done. */ + const char *subErrCode = NULL; + /* Extra information for use in generating the + * errorCode. */ + const char *mark = "_@_"; /* In the portion of the complete error + * message where the error location is + * reported, this "mark" substring is inserted + * into the string being parsed to aid in + * pinpointing the location of the syntax + * error in the expression. */ + int insertMark = 0; /* A boolean controlling whether the "mark" + * should be inserted. */ + const int limit = 25; /* Portions of the error message are + * constructed out of substrings of the + * original expression. In order to keep the + * error message readable, we impose this + * limit on the substring size we extract. */ TclParseInit(interp, start, numBytes, parsePtr); - nodes = (OpNode *) attemptckalloc(nodesAvailable * sizeof(OpNode)); + nodes = attemptckalloc(nodesAvailable * sizeof(OpNode)); if (nodes == NULL) { - msg = Tcl_NewStringObj( - "not enough memory to parse expression", -1); - code = TCL_ERROR; - } else { - /* - * Initialize the parse tree with the special "START" node. - */ - - nodes->lexeme = lexeme; - nodes->left = OT_NONE; - nodes->right = OT_NONE; - nodes->parent = -1; - nodesUsed++; + TclNewLiteralStringObj(msg, "not enough memory to parse expression"); + errCode = "NOMEM"; + goto error; } - while ((code == TCL_OK) && (lexeme != END)) { - OpNode *nodePtr; - Tcl_Token *tokenPtr = NULL; - Tcl_Obj *literal = NULL; - CONST char *lastStart = start - scanned; + /* + * Initialize the parse tree with the special "START" node. + */ + + nodes->lexeme = START; + nodes->precedence = prec[START]; + nodes->mark = MARK_RIGHT; + nodes->constant = 1; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + + /* + * Main parsing loop parses one lexeme per iteration. We exit the loop + * only when there's a syntax error with a "goto error" which takes us to + * the error handling code following the loop, or when we've successfully + * completed the parse and we return to the caller. + */ + + while (1) { + OpNode *nodePtr; /* Points to the OpNode we may fill this pass + * through the loop. */ + unsigned char lexeme; /* The lexeme we parse this iteration. */ + Tcl_Obj *literal; /* Filled by the ParseLexeme() call when a + * literal is parsed that has a Tcl_Obj rep + * worth preserving. */ /* - * Each pass through this loop adds one more ExprNode. Allocate space - * for one if required. + * Each pass through this loop adds up to one more OpNode. Allocate + * space for one if required. */ if (nodesUsed >= nodesAvailable) { @@ -259,15 +666,14 @@ ParseExpr( OpNode *newPtr; do { - newPtr = (OpNode *) attemptckrealloc((char *) nodes, - (unsigned int) size * sizeof(OpNode)); + newPtr = attemptckrealloc(nodes, size * sizeof(OpNode)); } while ((newPtr == NULL) && ((size -= (size - nodesUsed) / 2) > nodesUsed)); if (newPtr == NULL) { - msg = Tcl_NewStringObj( - "not enough memory to parse expression", -1); - code = TCL_ERROR; - continue; + TclNewLiteralStringObj(msg, + "not enough memory to parse expression"); + errCode = "NOMEM"; + goto error; } nodesAvailable = size; nodes = newPtr; @@ -289,103 +695,183 @@ ParseExpr( */ if ((NODE_TYPE & lexeme) == 0) { + int b; + switch (lexeme) { case INVALID: - msg = Tcl_ObjPrintf( - "invalid character \"%.*s\"", scanned, start); - code = TCL_ERROR; - continue; + msg = Tcl_ObjPrintf("invalid character \"%.*s\"", + scanned, start); + errCode = "BADCHAR"; + goto error; case INCOMPLETE: - msg = Tcl_ObjPrintf( - "incomplete operator \"%.*s\"", scanned, start); - code = TCL_ERROR; - continue; + msg = Tcl_ObjPrintf("incomplete operator \"%.*s\"", + scanned, start); + errCode = "PARTOP"; + goto error; case BAREWORD: + + /* + * Most barewords in an expression are a syntax error. The + * exceptions are that when a bareword is followed by an open + * paren, it might be a function call, and when the bareword + * is a legal literal boolean value, we accept that as well. + */ + if (start[scanned+TclParseAllWhiteSpace( start+scanned, numBytes-scanned)] == '(') { lexeme = FUNCTION; + + /* + * When we compile the expression we'll need the function + * name, and there's no place in the parse tree to store + * it, so we keep a separate list of all the function + * names we've parsed in the order we found them. + */ + Tcl_ListObjAppendElement(NULL, funcList, literal); - numFuncs++; + } else if (Tcl_GetBooleanFromObj(NULL,literal,&b) == TCL_OK) { + lexeme = BOOLEAN; } else { - int b; - if (Tcl_GetBooleanFromObj(NULL, literal, &b) == TCL_OK) { - lexeme = BOOLEAN; - } else { - msg = Tcl_ObjPrintf( - "invalid bareword \"%.*s%s\"", - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "..."); - post = Tcl_ObjPrintf( - "should be \"$%.*s%s\" or \"{%.*s%s}\"", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - Tcl_AppendPrintfToObj(post, - " or \"%.*s%s(...)\" or ...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - code = TCL_ERROR; - continue; + Tcl_DecrRefCount(literal); + msg = Tcl_ObjPrintf("invalid bareword \"%.*s%s\"", + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "..."); + post = Tcl_ObjPrintf( + "should be \"$%.*s%s\" or \"{%.*s%s}\"", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + Tcl_AppendPrintfToObj(post, " or \"%.*s%s(...)\" or ...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + errCode = "BAREWORD"; + if (start[0] == '0') { + const char *stop; + TclParseNumber(NULL, NULL, NULL, start, scanned, + &stop, TCL_PARSE_NO_WHITESPACE); + + if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { + switch (start[1]) { + case 'b': + Tcl_AppendToObj(post, + " (invalid binary number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "BINARY"; + break; + case 'o': + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; + break; + default: + if (isdigit(UCHAR(start[1]))) { + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; + } + break; + } + } } + goto error; } break; case PLUS: case MINUS: - if (lastWas < 0) { - lexeme |= BINARY; - } else { + if (IsOperator(lastParsed)) { + /* + * A "+" or "-" coming just after another operator must be + * interpreted as a unary operator. + */ + lexeme |= UNARY; + } else { + lexeme |= BINARY; } } - } + } /* Uncategorized lexemes */ /* - * Add node to parse tree based on category. + * Handle lexeme based on its category. */ switch (NODE_TYPE & lexeme) { case LEAF: { - CONST char *end; + /* + * Each LEAF results in either a literal getting appended to the + * litList, or a sequence of Tcl_Tokens representing a Tcl word + * getting appended to the parsePtr->tokens. No OpNode is filled + * for this lexeme. + */ + + Tcl_Token *tokenPtr; + const char *end = start; int wordIndex; + int code = TCL_OK; - if (lastWas < 0) { + /* + * A leaf operand appearing just after something that's not an + * operator is a syntax error. + */ + + if (NotOperator(lastParsed)) { msg = Tcl_ObjPrintf("missing operator at %s", mark); - if (lastStart[0] == '0') { - Tcl_Obj *copy = Tcl_NewStringObj(lastStart, - start + scanned - lastStart); - if (TclCheckBadOctal(NULL, Tcl_GetString(copy))) { - post = Tcl_NewStringObj( - "looks like invalid octal number", -1); - } - Tcl_DecrRefCount(copy); - } + errCode = "MISSING"; scanned = 0; insertMark = 1; - code = TCL_ERROR; - continue; + + /* + * Free any literal to avoid a memleak. + */ + + if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) { + Tcl_DecrRefCount(literal); + } + goto error; } switch (lexeme) { case NUMBER: - case BOOLEAN: + case BOOLEAN: + /* + * TODO: Consider using a dict or hash to collapse all + * duplicate literals into a single representative value. + * (Like what is done with [split $s {}]). + * Pro: ~75% memory saving on expressions like + * {1+1+1+1+1+.....+1} (Convert "pointer + Tcl_Obj" cost + * to "pointer" cost only) + * Con: Cost of the dict store/retrieve on every literal in + * every expression when expressions like the above tend + * to be uncommon. + * The memory savings is temporary; Compiling to bytecode + * will collapse things as literals are registered + * anyway, so the savings applies only to the time + * between parsing and compiling. Possibly important due + * to high-water mark nature of memory allocation. + */ + Tcl_ListObjAppendElement(NULL, litList, literal); - numLiterals++; - lastWas = OT_LITERAL; + complete = lastParsed = OT_LITERAL; start += scanned; numBytes -= scanned; continue; + default: break; } /* - * Make room for at least 2 more tokens. + * Remaining LEAF cases may involve filling Tcl_Tokens, so make + * room for at least 2 more tokens. */ - if (parsePtr->numTokens+1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 2); wordIndex = parsePtr->numTokens; tokenPtr = parsePtr->tokenPtr + wordIndex; tokenPtr->type = TCL_TOKEN_WORD; @@ -394,42 +880,38 @@ ParseExpr( switch (lexeme) { case QUOTED: - code = Tcl_ParseQuotedString(interp, start, numBytes, + code = Tcl_ParseQuotedString(NULL, start, numBytes, parsePtr, 1, &end); - if (code != TCL_OK) { - scanned = parsePtr->term - start; - scanned += (scanned < numBytes); - continue; - } scanned = end - start; break; case BRACED: - code = Tcl_ParseBraces(interp, start, numBytes, - parsePtr, 1, &end); - if (code != TCL_OK) { - continue; - } + code = Tcl_ParseBraces(NULL, start, numBytes, + parsePtr, 1, &end); scanned = end - start; break; case VARIABLE: - code = Tcl_ParseVarName(interp, start, numBytes, parsePtr, 1); - if (code != TCL_OK) { - scanned = parsePtr->term - start; - scanned += (scanned < numBytes); - continue; - } + code = Tcl_ParseVarName(NULL, start, numBytes, parsePtr, 1); + + /* + * Handle the quirk that Tcl_ParseVarName reports a successful + * parse even when it gets only a "$" with no variable name. + */ + tokenPtr = parsePtr->tokenPtr + wordIndex + 1; - if (tokenPtr->type != TCL_TOKEN_VARIABLE) { - msg = Tcl_NewStringObj("invalid character \"$\"", -1); - code = TCL_ERROR; - continue; + if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) { + TclNewLiteralStringObj(msg, "invalid character \"$\""); + errCode = "BADCHAR"; + goto error; } scanned = tokenPtr->size; break; - case SCRIPT: + case SCRIPT: { + Tcl_Parse *nestedPtr = + TclStackAlloc(interp, sizeof(Tcl_Parse)); + tokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; tokenPtr->type = TCL_TOKEN_COMMAND; tokenPtr->start = start; @@ -438,155 +920,250 @@ ParseExpr( end = start + numBytes; start++; while (1) { - Tcl_Parse nested; - code = Tcl_ParseCommand(interp, start, (end - start), 1, - &nested); + code = Tcl_ParseCommand(interp, start, end - start, 1, + nestedPtr); if (code != TCL_OK) { - parsePtr->term = nested.term; - parsePtr->errorType = nested.errorType; - parsePtr->incomplete = nested.incomplete; + parsePtr->term = nestedPtr->term; + parsePtr->errorType = nestedPtr->errorType; + parsePtr->incomplete = nestedPtr->incomplete; break; } - start = (nested.commandStart + nested.commandSize); - Tcl_FreeParse(&nested); - if ((nested.term < end) && (*nested.term == ']') - && !nested.incomplete) { + start = nestedPtr->commandStart + nestedPtr->commandSize; + Tcl_FreeParse(nestedPtr); + if ((nestedPtr->term < end) && (nestedPtr->term[0] == ']') + && !nestedPtr->incomplete) { break; } if (start == end) { - msg = Tcl_NewStringObj("missing close-bracket", -1); + TclNewLiteralStringObj(msg, "missing close-bracket"); parsePtr->term = tokenPtr->start; parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->incomplete = 1; code = TCL_ERROR; + errCode = "UNBALANCED"; break; } } + TclStackFree(interp, nestedPtr); end = start; start = tokenPtr->start; - if (code != TCL_OK) { - scanned = parsePtr->term - start; - scanned += (scanned < numBytes); - continue; - } scanned = end - start; tokenPtr->size = scanned; parsePtr->numTokens++; break; + } /* SCRIPT case */ + } + if (code != TCL_OK) { + /* + * Here we handle all the syntax errors generated by the + * Tcl_Token generating parsing routines called in the switch + * just above. If the value of parsePtr->incomplete is 1, then + * the error was an unbalanced '[', '(', '{', or '"' and + * parsePtr->term is pointing to that unbalanced character. If + * the value of parsePtr->incomplete is 0, then the error is + * one of lacking whitespace following a quoted word, for + * example: expr {[an error {foo}bar]}, and parsePtr->term + * points to where the whitespace is missing. We reset our + * values of start and scanned so that when our error message + * is constructed, the location of the syntax error is sure to + * appear in it, even if the quoted expression is truncated. + */ + + start = parsePtr->term; + scanned = parsePtr->incomplete; + if (parsePtr->incomplete) { + errCode = "UNBALANCED"; + } + goto error; } tokenPtr = parsePtr->tokenPtr + wordIndex; tokenPtr->size = scanned; tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1; - if ((lexeme == QUOTED) || (lexeme == BRACED)) { + if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) { + /* + * When this expression is destined to be compiled, and a + * braced or quoted word within an expression is known at + * compile time (no runtime substitutions in it), we can store + * it as a literal rather than in its tokenized form. This is + * an advantage since the compiled bytecode is going to need + * the argument in Tcl_Obj form eventually, so it's just as + * well to get there now. Another advantage is that with this + * conversion, larger constant expressions might be grown and + * optimized. + * + * On the contrary, if the end goal of this parse is to fill a + * Tcl_Parse for a caller of Tcl_ParseExpr(), then it's + * wasteful to convert to a literal only to convert back again + * later. + */ + literal = Tcl_NewObj(); - /* TODO: allow all compile-time known words */ - if (tokenPtr->numComponents == 1 - && tokenPtr[1].type == TCL_TOKEN_TEXT - && TclWordKnownAtCompileTime(tokenPtr, literal)) { + if (TclWordKnownAtCompileTime(tokenPtr, literal)) { Tcl_ListObjAppendElement(NULL, litList, literal); - numLiterals++; - lastWas = OT_LITERAL; + complete = lastParsed = OT_LITERAL; parsePtr->numTokens = wordIndex; break; } Tcl_DecrRefCount(literal); } - lastWas = OT_TOKENS; + complete = lastParsed = OT_TOKENS; break; - } + } /* case LEAF */ case UNARY: - if (lastWas < 0) { + + /* + * A unary operator appearing just after something that's not an + * operator is a syntax error -- something trying to be the left + * operand of an operator that doesn't take one. + */ + + if (NotOperator(lastParsed)) { msg = Tcl_ObjPrintf("missing operator at %s", mark); scanned = 0; insertMark = 1; - code = TCL_ERROR; - continue; + errCode = "MISSING"; + goto error; } - lastWas = nodesUsed; + + /* + * Create an OpNode for the unary operator. + */ + nodePtr->lexeme = lexeme; - nodePtr->left = OT_NONE; - nodePtr->right = OT_NONE; - nodePtr->parent = nodePtr - nodes - 1; + nodePtr->precedence = prec[lexeme]; + nodePtr->mark = MARK_RIGHT; + + /* + * A FUNCTION cannot be a constant expression, because Tcl allows + * functions to return variable results with the same arguments; + * for example, rand(). Other unary operators can root a constant + * expression, so long as the argument is a constant expression. + */ + + nodePtr->constant = (lexeme != FUNCTION); + + /* + * This unary operator is a new incomplete tree, so push it onto + * our stack of incomplete trees. Also remember it as the last + * lexeme we parsed. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; nodesUsed++; break; case BINARY: { - OpNode *otherPtr = NULL; + OpNode *incompletePtr; unsigned char precedence = prec[lexeme]; - if (lastWas >= 0) { + /* + * A binary operator appearing just after another operator is a + * syntax error -- one of the two operators is missing an operand. + */ + + if (IsOperator(lastParsed)) { if ((lexeme == CLOSE_PAREN) && (nodePtr[-1].lexeme == OPEN_PAREN)) { if (nodePtr[-2].lexeme == FUNCTION) { /* * Normally, "()" is a syntax error, but as a special * case accept it as an argument list for a function. + * Treat this as a special LEAF lexeme, and restart + * the parsing loop with zero characters scanned. We + * will parse the ")" again the next time through, but + * with the OT_EMPTY leaf as the subexpression between + * the parens. */ scanned = 0; - lastWas = OT_EMPTY; - nodePtr[-1].left--; + complete = lastParsed = OT_EMPTY; break; } msg = Tcl_ObjPrintf("empty subexpression at %s", mark); scanned = 0; insertMark = 1; - code = TCL_ERROR; - continue; + errCode = "EMPTY"; + goto error; } - if (prec[nodePtr[-1].lexeme] > precedence) { + if (nodePtr[-1].precedence > precedence) { if (nodePtr[-1].lexeme == OPEN_PAREN) { - msg = Tcl_NewStringObj("unbalanced open paren", -1); + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; } else if (nodePtr[-1].lexeme == COMMA) { msg = Tcl_ObjPrintf( "missing function argument at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; } else if (nodePtr[-1].lexeme == START) { - msg = Tcl_NewStringObj("empty expression", -1); - } - } else { - if (lexeme == CLOSE_PAREN) { - msg = Tcl_NewStringObj("unbalanced close paren", -1); - } else if ((lexeme == COMMA) - && (nodePtr[-1].lexeme == OPEN_PAREN) - && (nodePtr[-2].lexeme == FUNCTION)) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; + TclNewLiteralStringObj(msg, "empty expression"); + errCode = "EMPTY"; } + } else if (lexeme == CLOSE_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; + } else if ((lexeme == COMMA) + && (nodePtr[-1].lexeme == OPEN_PAREN) + && (nodePtr[-2].lexeme == FUNCTION)) { + msg = Tcl_ObjPrintf("missing function argument at %s", + mark); + scanned = 0; + insertMark = 1; + errCode = "UNBALANCED"; } if (msg == NULL) { msg = Tcl_ObjPrintf("missing operand at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; } - code = TCL_ERROR; - continue; + goto error; } - if (lastWas == OT_NONE) { - otherPtr = nodes + lastOpen - 1; - lastWas = lastOpen; - } else { - otherPtr = nodePtr - 1; - } + /* + * Here is where the tree comes together. At this point, we have a + * stack of incomplete trees corresponding to substrings that are + * incomplete expressions, followed by a complete tree + * corresponding to a substring that is itself a complete + * expression, followed by the binary operator we have just + * parsed. The incomplete trees can each be completed by adding a + * right operand. + * + * To illustrate with an example, when we parse the expression + * "1+2*3-4" and we reach this point having just parsed the "-" + * operator, we have these incomplete trees: START, "1+", and + * "2*". Next we have the complete subexpression "3". Last is the + * "-" we've just parsed. + * + * The next step is to join our complete tree to an operator. The + * choice is governed by the precedence and associativity of the + * competing operators. If we connect it as the right operand of + * our most recent incomplete tree, we get a new complete tree, + * and we can repeat the process. The while loop following repeats + * this until precedence indicates it is time to join the complete + * tree as the left operand of the just parsed binary operator. + * + * Continuing the example, the first pass through the loop will + * join "3" to "2*"; the next pass will join "2*3" to "1+". Then + * we'll exit the loop and join "1+2*3" to "-". When we return to + * parse another lexeme, our stack of incomplete trees is START + * and "1+2*3-". + */ + while (1) { - /* - * lastWas is "index" of item to be linked. otherPtr points to - * competing operator. - */ + incompletePtr = nodes + incomplete; - if (prec[otherPtr->lexeme] < precedence) { + if (incompletePtr->precedence < precedence) { break; } - if (prec[otherPtr->lexeme] == precedence) { + if (incompletePtr->precedence == precedence) { /* * Right association rules for exponentiation. */ @@ -596,289 +1173,274 @@ ParseExpr( } /* - * Special association rules for the ternary operators. - * The "?" and ":" operators have equal precedence, but - * must be linked up in sensible pairs. + * Special association rules for the conditional + * operators. The "?" and ":" operators have equal + * precedence, but must be linked up in sensible pairs. */ - if ((otherPtr->lexeme == QUESTION) && ((lastWas < 0) - || (nodes[lastWas].lexeme != COLON))) { + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { break; } - if ((otherPtr->lexeme == COLON) && (lexeme == QUESTION)) { + if ((incompletePtr->lexeme == COLON) + && (lexeme == QUESTION)) { break; } } /* - * We should link the lastWas item to the otherPtr as its - * right operand. First make some syntax checks. + * Some special syntax checks... */ - if ((otherPtr->lexeme == OPEN_PAREN) + /* Parens must balance */ + if ((incompletePtr->lexeme == OPEN_PAREN) && (lexeme != CLOSE_PAREN)) { - msg = Tcl_NewStringObj("unbalanced open paren", -1); - code = TCL_ERROR; - break; + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; + goto error; } - if ((otherPtr->lexeme == QUESTION) && ((lastWas < 0) - || (nodes[lastWas].lexeme != COLON))) { - msg = Tcl_ObjPrintf( - "missing operator \":\" at %s", mark); + + /* Right operand of "?" must be ":" */ + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { + msg = Tcl_ObjPrintf("missing operator \":\" at %s", mark); scanned = 0; insertMark = 1; - code = TCL_ERROR; - break; + errCode = "MISSING"; + goto error; } - if ((lastWas >= 0) && (nodes[lastWas].lexeme == COLON) - && (otherPtr->lexeme != QUESTION)) { - msg = Tcl_NewStringObj( - "unexpected operator \":\" without preceding \"?\"", - -1); - code = TCL_ERROR; - break; + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) + && (nodes[complete].lexeme == COLON) + && (incompletePtr->lexeme != QUESTION)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" " + "without preceding \"?\""); + errCode = "SURPRISE"; + goto error; } /* - * Link orphan as right operand of otherPtr. + * Attach complete tree as right operand of most recent + * incomplete tree. */ - otherPtr->right = lastWas; - if (lastWas >= 0) { - nodes[lastWas].parent = otherPtr - nodes; + incompletePtr->right = complete; + if (IsOperator(complete)) { + nodes[complete].p.parent = incomplete; + incompletePtr->constant = incompletePtr->constant + && nodes[complete].constant; + } else { + incompletePtr->constant = incompletePtr->constant + && (complete == OT_LITERAL); } - lastWas = otherPtr - nodes; - if (otherPtr->lexeme == OPEN_PAREN) { - /* - * CLOSE_PAREN can only close one OPEN_PAREN. - */ + /* + * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations + * each make up a single operator. Force them to agree whether + * they have a constant expression. + */ - break; + if ((incompletePtr->lexeme == QUESTION) + || (incompletePtr->lexeme == FUNCTION)) { + nodes[complete].constant = incompletePtr->constant; } - if (otherPtr->lexeme == START) { + + if (incompletePtr->lexeme == START) { /* - * Don't backtrack beyond the start. + * Completing the START tree indicates we're done. + * Transfer the parse tree to the caller and return. */ + *opTreePtr = nodes; + return TCL_OK; + } + + /* + * With a right operand attached, last incomplete tree has + * become the complete tree. Pop it from the incomplete tree + * stack. + */ + + complete = incomplete; + incomplete = incompletePtr->p.prev; + + /* CLOSE_PAREN can only close one OPEN_PAREN. */ + if (incompletePtr->lexeme == OPEN_PAREN) { break; } - otherPtr = nodes + otherPtr->parent; - } - if (code != TCL_OK) { - continue; } + /* + * More syntax checks... + */ + + /* Parens must balance. */ if (lexeme == CLOSE_PAREN) { - if (otherPtr->lexeme == START) { - msg = Tcl_NewStringObj("unbalanced close paren", -1); - code = TCL_ERROR; - continue; + if (incompletePtr->lexeme != OPEN_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; + goto error; } - lastWas = OT_NONE; - lastOpen = otherPtr - nodes; - otherPtr->left++; - - /* - * Create no node for a CLOSE_PAREN lexeme. - */ - - break; } + + /* Commas must appear only in function argument lists. */ if (lexeme == COMMA) { - if ((otherPtr->lexeme != OPEN_PAREN) - || (otherPtr[-1].lexeme != FUNCTION)) { - msg = Tcl_NewStringObj( - "unexpected \",\" outside function argument list", - -1); - code = TCL_ERROR; - continue; + if ((incompletePtr->lexeme != OPEN_PAREN) + || (incompletePtr[-1].lexeme != FUNCTION)) { + TclNewLiteralStringObj(msg, + "unexpected \",\" outside function argument list"); + errCode = "SURPRISE"; + goto error; } - otherPtr->left++; } - if ((lastWas >= 0) && (nodes[lastWas].lexeme == COLON)) { - msg = Tcl_NewStringObj( - "unexpected operator \":\" without preceding \"?\"", - -1); - code = TCL_ERROR; - continue; + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" without preceding \"?\""); + errCode = "SURPRISE"; + goto error; + } + + /* + * Create no node for a CLOSE_PAREN lexeme. + */ + + if (lexeme == CLOSE_PAREN) { + break; } /* - * Link orphan as left operand of new node. + * Link complete tree as left operand of new node. */ nodePtr->lexeme = lexeme; - nodePtr->right = -1; - nodePtr->left = lastWas; - if (lastWas < 0) { - nodePtr->parent = nodePtr - nodes - 1; + nodePtr->precedence = precedence; + nodePtr->mark = MARK_LEFT; + nodePtr->left = complete; + + /* + * The COMMA operator cannot be optimized, since the function + * needs all of its arguments, and optimization would reduce the + * number. Other binary operators root constant expressions when + * both arguments are constant expressions. + */ + + nodePtr->constant = (lexeme != COMMA); + + if (IsOperator(complete)) { + nodes[complete].p.parent = nodesUsed; + nodePtr->constant = nodePtr->constant + && nodes[complete].constant; } else { - nodePtr->parent = nodes[lastWas].parent; - nodes[lastWas].parent = nodePtr - nodes; + nodePtr->constant = nodePtr->constant + && (complete == OT_LITERAL); } - lastWas = nodesUsed; + + /* + * With a left operand attached and a right operand missing, the + * just-parsed binary operator is root of a new incomplete tree. + * Push it onto the stack of incomplete trees. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; nodesUsed++; break; - } - } + } /* case BINARY */ + } /* lexeme handler */ + /* Advance past the just-parsed lexeme */ start += scanned; numBytes -= scanned; + } /* main parsing loop */ + + /* + * We only get here if there's been an error. Any errors that didn't get a + * suitable parsePtr->errorType, get recorded as syntax errors. + */ + + error: + if (parsePtr->errorType == TCL_PARSE_SUCCESS) { + parsePtr->errorType = TCL_PARSE_SYNTAX; } - if (code == TCL_OK) { - *opTreePtr = nodes; - } else if (interp == NULL) { + /* + * Free any partial parse tree we've built. + */ + + if (nodes != NULL) { + ckfree(nodes); + } + + if (interp == NULL) { + /* + * Nowhere to report an error message, so just free it. + */ + if (msg) { Tcl_DecrRefCount(msg); } } else { + /* + * Construct the complete error message. Start with the simple error + * message, pulled from the interp result if necessary... + */ + if (msg == NULL) { msg = Tcl_GetObjResult(interp); } + + /* + * Add a detailed quote from the bad expression, displaying and + * sometimes marking the precise location of the syntax error. + */ + Tcl_AppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", ((start - limit) < parsePtr->string) ? "" : "...", ((start - limit) < parsePtr->string) - ? (start - parsePtr->string) : limit - 3, + ? (int) (start - parsePtr->string) : limit - 3, ((start - limit) < parsePtr->string) ? parsePtr->string : start - limit + 3, (scanned < limit) ? scanned : limit - 3, start, (scanned < limit) ? "" : "...", insertMark ? mark : "", (start + scanned + limit > parsePtr->end) - ? parsePtr->end - (start + scanned) : limit-3, + ? (int) (parsePtr->end - start) - scanned : limit-3, start + scanned, (start + scanned + limit > parsePtr->end) ? "" : "..."); + + /* + * Next, append any postscript message. + */ + if (post != NULL) { Tcl_AppendToObj(msg, ";\n", -1); Tcl_AppendObjToObj(msg, post); Tcl_DecrRefCount(post); } Tcl_SetObjResult(interp, msg); + + /* + * Finally, place context information in the errorInfo. + */ + numBytes = parsePtr->end - parsePtr->string; Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( "\n (parsing expression \"%.*s%s\")", (numBytes < limit) ? numBytes : limit - 3, parsePtr->string, (numBytes < limit) ? "" : "...")); - } - - return code; -} -#endif - -#ifndef PARSE_DIRECT_EXPR_TOKENS -/* - *---------------------------------------------------------------------- - * - * GenerateTokensForLiteral -- - * - * Results: - * Number of bytes scanned. - * - * Side effects: - * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the - * literal. - * - *---------------------------------------------------------------------- - */ - -static int -GenerateTokensForLiteral( - CONST char *script, - int numBytes, - Tcl_Obj *litList, - int nextLiteral, - Tcl_Parse *parsePtr) -{ - int scanned, closer = 0; - CONST char *start = script; - Tcl_Token *destPtr; - unsigned char lexeme; - - /* - * Have to reparse to get pointers into source string. - */ - - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - scanned = ParseLexeme(start, numBytes-scanned, &lexeme, NULL); - if ((lexeme != NUMBER) && (lexeme != BAREWORD)) { - Tcl_Obj *literal; - CONST char *bytes; - - Tcl_ListObjIndex(NULL, litList, nextLiteral, &literal); - bytes = Tcl_GetStringFromObj(literal, &scanned); - start++; - if (memcmp(bytes, start, (size_t) scanned) == 0) { - closer = 1; - } else { - /* TODO */ - Tcl_Panic("figure this out"); + if (errCode) { + Tcl_SetErrorCode(interp, "TCL", "PARSE", "EXPR", errCode, + subErrCode, NULL); } } - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start-closer; - destPtr->size = scanned+2*closer; - destPtr->numComponents = 1; - destPtr++; - destPtr->type = TCL_TOKEN_TEXT; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - parsePtr->numTokens += 2; - - return (start + scanned + closer - script); -} - -/* - *---------------------------------------------------------------------- - * - * CopyTokens -- - * - * Results: - * Number of bytes scanned. - * - * Side effects: - * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the - * literal. - * - *---------------------------------------------------------------------- - */ - -static int -CopyTokens( - Tcl_Token *sourcePtr, - Tcl_Parse *parsePtr) -{ - int toCopy = sourcePtr->numComponents + 1; - Tcl_Token *destPtr; - - if (sourcePtr->numComponents == sourcePtr[1].numComponents + 1) { - while (parsePtr->numTokens + toCopy - 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - memcpy(destPtr, sourcePtr, (size_t) toCopy * sizeof(Tcl_Token)); - destPtr->type = TCL_TOKEN_SUB_EXPR; - parsePtr->numTokens += toCopy; - } else { - while (parsePtr->numTokens + toCopy >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - *destPtr = *sourcePtr; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->numComponents++; - destPtr++; - memcpy(destPtr, sourcePtr, (size_t) toCopy * sizeof(Tcl_Token)); - parsePtr->numTokens += toCopy + 1; - } - return toCopy; + return TCL_ERROR; } /* @@ -886,6 +1448,14 @@ CopyTokens( * * ConvertTreeToTokens -- * + * Given a string, the numBytes bytes starting at start, and an OpNode + * tree and Tcl_Token array created by passing that same string to + * ParseExpr(), this function writes into *parsePtr the sequence of + * Tcl_Tokens needed so to satisfy the historical interface provided by + * Tcl_ParseExpr(). Note that this routine exists only for the sake of + * the public Tcl_ParseExpr() routine. It is not used by Tcl itself at + * all. + * * Results: * None. * @@ -898,1002 +1468,392 @@ CopyTokens( static void ConvertTreeToTokens( - Tcl_Interp *interp, - CONST char *start, + const char *start, int numBytes, OpNode *nodes, - Tcl_Obj *litList, Tcl_Token *tokenPtr, Tcl_Parse *parsePtr) { + int subExprTokenIdx = 0; OpNode *nodePtr = nodes; - int nextLiteral = 0; - int scanned, copied, tokenIdx; - unsigned char lexeme; - Tcl_Token *destPtr; + int next = nodePtr->right; while (1) { - switch (NODE_TYPE & nodePtr->lexeme) { - case UNARY: - if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; + Tcl_Token *subExprTokenPtr; + int scanned, parentIdx; + unsigned char lexeme; - nodePtr->right = OT_NONE; - if (nodePtr->lexeme != START) { - /* - * Find operator in string. - */ + /* + * Advance the mark so the next exit from this node won't retrace + * steps over ground already covered. + */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - if (lexeme != nodePtr->lexeme) { - if (lexeme != (nodePtr->lexeme & ~NODE_TYPE)) { - Tcl_Panic("lexeme mismatch"); - } - } - if (nodePtr->lexeme != OPEN_PAREN) { - if (parsePtr->numTokens + 1 - >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - nodePtr->right = OT_NONE - parsePtr->numTokens; - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start; - destPtr++; - destPtr->type = TCL_TOKEN_OPERATOR; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - parsePtr->numTokens += 2; - } - start +=scanned; - numBytes -= scanned; - } - switch (right) { - case OT_EMPTY: - break; - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - litList, nextLiteral++, parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + right; - } - } else { - if (nodePtr->lexeme == START) { - /* - * We're done. - */ + nodePtr->mark++; - return; - } - if (nodePtr->lexeme == OPEN_PAREN) { - /* - * Skip past matching close paren. - */ + /* + * Handle next child node or leaf. + */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - start +=scanned; - numBytes -= scanned; - } else { - tokenIdx = OT_NONE - nodePtr->right; - nodePtr->right = OT_NONE; - destPtr = parsePtr->tokenPtr + tokenIdx; - destPtr->size = start - destPtr->start; - destPtr->numComponents = parsePtr->numTokens - tokenIdx - 1; - } - nodePtr = nodes + nodePtr->parent; - } + switch (next) { + case OT_EMPTY: + + /* No tokens and no characters for the OT_EMPTY leaf. */ break; - case BINARY: - if (nodePtr->left > OT_NONE) { - int left = nodePtr->left; - nodePtr->left = OT_NONE; - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - nodePtr->left = OT_NONE - parsePtr->numTokens; - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = start; - destPtr++; - destPtr->type = TCL_TOKEN_OPERATOR; - parsePtr->numTokens += 2; - } - switch (left) { - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - litList, nextLiteral++, parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + left; - } - } else if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; + case OT_LITERAL: - nodePtr->right = OT_NONE; - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - if (lexeme != nodePtr->lexeme) { - if (lexeme != (nodePtr->lexeme & ~NODE_TYPE)) { - Tcl_Panic("lexeme mismatch"); - } - } + /* + * Skip any white space that comes before the literal. + */ - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - tokenIdx = OT_NONE - nodePtr->left; - destPtr = parsePtr->tokenPtr + tokenIdx + 1; - destPtr->start = start; - destPtr->size = scanned; - destPtr->numComponents = 0; - } - start +=scanned; - numBytes -= scanned; - switch (right) { - case OT_LITERAL: - scanned = GenerateTokensForLiteral(start, numBytes, - litList, nextLiteral++, parsePtr); - start +=scanned; - numBytes -= scanned; - break; - case OT_TOKENS: - copied = CopyTokens(tokenPtr, parsePtr); - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += copied; - break; - default: - nodePtr = nodes + right; - } + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + /* + * Reparse the literal to get pointers into source string. + */ + + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->start = start; + subExprTokenPtr->size = scanned; + subExprTokenPtr->numComponents = 1; + subExprTokenPtr[1].type = TCL_TOKEN_TEXT; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + subExprTokenPtr[1].numComponents = 0; + + parsePtr->numTokens += 2; + start += scanned; + numBytes -= scanned; + break; + + case OT_TOKENS: { + /* + * tokenPtr points to a token sequence that came from parsing a + * Tcl word. A Tcl word is made up of a sequence of one or more + * elements. When the word is only a single element, it's been the + * historical practice to replace the TCL_TOKEN_WORD token + * directly with a TCL_TOKEN_SUB_EXPR token. However, when the + * word has multiple elements, a TCL_TOKEN_WORD token is kept as a + * grouping device so that TCL_TOKEN_SUB_EXPR always has only one + * element. Wise or not, these are the rules the Tcl expr parser + * has followed, and for the sake of those few callers of + * Tcl_ParseExpr() we do not change them now. Internally, we can + * do better. + */ + + int toCopy = tokenPtr->numComponents + 1; + + if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { + /* + * Single element word. Copy tokens and convert the leading + * token to TCL_TOKEN_SUB_EXPR. + */ + + TclGrowParseTokenArray(parsePtr, toCopy); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + parsePtr->numTokens += toCopy; } else { - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - tokenIdx = OT_NONE - nodePtr->left; - nodePtr->left = OT_NONE; - destPtr = parsePtr->tokenPtr + tokenIdx; - destPtr->size = start - destPtr->start; - destPtr->numComponents = parsePtr->numTokens-tokenIdx-1; - } - nodePtr = nodes + nodePtr->parent; + /* + * Multiple element word. Create a TCL_TOKEN_SUB_EXPR token to + * lead, with fields initialized from the leading token, then + * copy entire set of word tokens. + */ + + TclGrowParseTokenArray(parsePtr, toCopy+1); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *subExprTokenPtr = *tokenPtr; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->numComponents++; + subExprTokenPtr++; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + parsePtr->numTokens += toCopy + 1; } + + scanned = tokenPtr->start + tokenPtr->size - start; + start += scanned; + numBytes -= scanned; + tokenPtr += toCopy; break; } - } -} -#endif - -/* - *---------------------------------------------------------------------- - * - * Tcl_ParseExpr -- - * - * Given a string, the numBytes bytes starting at start, this function - * parses it as a Tcl expression and stores information about the - * structure of the expression in the Tcl_Parse struct indicated by the - * caller. - * - * Results: - * If the string is successfully parsed as a valid Tcl expression, TCL_OK - * is returned, and data about the expression structure is written to - * *parsePtr. If the string cannot be parsed as a valid Tcl expression, - * TCL_ERROR is returned, and if interp is non-NULL, an error message is - * written to interp. - * - * Side effects: - * If there is insufficient space in parsePtr to hold all the information - * about the expression, then additional space is malloc-ed. If the - * function returns TCL_OK then the caller must eventually invoke - * Tcl_FreeParse to release any additional space that was allocated. - * - *---------------------------------------------------------------------- - */ -int -Tcl_ParseExpr( - Tcl_Interp *interp, /* Used for error reporting. */ - CONST char *start, /* Start of source string to parse. */ - int numBytes, /* Number of bytes in string. If < 0, the - * string consists of all bytes up to the - * first null character. */ - Tcl_Parse *parsePtr) /* Structure to fill with information about - * the parsed expression; any previous - * information in the structure is ignored. */ -{ -#ifndef PARSE_DIRECT_EXPR_TOKENS - OpNode *opTree = NULL; /* Will point to the tree of operators */ - Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ - Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse parse; /* Holds the Tcl_Tokens of substitutions */ + default: - int code = ParseExpr(interp, start, numBytes, &opTree, litList, - funcList, &parse); + /* + * Advance to the child node, which is an operator. + */ - if (numBytes < 0) { - numBytes = (start ? strlen(start) : 0); - } + nodePtr = nodes + next; - TclParseInit(interp, start, numBytes, parsePtr); - if (code == TCL_OK) { - ConvertTreeToTokens(interp, start, numBytes, opTree, litList, - parse.tokenPtr, parsePtr); - } else { - /* TODO: copy over any error info to *parsePtr */ - } + /* + * Skip any white space that comes before the subexpression. + */ - Tcl_FreeParse(&parse); - Tcl_DecrRefCount(funcList); - Tcl_DecrRefCount(litList); - ckfree((char *) opTree); - return code; -#else -#define NUM_STATIC_NODES 64 - ExprNode staticNodes[NUM_STATIC_NODES]; - ExprNode *lastOrphanPtr, *nodes = staticNodes; - int nodesAvailable = NUM_STATIC_NODES; - int nodesUsed = 0; - Tcl_Parse scratch; /* Parsing scratch space */ - Tcl_Obj *msg = NULL, *post = NULL; - int scanned = 0, code = TCL_OK, insertMark = 0; - CONST char *mark = "_@_"; - CONST int limit = 25; - static CONST unsigned char prec[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 15, 15, 5, 16, 16, 16, 13, 13, 11, 10, 9, 6, 6, 14, 14, - 13, 13, 12, 12, 8, 7, 12, 12, 17, 12, 12, 3, 1, 0, 0, 0, - 0, 18, 18, 18, 2, 4, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, - }; + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; - if (numBytes < 0) { - numBytes = (start ? strlen(start) : 0); - } + /* + * Generate tokens for the operator / subexpression... + */ - TclParseInit(interp, start, numBytes, &scratch); - TclParseInit(interp, start, numBytes, parsePtr); + switch (nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: - /* - * Initialize the parse tree with the special "START" node. - */ + /* + * Historical practice has been to have no Tcl_Tokens for + * these operators. + */ - nodes->lexeme = START; - nodes->left = -1; - nodes->right = -1; - nodes->parent = -1; - nodes->token = -1; - lastOrphanPtr = nodes; - nodesUsed++; + break; - while ((code == TCL_OK) && (lastOrphanPtr->lexeme != END)) { - ExprNode *nodePtr, *lastNodePtr; - Tcl_Token *tokenPtr; + default: { - /* - * Each pass through this loop adds one more ExprNode. Allocate space - * for one if required. - */ + /* + * Remember the index of the last subexpression we were + * working on -- that of our parent. We'll stack it later. + */ - if (nodesUsed >= nodesAvailable) { - int lastOrphanIdx = lastOrphanPtr - nodes; - int size = nodesUsed * 2; - ExprNode *newPtr; + parentIdx = subExprTokenIdx; - if (nodes == staticNodes) { - nodes = NULL; - } - do { - newPtr = (ExprNode *) attemptckrealloc((char *) nodes, - (unsigned int) size * sizeof(ExprNode)); - } while ((newPtr == NULL) - && ((size -= (size - nodesUsed) / 2) > nodesUsed)); - if (newPtr == NULL) { - msg = Tcl_NewStringObj( - "not enough memory to parse expression", -1); - code = TCL_ERROR; - continue; - } - nodesAvailable = size; - if (nodes == NULL) { - memcpy(newPtr, staticNodes, - (size_t) nodesUsed * sizeof(ExprNode)); - } - nodes = newPtr; - lastOrphanPtr = nodes + lastOrphanIdx; - } - nodePtr = nodes + nodesUsed; - lastNodePtr = nodePtr - 1; + /* + * Verify space for the two leading Tcl_Tokens representing + * the subexpression rooted by this operator. The first + * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second of + * type TCL_TOKEN_OPERATOR. + */ - /* - * Skip white space between lexemes. - */ + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenIdx = parsePtr->numTokens; + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + parsePtr->numTokens += 2; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr[1].type = TCL_TOKEN_OPERATOR; - scanned = TclParseAllWhiteSpace(start, numBytes); - start += scanned; - numBytes -= scanned; + /* + * Our current position scanning the string is the starting + * point for this subexpression. + */ - scanned = ParseLexeme(start, numBytes, &(nodePtr->lexeme), NULL); + subExprTokenPtr->start = start; - /* - * Use context to categorize the lexemes that are ambiguous. - */ + /* + * Eventually, we know that the numComponents field of the + * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means + * we can make other use of this field for now to track the + * stack of subexpressions we have pending. + */ - if ((NODE_TYPE & nodePtr->lexeme) == 0) { - switch (nodePtr->lexeme) { - case INVALID: - msg = Tcl_ObjPrintf( - "invalid character \"%.*s\"", scanned, start); - code = TCL_ERROR; - continue; - case INCOMPLETE: - msg = Tcl_ObjPrintf( - "incomplete operator \"%.*s\"", scanned, start); - code = TCL_ERROR; - continue; - case BAREWORD: - if (start[scanned+TclParseAllWhiteSpace( - start+scanned, numBytes-scanned)] == '(') { - nodePtr->lexeme = FUNCTION; - } else { - Tcl_Obj *objPtr = Tcl_NewStringObj(start, scanned); - Tcl_IncrRefCount(objPtr); - code = Tcl_ConvertToType(NULL, objPtr, &tclBooleanType); - Tcl_DecrRefCount(objPtr); - if (code == TCL_OK) { - nodePtr->lexeme = BOOLEAN; - } else { - msg = Tcl_ObjPrintf( - "invalid bareword \"%.*s%s\"", - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "..."); - post = Tcl_ObjPrintf( - "should be \"$%.*s%s\" or \"{%.*s%s}\"", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - Tcl_AppendPrintfToObj(post, - " or \"%.*s%s(...)\" or ...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - continue; - } - } + subExprTokenPtr[1].numComponents = parentIdx; break; - case PLUS: - case MINUS: - if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { - nodePtr->lexeme |= BINARY; - } else { - nodePtr->lexeme |= UNARY; - } } + } + break; } - /* - * Add node to parse tree based on category. - */ + /* Determine which way to exit the node on this pass. */ + router: + switch (nodePtr->mark) { + case MARK_LEFT: + next = nodePtr->left; + break; - switch (NODE_TYPE & nodePtr->lexeme) { - case LEAF: { - CONST char *end; + case MARK_RIGHT: + next = nodePtr->right; - if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { - CONST char *operand = - scratch.tokenPtr[lastNodePtr->token].start; + /* + * Skip any white space that comes before the operator. + */ - msg = Tcl_ObjPrintf("missing operator at %s", mark); - if (operand[0] == '0') { - Tcl_Obj *copy = Tcl_NewStringObj(operand, - start + scanned - operand); - if (TclCheckBadOctal(NULL, Tcl_GetString(copy))) { - post = Tcl_NewStringObj( - "looks like invalid octal number", -1); - } - Tcl_DecrRefCount(copy); - } - scanned = 0; - insertMark = 1; - code = TCL_ERROR; - continue; - } + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; - if (scratch.numTokens+1 >= scratch.tokensAvailable) { - TclExpandTokenArray(&scratch); - } - nodePtr->token = scratch.numTokens; - tokenPtr = scratch.tokenPtr + nodePtr->token; - tokenPtr->type = TCL_TOKEN_SUB_EXPR; - tokenPtr->start = start; - scratch.numTokens++; + /* + * Here we scan from the string the operator corresponding to + * nodePtr->lexeme. + */ - switch (nodePtr->lexeme) { - case NUMBER: - case BOOLEAN: - tokenPtr = scratch.tokenPtr + scratch.numTokens; - tokenPtr->type = TCL_TOKEN_TEXT; - tokenPtr->start = start; - tokenPtr->size = scanned; - tokenPtr->numComponents = 0; - scratch.numTokens++; + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - break; + switch(nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: - case QUOTED: - code = Tcl_ParseQuotedString(interp, start, numBytes, - &scratch, 1, &end); - if (code != TCL_OK) { - scanned = scratch.term - start; - scanned += (scanned < numBytes); - continue; - } - scanned = end - start; - break; - - case BRACED: - code = Tcl_ParseBraces(interp, start, numBytes, - &scratch, 1, &end); - if (code != TCL_OK) { - continue; - } - scanned = end - start; - break; + /* + * No tokens for these lexemes -> nothing to do. + */ - case VARIABLE: - code = Tcl_ParseVarName(interp, start, numBytes, &scratch, 1); - if (code != TCL_OK) { - scanned = scratch.term - start; - scanned += (scanned < numBytes); - continue; - } - tokenPtr = scratch.tokenPtr + nodePtr->token + 1; - if (tokenPtr->type != TCL_TOKEN_VARIABLE) { - msg = Tcl_NewStringObj("invalid character \"$\"", -1); - code = TCL_ERROR; - continue; - } - scanned = tokenPtr->size; break; - case SCRIPT: - tokenPtr = scratch.tokenPtr + scratch.numTokens; - tokenPtr->type = TCL_TOKEN_COMMAND; - tokenPtr->start = start; - tokenPtr->numComponents = 0; + default: - end = start + numBytes; - start++; - while (1) { - Tcl_Parse nested; - code = Tcl_ParseCommand(interp, - start, (end - start), 1, &nested); - if (code != TCL_OK) { - parsePtr->term = nested.term; - parsePtr->errorType = nested.errorType; - parsePtr->incomplete = nested.incomplete; - break; - } - start = (nested.commandStart + nested.commandSize); - Tcl_FreeParse(&nested); - if ((nested.term < end) && (*nested.term == ']') - && !nested.incomplete) { - break; - } + /* + * Record in the TCL_TOKEN_OPERATOR token the pointers into + * the string marking where the operator is. + */ - if (start == end) { - msg = Tcl_NewStringObj("missing close-bracket", -1); - parsePtr->term = tokenPtr->start; - parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; - parsePtr->incomplete = 1; - code = TCL_ERROR; - break; - } - } - end = start; - start = tokenPtr->start; - if (code != TCL_OK) { - scanned = parsePtr->term - start; - scanned += (scanned < numBytes); - continue; - } - scanned = end - start; - tokenPtr->size = scanned; - scratch.numTokens++; + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; break; } - tokenPtr = scratch.tokenPtr + nodePtr->token; - tokenPtr->size = scanned; - tokenPtr->numComponents = scratch.numTokens - nodePtr->token - 1; - - nodePtr->left = -1; - nodePtr->right = -1; - nodePtr->parent = -1; - lastOrphanPtr = nodePtr; - nodesUsed++; - break; - } - - case UNARY: - if ((NODE_TYPE & lastNodePtr->lexeme) == LEAF) { - msg = Tcl_ObjPrintf("missing operator at %s", mark); - scanned = 0; - insertMark = 1; - code = TCL_ERROR; - continue; - } - nodePtr->left = -1; - nodePtr->right = -1; - nodePtr->parent = -1; - - if (scratch.numTokens >= scratch.tokensAvailable) { - TclExpandTokenArray(&scratch); - } - nodePtr->token = scratch.numTokens; - tokenPtr = scratch.tokenPtr + nodePtr->token; - tokenPtr->type = TCL_TOKEN_OPERATOR; - tokenPtr->start = start; - tokenPtr->size = scanned; - tokenPtr->numComponents = 0; - scratch.numTokens++; - - lastOrphanPtr = nodePtr; - nodesUsed++; + start += scanned; + numBytes -= scanned; break; - case BINARY: { - ExprNode *otherPtr = NULL; - unsigned char precedence = prec[nodePtr->lexeme]; - - if ((nodePtr->lexeme == CLOSE_PAREN) - && (lastNodePtr->lexeme == OPEN_PAREN)) { - if (lastNodePtr[-1].lexeme == FUNCTION) { - /* - * Normally, "()" is a syntax error, but as a special case - * accept it as an argument list for a function. - */ - - scanned = 0; - nodePtr->lexeme = EMPTY; - nodePtr->left = -1; - nodePtr->right = -1; - nodePtr->parent = -1; - nodePtr->token = -1; - - lastOrphanPtr = nodePtr; - nodesUsed++; - break; - - } - msg = Tcl_ObjPrintf("empty subexpression at %s", mark); - scanned = 0; - insertMark = 1; - code = TCL_ERROR; - continue; - } - + case MARK_PARENT: + switch (nodePtr->lexeme) { + case START: - if ((NODE_TYPE & lastNodePtr->lexeme) != LEAF) { - if (prec[lastNodePtr->lexeme] > precedence) { - if (lastNodePtr->lexeme == OPEN_PAREN) { - msg = Tcl_NewStringObj("unbalanced open paren", -1); - } else if (lastNodePtr->lexeme == COMMA) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; - } else if (lastNodePtr->lexeme == START) { - msg = Tcl_NewStringObj("empty expression", -1); - } - } else if (nodePtr->lexeme == CLOSE_PAREN) { - msg = Tcl_NewStringObj("unbalanced close paren", -1); - } else if ((nodePtr->lexeme == COMMA) - && (lastNodePtr->lexeme == OPEN_PAREN) - && (lastNodePtr[-1].lexeme == FUNCTION)) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; - } - if (msg == NULL) { - msg = Tcl_ObjPrintf("missing operand at %s", mark); - scanned = 0; - insertMark = 1; - } - code = TCL_ERROR; - continue; - } + /* When we get back to the START node, we're done. */ + return; - while (1) { - if (lastOrphanPtr->parent >= 0) { - otherPtr = nodes + lastOrphanPtr->parent; - } else if (lastOrphanPtr->left >= 0) { - Tcl_Panic("Tcl_ParseExpr: left closure programming error"); - } else { - lastOrphanPtr->parent = lastOrphanPtr - nodes; - otherPtr = lastOrphanPtr; - } - otherPtr--; + case COMMA: + case COLON: - if (prec[otherPtr->lexeme] < precedence) { - break; - } + /* No tokens for these lexemes -> nothing to do. */ + break; - if (prec[otherPtr->lexeme] == precedence) { - /* - * Special association rules for the ternary operators. - */ + case OPEN_PAREN: - if ((otherPtr->lexeme == QUESTION) - && (lastOrphanPtr->lexeme != COLON)) { - break; - } - if ((otherPtr->lexeme == COLON) - && (nodePtr->lexeme == QUESTION)) { - break; - } + /* + * Skip past matching close paren. + */ - /* - * Right association rules for exponentiation. - */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + start += scanned; + numBytes -= scanned; + break; - if (nodePtr->lexeme == EXPON) { - break; - } - } + default: /* - * Some checks before linking. + * Before we leave this node/operator/subexpression for the + * last time, finish up its tokens.... + * + * Our current position scanning the string is where the + * substring for the subexpression ends. */ - if ((otherPtr->lexeme == OPEN_PAREN) - && (nodePtr->lexeme != CLOSE_PAREN)) { - lastOrphanPtr = otherPtr; - msg = Tcl_NewStringObj("unbalanced open paren", -1); - code = TCL_ERROR; - break; - } - if ((otherPtr->lexeme == QUESTION) - && (lastOrphanPtr->lexeme != COLON)) { - msg = Tcl_ObjPrintf( - "missing operator \":\" at %s", mark); - scanned = 0; - insertMark = 1; - code = TCL_ERROR; - break; - } - if ((lastOrphanPtr->lexeme == COLON) - && (otherPtr->lexeme != QUESTION)) { - msg = Tcl_NewStringObj( - "unexpected operator \":\" without preceding \"?\"", - -1); - code = TCL_ERROR; - break; - } + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr->size = start - subExprTokenPtr->start; /* - * Link orphan as right operand of otherPtr. + * All the Tcl_Tokens allocated and filled belong to + * this subexpresion. The first token is the leading + * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) + * are its components. */ - otherPtr->right = lastOrphanPtr - nodes; - lastOrphanPtr->parent = otherPtr - nodes; - lastOrphanPtr = otherPtr; - - if (otherPtr->lexeme == OPEN_PAREN) { - /* - * CLOSE_PAREN can only close one OPEN_PAREN. - */ - - tokenPtr = scratch.tokenPtr + otherPtr->token; - tokenPtr->size = start + scanned - tokenPtr->start; - break; - } - if (otherPtr->lexeme == START) { - /* - * Don't backtrack beyond the start. - */ - - break; - } - } - if (code != TCL_OK) { - continue; - } - - if (nodePtr->lexeme == CLOSE_PAREN) { - if (otherPtr->lexeme == START) { - msg = Tcl_NewStringObj("unbalanced close paren", -1); - code = TCL_ERROR; - continue; - } + subExprTokenPtr->numComponents = + (parsePtr->numTokens - subExprTokenIdx) - 1; /* - * Create no node for a CLOSE_PAREN lexeme. + * Finally, as we return up the tree to our parent, pop the + * parent subexpression off our subexpression stack, and + * fill in the zero numComponents for the operator Tcl_Token. */ + parentIdx = subExprTokenPtr[1].numComponents; + subExprTokenPtr[1].numComponents = 0; + subExprTokenIdx = parentIdx; break; } - if ((nodePtr->lexeme == COMMA) && ((otherPtr->lexeme != OPEN_PAREN) - || (otherPtr[-1].lexeme != FUNCTION))) { - msg = Tcl_NewStringObj( - "unexpected \",\" outside function argument list", -1); - code = TCL_ERROR; - continue; - } - - if (lastOrphanPtr->lexeme == COLON) { - msg = Tcl_NewStringObj( - "unexpected operator \":\" without preceding \"?\"", - -1); - code = TCL_ERROR; - continue; - } - /* - * Link orphan as left operand of new node. + * Since we're returning to parent, skip child handling code. */ - nodePtr->right = -1; - - if (scratch.numTokens >= scratch.tokensAvailable) { - TclExpandTokenArray(&scratch); - } - nodePtr->token = scratch.numTokens; - tokenPtr = scratch.tokenPtr + nodePtr->token; - tokenPtr->type = TCL_TOKEN_OPERATOR; - tokenPtr->start = start; - tokenPtr->size = scanned; - tokenPtr->numComponents = 0; - scratch.numTokens++; - - nodePtr->left = lastOrphanPtr - nodes; - nodePtr->parent = lastOrphanPtr->parent; - lastOrphanPtr->parent = nodePtr - nodes; - lastOrphanPtr = nodePtr; - nodesUsed++; - break; + nodePtr = nodes + nodePtr->p.parent; + goto router; } - } - - start += scanned; - numBytes -= scanned; } - - if (code == TCL_OK) { - /* - * Shift tokens from scratch space to caller space. - */ - - GenerateTokens(nodes, &scratch, parsePtr); - } else { - if (parsePtr->errorType == TCL_PARSE_SUCCESS) { - parsePtr->errorType = TCL_PARSE_SYNTAX; - parsePtr->term = start; - } - if (interp == NULL) { - if (msg) { - Tcl_DecrRefCount(msg); - } - } else { - if (msg == NULL) { - msg = Tcl_GetObjResult(interp); - } - Tcl_AppendPrintfToObj(msg, - "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", - ((start - limit) < scratch.string) ? "" : "...", - ((start - limit) < scratch.string) - ? (start - scratch.string) : limit - 3, - ((start - limit) < scratch.string) - ? scratch.string : start - limit + 3, - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "...", - insertMark ? mark : "", - (start + scanned + limit > scratch.end) - ? scratch.end - (start + scanned) : limit-3, - start + scanned, - (start + scanned + limit > scratch.end) ? "" : "..."); - if (post != NULL) { - Tcl_AppendToObj(msg, ";\n", -1); - Tcl_AppendObjToObj(msg, post); - Tcl_DecrRefCount(post); - } - Tcl_SetObjResult(interp, msg); - numBytes = scratch.end - scratch.string; - Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( - "\n (parsing expression \"%.*s%s\")", - (numBytes < limit) ? numBytes : limit - 3, - scratch.string, (numBytes < limit) ? "" : "...")); - } - } - - if (nodes != staticNodes) { - ckfree((char *)nodes); - } - Tcl_FreeParse(&scratch); - return code; -#endif } -#ifdef PARSE_DIRECT_EXPR_TOKENS /* *---------------------------------------------------------------------- * - * GenerateTokens -- + * Tcl_ParseExpr -- + * + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and stores information about the + * structure of the expression in the Tcl_Parse struct indicated by the + * caller. + * + * Results: + * If the string is successfully parsed as a valid Tcl expression, TCL_OK + * is returned, and data about the expression structure is written to + * *parsePtr. If the string cannot be parsed as a valid Tcl expression, + * TCL_ERROR is returned, and if interp is non-NULL, an error message is + * written to interp. * - * Routine that generates Tcl_Tokens that represent a Tcl expression and - * writes them to *parsePtr. The parse tree of the expression is in the - * array of ExprNodes, nodes. Some of the Tcl_Tokens are copied from - * scratch space at *scratchPtr, where the parsing pass that constructed - * the parse tree left them. + * Side effects: + * If there is insufficient space in parsePtr to hold all the information + * about the expression, then additional space is malloc-ed. If the + * function returns TCL_OK then the caller must eventually invoke + * Tcl_FreeParse to release any additional space that was allocated. * *---------------------------------------------------------------------- */ -static void -GenerateTokens( - ExprNode *nodes, - Tcl_Parse *scratchPtr, - Tcl_Parse *parsePtr) +int +Tcl_ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. If < 0, the + * string consists of all bytes up to the + * first null character. */ + Tcl_Parse *parsePtr) /* Structure to fill with information about + * the parsed expression; any previous + * information in the structure is ignored. */ { - ExprNode *nodePtr = nodes + nodes->right; - Tcl_Token *sourcePtr, *destPtr, *tokenPtr = scratchPtr->tokenPtr; - int toCopy; - CONST char *end = tokenPtr->start + tokenPtr->size; - - while (nodePtr->lexeme != START) { - switch (NODE_TYPE & nodePtr->lexeme) { - case BINARY: - if (nodePtr->left >= 0) { - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - sourcePtr = scratchPtr->tokenPtr + nodePtr->token; - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - nodePtr->token = parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = tokenPtr->start; - destPtr++; - *destPtr = *sourcePtr; - parsePtr->numTokens += 2; - } - nodePtr = nodes + nodePtr->left; - nodes[nodePtr->parent].left = -1; - } else if (nodePtr->right >= 0) { - tokenPtr += tokenPtr->numComponents + 1; - nodePtr = nodes + nodePtr->right; - nodes[nodePtr->parent].right = -1; - } else { - if ((nodePtr->lexeme != COMMA) && (nodePtr->lexeme != COLON)) { - destPtr = parsePtr->tokenPtr + nodePtr->token; - destPtr->size = end - destPtr->start; - destPtr->numComponents = - parsePtr->numTokens - nodePtr->token - 1; - } - nodePtr = nodes + nodePtr->parent; - } - break; - - case UNARY: - if (nodePtr->right >= 0) { - sourcePtr = scratchPtr->tokenPtr + nodePtr->token; - if (nodePtr->lexeme != OPEN_PAREN) { - if (parsePtr->numTokens + 1 >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - nodePtr->token = parsePtr->numTokens; - destPtr->type = TCL_TOKEN_SUB_EXPR; - destPtr->start = tokenPtr->start; - destPtr++; - *destPtr = *sourcePtr; - parsePtr->numTokens += 2; - } - if (tokenPtr == sourcePtr) { - tokenPtr += tokenPtr->numComponents + 1; - } - nodePtr = nodes + nodePtr->right; - nodes[nodePtr->parent].right = -1; - } else { - if (nodePtr->lexeme != OPEN_PAREN) { - destPtr = parsePtr->tokenPtr + nodePtr->token; - destPtr->size = end - destPtr->start; - destPtr->numComponents = - parsePtr->numTokens - nodePtr->token - 1; - } else { - sourcePtr = scratchPtr->tokenPtr + nodePtr->token; - end = sourcePtr->start + sourcePtr->size; - } - nodePtr = nodes + nodePtr->parent; - } - break; - - case LEAF: - switch (nodePtr->lexeme) { - case EMPTY: - break; - - case BRACED: - case QUOTED: - sourcePtr = scratchPtr->tokenPtr + nodePtr->token; - end = sourcePtr->start + sourcePtr->size; - if (sourcePtr->numComponents > 1) { - toCopy = sourcePtr->numComponents; - if (tokenPtr == sourcePtr) { - tokenPtr += toCopy + 1; - } - sourcePtr->numComponents++; - while (parsePtr->numTokens + toCopy + 1 - >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - *destPtr++ = *sourcePtr; - *destPtr = *sourcePtr++; - destPtr->type = TCL_TOKEN_WORD; - destPtr->numComponents = toCopy; - destPtr++; - memcpy((VOID *) destPtr, (VOID *) sourcePtr, - (size_t) (toCopy * sizeof(Tcl_Token))); - parsePtr->numTokens += toCopy + 2; - break; - } + int code; + OpNode *opTree = NULL; /* Will point to the tree of operators. */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals. */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names. */ + Tcl_Parse *exprParsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions. */ - default: - sourcePtr = scratchPtr->tokenPtr + nodePtr->token; - end = sourcePtr->start + sourcePtr->size; - toCopy = sourcePtr->numComponents + 1; - if (tokenPtr == sourcePtr) { - tokenPtr += toCopy; - } - while (parsePtr->numTokens + toCopy - 1 - >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } - destPtr = parsePtr->tokenPtr + parsePtr->numTokens; - memcpy((VOID *) destPtr, (VOID *) sourcePtr, - (size_t) (toCopy * sizeof(Tcl_Token))); - parsePtr->numTokens += toCopy; - break; + if (numBytes < 0) { + numBytes = (start ? strlen(start) : 0); + } - } - nodePtr = nodes + nodePtr->parent; - break; + code = ParseExpr(interp, start, numBytes, &opTree, litList, funcList, + exprParsePtr, 1 /* parseOnly */); + Tcl_DecrRefCount(funcList); + Tcl_DecrRefCount(litList); - } + TclParseInit(interp, start, numBytes, parsePtr); + if (code == TCL_OK) { + ConvertTreeToTokens(start, numBytes, + opTree, exprParsePtr->tokenPtr, parsePtr); + } else { + parsePtr->term = exprParsePtr->term; + parsePtr->errorType = exprParsePtr->errorType; } + + Tcl_FreeParse(exprParsePtr); + TclStackFree(interp, exprParsePtr); + ckfree(opTree); + return code; } -#endif /* *---------------------------------------------------------------------- @@ -1914,83 +1874,29 @@ GenerateTokens( static int ParseLexeme( - CONST char *start, /* Start of lexeme to parse. */ + const char *start, /* Start of lexeme to parse. */ int numBytes, /* Number of bytes in string. */ unsigned char *lexemePtr, /* Write code of parsed lexeme to this * storage. */ Tcl_Obj **literalPtr) /* Write corresponding literal value to this storage, if non-NULL. */ { - CONST char *end; + const char *end; int scanned; Tcl_UniChar ch; Tcl_Obj *literal = NULL; + unsigned char byte; if (numBytes == 0) { *lexemePtr = END; return 0; } - switch (*start) { - case '[': - *lexemePtr = SCRIPT; - return 1; - - case '{': - *lexemePtr = BRACED; - return 1; - - case '(': - *lexemePtr = OPEN_PAREN; - return 1; - - case ')': - *lexemePtr = CLOSE_PAREN; - return 1; - - case '$': - *lexemePtr = VARIABLE; - return 1; - - case '\"': - *lexemePtr = QUOTED; - return 1; - - case ',': - *lexemePtr = COMMA; - return 1; - - case '/': - *lexemePtr = DIVIDE; + byte = UCHAR(*start); + if (byte < sizeof(Lexeme) && Lexeme[byte] != 0) { + *lexemePtr = Lexeme[byte]; return 1; - - case '%': - *lexemePtr = MOD; - return 1; - - case '+': - *lexemePtr = PLUS; - return 1; - - case '-': - *lexemePtr = MINUS; - return 1; - - case '?': - *lexemePtr = QUESTION; - return 1; - - case ':': - *lexemePtr = COLON; - return 1; - - case '^': - *lexemePtr = BIT_XOR; - return 1; - - case '~': - *lexemePtr = BIT_NOT; - return 1; - + } + switch (byte) { case '*': if ((numBytes > 1) && (start[1] == '*')) { *lexemePtr = EXPON; @@ -2063,10 +1969,11 @@ ParseLexeme( if ((numBytes > 1) && (start[1] == 'n') && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { /* - * Must make this check so we can tell the difference between - * the "in" operator and the "int" function name and the - * "infinity" numeric value. + * Must make this check so we can tell the difference between the + * "in" operator and the "int" function name and the "infinity" + * numeric value. */ + *lexemePtr = IN_LIST; return 2; } @@ -2096,25 +2003,67 @@ ParseLexeme( literal = Tcl_NewObj(); if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end, TCL_PARSE_NO_WHITESPACE) == TCL_OK) { - TclInitStringRep(literal, start, end-start); - *lexemePtr = NUMBER; - if (literalPtr) { - *literalPtr = literal; + if (end < start + numBytes && !isalnum(UCHAR(*end)) + && UCHAR(*end) != '_') { + + number: + TclInitStringRep(literal, start, end-start); + *lexemePtr = NUMBER; + if (literalPtr) { + *literalPtr = literal; + } else { + Tcl_DecrRefCount(literal); + } + return (end-start); } else { - Tcl_DecrRefCount(literal); + unsigned char lexeme; + + /* + * We have a number followed directly by bareword characters + * (alpha, digit, underscore). Is this a number followed by + * bareword syntax error? Or should we join into one bareword? + * Example: Inf + luence + () becomes a valid function call. + * [Bug 3401704] + */ + if (literal->typePtr == &tclDoubleType) { + const char *p = start; + + while (p < end) { + if (!isalnum(UCHAR(*p++))) { + /* + * The number has non-bareword characters, so we + * must treat it as a number. + */ + goto number; + } + } + } + ParseLexeme(end, numBytes-(end-start), &lexeme, NULL); + if ((NODE_TYPE & lexeme) == BINARY) { + /* + * The bareword characters following the number take the + * form of an operator (eq, ne, in, ni, ...) so we treat + * as number + operator. + */ + goto number; + } + + /* + * Otherwise, fall through and parse the whole as a bareword. + */ } - return (end-start); } if (Tcl_UtfCharComplete(start, numBytes)) { scanned = Tcl_UtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; scanned = Tcl_UtfToUniChar(utfBytes, &ch); } - if (!isalpha(UCHAR(ch))) { + if (!isalnum(UCHAR(ch))) { *lexemePtr = INVALID; Tcl_DecrRefCount(literal); return scanned; @@ -2127,6 +2076,7 @@ ParseLexeme( scanned = Tcl_UtfToUniChar(end, &ch); } else { char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, end, (size_t) numBytes); utfBytes[numBytes] = '\0'; scanned = Tcl_UtfToUniChar(utfBytes, &ch); @@ -2141,144 +2091,6 @@ ParseLexeme( } return (end-start); } - -#ifdef USE_EXPR_TOKENS -/* - * Boolean variable that controls whether expression compilation tracing is - * enabled. - */ - -#ifdef TCL_COMPILE_DEBUG -static int traceExprComp = 0; -#endif /* TCL_COMPILE_DEBUG */ - -/* - * Definitions of numeric codes representing each expression operator. The - * order of these must match the entries in the operatorTable below. Also the - * codes for the relational operators (OP_LESS, OP_GREATER, OP_LE, OP_GE, - * OP_EQ, and OP_NE) must be consecutive and in that order. Note that OP_PLUS - * and OP_MINUS represent both unary and binary operators. - */ - -#define OP_MULT 0 -#define OP_DIVIDE 1 -#define OP_MOD 2 -#define OP_PLUS 3 -#define OP_MINUS 4 -#define OP_LSHIFT 5 -#define OP_RSHIFT 6 -#define OP_LESS 7 -#define OP_GREATER 8 -#define OP_LE 9 -#define OP_GE 10 -#define OP_EQ 11 -#define OP_NEQ 12 -#define OP_BITAND 13 -#define OP_BITXOR 14 -#define OP_BITOR 15 -#define OP_LAND 16 -#define OP_LOR 17 -#define OP_QUESTY 18 -#define OP_LNOT 19 -#define OP_BITNOT 20 -#define OP_STREQ 21 -#define OP_STRNEQ 22 -#define OP_EXPON 23 -#define OP_IN_LIST 24 -#define OP_NOT_IN_LIST 25 - -/* - * Table describing the expression operators. Entries in this table must - * correspond to the definitions of numeric codes for operators just above. - */ - -static int opTableInitialized = 0; /* 0 means not yet initialized. */ - -TCL_DECLARE_MUTEX(opMutex) - -typedef struct OperatorDesc { - char *name; /* Name of the operator. */ - int numOperands; /* Number of operands. 0 if the operator - * requires special handling. */ - int instruction; /* Instruction opcode for the operator. - * Ignored if numOperands is 0. */ -} OperatorDesc; - -static OperatorDesc operatorTable[] = { - {"*", 2, INST_MULT}, - {"/", 2, INST_DIV}, - {"%", 2, INST_MOD}, - {"+", 0}, - {"-", 0}, - {"<<", 2, INST_LSHIFT}, - {">>", 2, INST_RSHIFT}, - {"<", 2, INST_LT}, - {">", 2, INST_GT}, - {"<=", 2, INST_LE}, - {">=", 2, INST_GE}, - {"==", 2, INST_EQ}, - {"!=", 2, INST_NEQ}, - {"&", 2, INST_BITAND}, - {"^", 2, INST_BITXOR}, - {"|", 2, INST_BITOR}, - {"&&", 0}, - {"||", 0}, - {"?", 0}, - {"!", 1, INST_LNOT}, - {"~", 1, INST_BITNOT}, - {"eq", 2, INST_STR_EQ}, - {"ne", 2, INST_STR_NEQ}, - {"**", 2, INST_EXPON}, - {"in", 2, INST_LIST_IN}, - {"ni", 2, INST_LIST_NOT_IN}, - {NULL} -}; - -/* - * Hashtable used to map the names of expression operators to the index of - * their OperatorDesc description. - */ - -static Tcl_HashTable opHashTable; - -#endif /* USE_EXPR_TOKENS */ - -/* - * Declarations for local procedures to this file: - */ - -#ifdef USE_EXPR_TOKENS -static void CompileCondExpr(Tcl_Interp *interp, - Tcl_Token *exprTokenPtr, int *convertPtr, - CompileEnv *envPtr); -static void CompileLandOrLorExpr(Tcl_Interp *interp, - Tcl_Token *exprTokenPtr, int opIndex, - CompileEnv *envPtr); -static void CompileMathFuncCall(Tcl_Interp *interp, - Tcl_Token *exprTokenPtr, CONST char *funcName, - CompileEnv *envPtr); -static void CompileSubExpr(Tcl_Interp *interp, - Tcl_Token *exprTokenPtr, int *convertPtr, - CompileEnv *envPtr); -#endif /* USE_EXPR_TOKENS */ -static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, - Tcl_Obj *const litObjv[], Tcl_Obj *funcList, - Tcl_Token *tokenPtr, int *convertPtr, - CompileEnv *envPtr); - -/* - * Macro used to debug the execution of the expression compiler. - */ - -#ifdef TCL_COMPILE_DEBUG -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) \ - if (traceExprComp) { \ - fprintf(stderr, "CompileSubExpr: \"%.*s\", token \"%.*s\"\n", \ - (exprLength), (exprBytes), (tokenLength), (tokenBytes)); \ - } -#else -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) -#endif /* TCL_COMPILE_DEBUG */ /* *---------------------------------------------------------------------- @@ -2286,15 +2098,10 @@ static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, * TclCompileExpr -- * * This procedure compiles a string containing a Tcl expression into Tcl - * bytecodes. This procedure is the top-level interface to the the - * expression compilation module, and is used by such public procedures - * as Tcl_ExprString, Tcl_ExprStringObj, Tcl_ExprLong, Tcl_ExprDouble, - * Tcl_ExprBoolean, and Tcl_ExprBooleanObj. + * bytecodes. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. @@ -2302,403 +2109,427 @@ static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, *---------------------------------------------------------------------- */ -int +void TclCompileExpr( Tcl_Interp *interp, /* Used for error reporting. */ - CONST char *script, /* The source script to compile. */ - int numBytes, /* Number of bytes in script. If < 0, the - * string consists of all bytes up to the - * first null character. */ - CompileEnv *envPtr) /* Holds resulting instructions. */ + const char *script, /* The source script to compile. */ + int numBytes, /* Number of bytes in script. */ + CompileEnv *envPtr, /* Holds resulting instructions. */ + int optimize) /* 0 for one-off expressions. */ { -#ifndef USE_EXPR_TOKENS OpNode *opTree = NULL; /* Will point to the tree of operators */ Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse parse; /* Holds the Tcl_Tokens of substitutions */ + Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions */ int code = ParseExpr(interp, script, numBytes, &opTree, litList, - funcList, &parse); + funcList, parsePtr, 0 /* parseOnly */); if (code == TCL_OK) { - int litObjc, needsNumConversion = 1; - Tcl_Obj **litObjv; - - /* TIP #280 : Track Lines within the expression */ - TclAdvanceLines(&envPtr->line, script, - script + TclParseAllWhiteSpace(script, numBytes)); - /* * Valid parse; compile the tree. */ - Tcl_ListObjGetElements(NULL, litList, &litObjc, &litObjv); - CompileExprTree(interp, opTree, litObjv, funcList, parse.tokenPtr, - &needsNumConversion, envPtr); - if (needsNumConversion) { - /* - * Attempt to convert the expression result to an int or double. - * This is done in order to support Tcl's policy of interpreting - * operands if at all possible as first integers, else - * floating-point numbers. - */ + int objc; + Tcl_Obj *const *litObjv; + Tcl_Obj **funcObjv; - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } + /* TIP #280 : Track Lines within the expression */ + TclAdvanceLines(&envPtr->line, script, + script + TclParseAllWhiteSpace(script, numBytes)); + + TclListObjGetElements(NULL, litList, &objc, (Tcl_Obj ***)&litObjv); + TclListObjGetElements(NULL, funcList, &objc, &funcObjv); + CompileExprTree(interp, opTree, 0, &litObjv, funcObjv, + parsePtr->tokenPtr, envPtr, optimize); + } else { + TclCompileSyntaxError(interp, envPtr); } - Tcl_FreeParse(&parse); + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); Tcl_DecrRefCount(funcList); Tcl_DecrRefCount(litList); - ckfree((char *) opTree); - return code; -#else - Tcl_Parse parse; - int needsNumConversion = 1; - - /* - * If this is the first time we've been called, initialize the table of - * expression operators. - */ + ckfree(opTree); +} + +/* + *---------------------------------------------------------------------- + * + * ExecConstantExprTree -- + * Compiles and executes bytecode for the subexpression tree at index + * in the nodes array. This subexpression must be constant, made up + * of only constant operators (not functions) and literals. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. + * + *---------------------------------------------------------------------- + */ - if (numBytes < 0) { - numBytes = (script? strlen(script) : 0); - } - if (!opTableInitialized) { - Tcl_MutexLock(&opMutex); - if (!opTableInitialized) { - int i; - - Tcl_InitHashTable(&opHashTable, TCL_STRING_KEYS); - for (i = 0; operatorTable[i].name != NULL; i++) { - int new; - - Tcl_HashEntry *hPtr = Tcl_CreateHashEntry(&opHashTable, - operatorTable[i].name, &new); - if (new) { - Tcl_SetHashValue(hPtr, (ClientData) INT2PTR(i)); - } - } - opTableInitialized = 1; - } - Tcl_MutexUnlock(&opMutex); - } +static int +ExecConstantExprTree( + Tcl_Interp *interp, + OpNode *nodes, + int index, + Tcl_Obj *const **litObjvPtr) +{ + CompileEnv *envPtr; + ByteCode *byteCodePtr; + int code; + Tcl_Obj *byteCodeObj = Tcl_NewObj(); + NRE_callback *rootPtr = TOP_CB(interp); /* - * Parse the expression then compile it. + * Note we are compiling an expression with literal arguments. This means + * there can be no [info frame] calls when we execute the resulting + * bytecode, so there's no need to tend to TIP 280 issues. */ - if (TCL_OK != Tcl_ParseExpr(interp, script, numBytes, &parse)) { - return TCL_ERROR; - } - - /* TIP #280 : Track Lines within the expression */ - TclAdvanceLines (&envPtr->line, script, parse.tokenPtr->start); - - CompileSubExpr(interp, parse.tokenPtr, &needsNumConversion, envPtr); - - if (needsNumConversion) { - /* - * Attempt to convert the primary's object to an int or double. This - * is done in order to support Tcl's policy of interpreting operands - * if at all possible as first integers, else floating-point numbers. - */ - - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } - Tcl_FreeParse(&parse); - - return TCL_OK; -#endif + envPtr = TclStackAlloc(interp, sizeof(CompileEnv)); + TclInitCompileEnv(interp, envPtr, NULL, 0, NULL, 0); + CompileExprTree(interp, nodes, index, litObjvPtr, NULL, NULL, envPtr, + 0 /* optimize */); + TclEmitOpcode(INST_DONE, envPtr); + Tcl_IncrRefCount(byteCodeObj); + TclInitByteCodeObj(byteCodeObj, envPtr); + TclFreeCompileEnv(envPtr); + TclStackFree(interp, envPtr); + byteCodePtr = byteCodeObj->internalRep.twoPtrValue.ptr1; + TclNRExecuteByteCode(interp, byteCodePtr); + code = TclNRRunCallbacks(interp, TCL_OK, rootPtr); + Tcl_DecrRefCount(byteCodeObj); + return code; } /* *---------------------------------------------------------------------- * * CompileExprTree -- - * [???] + * + * Compiles and writes to envPtr instructions for the subexpression tree + * at index in the nodes array. (*litObjvPtr) must point to the proper + * location in a corresponding literals list. Likewise, when non-NULL, + * funcObjv and tokenPtr must point into matching arrays of function + * names and Tcl_Token's derived from earlier call to ParseExpr(). When + * optimize is true, any constant subexpressions will be precomputed. * * Results: * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. * *---------------------------------------------------------------------- */ -typedef struct JumpList { - JumpFixup jump; - int depth; - int offset; - int convert; - struct JumpList *next; -} JumpList; - static void CompileExprTree( Tcl_Interp *interp, OpNode *nodes, - Tcl_Obj *const litObjv[], - Tcl_Obj *funcList, + int index, + Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, Tcl_Token *tokenPtr, - int *convertPtr, - CompileEnv *envPtr) + CompileEnv *envPtr, + int optimize) { - OpNode *nodePtr = nodes; - int nextFunc = 0; + OpNode *nodePtr = nodes + index; + OpNode *rootPtr = nodePtr; + int numWords = 0; JumpList *jumpPtr = NULL; - static CONST int instruction[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, INST_ADD, INST_SUB, 0, /* COMMA */ - INST_MULT, INST_DIV, INST_MOD, INST_LT, - INST_GT, INST_BITAND, INST_BITXOR, INST_BITOR, - 0, /* QUESTION */ 0, /* COLON */ - INST_LSHIFT, INST_RSHIFT, INST_LE, INST_GE, - INST_EQ, INST_NEQ, 0, /* AND */ 0, /* OR */ - INST_STR_EQ, INST_STR_NEQ, INST_EXPON, INST_LIST_IN, - INST_LIST_NOT_IN, 0, /* CLOSE_PAREN */ 0, /* END */ - 0, 0, 0, - 0, INST_UPLUS, INST_UMINUS, 0, /* FUNCTION */ - 0, /* START */ 0, /* OPEN_PAREN */ - INST_LNOT, INST_BITNOT - }; + int convert = 1; while (1) { - switch (NODE_TYPE & nodePtr->lexeme) { - case UNARY: - if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; - - nodePtr->right = OT_NONE; - if (nodePtr->lexeme == FUNCTION) { - Tcl_DString cmdName; - Tcl_Obj *funcName; - CONST char *p; - int length; - - Tcl_DStringInit(&cmdName); - Tcl_DStringAppend(&cmdName, "tcl::mathfunc::", -1); - Tcl_ListObjIndex(NULL, funcList, nextFunc++, &funcName); - p = Tcl_GetStringFromObj(funcName, &length); - Tcl_DStringAppend(&cmdName, p, length); - TclEmitPush(TclRegisterNewNSLiteral(envPtr, - Tcl_DStringValue(&cmdName), - Tcl_DStringLength(&cmdName)), envPtr); - Tcl_DStringFree(&cmdName); - } - switch (right) { - case OT_EMPTY: - break; - case OT_LITERAL: - /* TODO: reduce constant expressions */ - TclEmitPush( TclAddLiteralObj( - envPtr, *litObjv++, NULL), envPtr); - break; - case OT_TOKENS: - if (tokenPtr->type != TCL_TOKEN_WORD) { - Tcl_Panic("unexpected token type %d\n", - tokenPtr->type); - } - TclCompileTokens(interp, tokenPtr+1, - tokenPtr->numComponents, envPtr); - tokenPtr += tokenPtr->numComponents + 1; - break; - default: - nodePtr = nodes + right; - } - } else { - if (nodePtr->lexeme == START) { - /* We're done */ - return; + int next; + JumpList *freePtr, *newJump; + + if (nodePtr->mark == MARK_LEFT) { + next = nodePtr->left; + + if (nodePtr->lexeme == QUESTION) { + convert = 1; + } + } else if (nodePtr->mark == MARK_RIGHT) { + next = nodePtr->right; + + switch (nodePtr->lexeme) { + case FUNCTION: { + Tcl_DString cmdName; + const char *p; + int length; + + Tcl_DStringInit(&cmdName); + TclDStringAppendLiteral(&cmdName, "tcl::mathfunc::"); + p = TclGetStringFromObj(*funcObjv, &length); + funcObjv++; + Tcl_DStringAppend(&cmdName, p, length); + TclEmitPush(TclRegisterNewCmdLiteral(envPtr, + Tcl_DStringValue(&cmdName), + Tcl_DStringLength(&cmdName)), envPtr); + Tcl_DStringFree(&cmdName); + + /* + * Start a count of the number of words in this function + * command invocation. In case there's already a count in + * progress (nested functions), save it in our unused "left" + * field for restoring later. + */ + + nodePtr->left = numWords; + numWords = 2; /* Command plus one argument */ + break; + } + case QUESTION: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump); + break; + case COLON: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, + &jumpPtr->jump); + TclAdjustStackDepth(-1, envPtr); + if (convert) { + jumpPtr->jump.jumpType = TCL_TRUE_JUMP; + } + convert = 1; + break; + case AND: + case OR: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) + ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, &jumpPtr->jump); + break; + } + } else { + int pc1, pc2, target; + + switch (nodePtr->lexeme) { + case START: + case QUESTION: + if (convert && (nodePtr == rootPtr)) { + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); } - if (nodePtr->lexeme == OPEN_PAREN) { - /* do nothing */ - } else if (nodePtr->lexeme == FUNCTION) { - int numWords = (nodePtr[1].left - OT_NONE) + 1; - if (numWords < 255) { - TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr); - } else { - TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr); - } - *convertPtr = 1; + break; + case OPEN_PAREN: + + /* do nothing */ + break; + case FUNCTION: + /* + * Use the numWords count we've kept to invoke the function + * command with the correct number of arguments. + */ + + if (numWords < 255) { + TclEmitInvoke(envPtr, INST_INVOKE_STK1, numWords); } else { - TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); - *convertPtr = 0; + TclEmitInvoke(envPtr, INST_INVOKE_STK4, numWords); } - nodePtr = nodes + nodePtr->parent; + + /* + * Restore any saved numWords value. + */ + + numWords = nodePtr->left; + convert = 1; + break; + case COMMA: + /* + * Each comma implies another function argument. + */ + + numWords++; + break; + case COLON: + CLANG_ASSERT(jumpPtr); + if (jumpPtr->jump.jumpType == TCL_TRUE_JUMP) { + jumpPtr->jump.jumpType = TCL_UNCONDITIONAL_JUMP; + convert = 1; + } + target = jumpPtr->jump.codeOffset + 2; + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + target += 3; + } + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + TclFixupForwardJump(envPtr, &jumpPtr->jump, + target - jumpPtr->jump.codeOffset, 127); + + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + case AND: + case OR: + CLANG_ASSERT(jumpPtr); + pc1 = CurrentOffset(envPtr); + TclEmitInstInt1((nodePtr->lexeme == AND) ? INST_JUMP_FALSE1 + : INST_JUMP_TRUE1, 0, envPtr); + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); + pc2 = CurrentOffset(envPtr); + TclEmitInstInt1(INST_JUMP1, 0, envPtr); + TclAdjustStackDepth(-1, envPtr); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc1, + envPtr->codeStart + pc1 + 1); + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + pc2 += 3; + } + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc2, + envPtr->codeStart + pc2 + 1); + convert = 0; + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + default: + TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); + convert = 0; + break; } + if (nodePtr == rootPtr) { + /* We're done */ + + return; + } + nodePtr = nodes + nodePtr->p.parent; + continue; + } + + nodePtr->mark++; + switch (next) { + case OT_EMPTY: + numWords = 1; /* No arguments, so just the command */ break; - case BINARY: - if (nodePtr->left > OT_NONE) { - int left = nodePtr->left; - nodePtr->left = OT_NONE; - /* TODO: reduce constant expressions */ - if (nodePtr->lexeme == QUESTION) { - JumpList *newJump = (JumpList *) - TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) - TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - *convertPtr = 1; - } else if (nodePtr->lexeme == AND || nodePtr->lexeme == OR) { - JumpList *newJump = (JumpList *) - TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) - TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) - TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - } - switch (left) { - case OT_LITERAL: - TclEmitPush(TclAddLiteralObj(envPtr, *litObjv++, NULL), - envPtr); - break; - case OT_TOKENS: - if (tokenPtr->type != TCL_TOKEN_WORD) { - Tcl_Panic("unexpected token type %d\n", - tokenPtr->type); - } - TclCompileTokens(interp, tokenPtr+1, - tokenPtr->numComponents, envPtr); - tokenPtr += tokenPtr->numComponents + 1; - break; - default: - nodePtr = nodes + left; - } - } else if (nodePtr->right > OT_NONE) { - int right = nodePtr->right; - - nodePtr->right = OT_NONE; - if (nodePtr->lexeme == QUESTION) { - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, - &(jumpPtr->jump)); - } else if (nodePtr->lexeme == COLON) { - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->jump)); - envPtr->currStackDepth = jumpPtr->depth; - jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart); - jumpPtr->convert = *convertPtr; - *convertPtr = 1; - } else if (nodePtr->lexeme == AND) { - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, - &(jumpPtr->jump)); - } else if (nodePtr->lexeme == OR) { - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, - &(jumpPtr->jump)); - } - switch (right) { - case OT_LITERAL: - TclEmitPush(TclAddLiteralObj(envPtr, *litObjv++, NULL), - envPtr); - break; - case OT_TOKENS: - if (tokenPtr->type != TCL_TOKEN_WORD) { - Tcl_Panic("unexpected token type %d\n", - tokenPtr->type); - } - TclCompileTokens(interp, tokenPtr+1, - tokenPtr->numComponents, envPtr); - tokenPtr += tokenPtr->numComponents + 1; - break; - default: - nodePtr = nodes + right; + case OT_LITERAL: { + Tcl_Obj *const *litObjv = *litObjvPtr; + Tcl_Obj *literal = *litObjv; + + if (optimize) { + int length; + const char *bytes = TclGetStringFromObj(literal, &length); + int index = TclRegisterNewLiteral(envPtr, bytes, length); + Tcl_Obj *objPtr = TclFetchLiteral(envPtr, index); + + if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { + /* + * Would like to do this: + * + * lePtr->objPtr = literal; + * Tcl_IncrRefCount(literal); + * Tcl_DecrRefCount(objPtr); + * + * However, the design of the "global" and "local" + * LiteralTable does not permit the value of lePtr->objPtr + * to change. So rather than replace lePtr->objPtr, we do + * surgery to transfer our desired intrep into it. + */ + + objPtr->typePtr = literal->typePtr; + objPtr->internalRep = literal->internalRep; + literal->typePtr = NULL; } + TclEmitPush(index, envPtr); } else { - if (nodePtr->lexeme == COMMA || nodePtr->lexeme == QUESTION) { - /* do nothing */ - } else if (nodePtr->lexeme == COLON) { - if (TclFixupForwardJump(envPtr, &(jumpPtr->next->jump), - (envPtr->codeNext - envPtr->codeStart) - - jumpPtr->next->jump.codeOffset, 127)) { - jumpPtr->offset += 3; - } - TclFixupForwardJump(envPtr, &(jumpPtr->jump), - jumpPtr->offset - jumpPtr->jump.codeOffset, 127); - *convertPtr |= jumpPtr->convert; - envPtr->currStackDepth = jumpPtr->depth + 1; - jumpPtr = jumpPtr->next->next; - TclStackFree(interp); - TclStackFree(interp); - } else if (nodePtr->lexeme == AND) { - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, - &(jumpPtr->next->jump)); - TclEmitPush(TclRegisterNewLiteral(envPtr, "1", 1), envPtr); - } else if (nodePtr->lexeme == OR) { - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, - &(jumpPtr->next->jump)); - TclEmitPush(TclRegisterNewLiteral(envPtr, "0", 1), envPtr); - } else { - TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); - *convertPtr = 0; - } - if ((nodePtr->lexeme == AND) || (nodePtr->lexeme == OR)) { - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->next->jump)); - TclFixupForwardJumpToHere(envPtr, - &(jumpPtr->next->jump), 127); - if (TclFixupForwardJumpToHere(envPtr, - &(jumpPtr->jump), 127)) { - jumpPtr->next->next->jump.codeOffset += 3; - } - TclEmitPush(TclRegisterNewLiteral(envPtr, - (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); - TclFixupForwardJumpToHere(envPtr, - &(jumpPtr->next->next->jump), 127); - *convertPtr = 0; - envPtr->currStackDepth = jumpPtr->depth + 1; - jumpPtr = jumpPtr->next->next->next; - TclStackFree(interp); - TclStackFree(interp); - TclStackFree(interp); - } - nodePtr = nodes + nodePtr->parent; + /* + * When optimize==0, we know the expression is a one-off and + * there's nothing to be gained from sharing literals when + * they won't live long, and the copies we have already have + * an appropriate intrep. In this case, skip literal + * registration that would enable sharing, and use the routine + * that preserves intreps. + */ + + TclEmitPush(TclAddLiteralObj(envPtr, literal, NULL), envPtr); } + (*litObjvPtr)++; break; } - } -} + case OT_TOKENS: + CompileTokens(envPtr, tokenPtr, interp); + tokenPtr += tokenPtr->numComponents + 1; + break; + default: + if (optimize && nodes[next].constant) { + Tcl_InterpState save = Tcl_SaveInterpState(interp, TCL_OK); -static int -OpCmd( - Tcl_Interp *interp, - OpNode *nodes, - Tcl_Obj * const litObjv[]) -{ - CompileEnv compEnv; - ByteCode *byteCodePtr; - int code, tmp=1; - Tcl_Obj *byteCodeObj = Tcl_NewObj(); + if (ExecConstantExprTree(interp, nodes, next, litObjvPtr) + == TCL_OK) { + int index; + Tcl_Obj *objPtr = Tcl_GetObjResult(interp); - /* - * Note we are compiling an expression with literal arguments. This means - * there can be no [info frame] calls when we execute the resulting - * bytecode, so there's no need to tend to TIP 280 issues. - */ + /* + * Don't generate a string rep, but if we have one + * already, then use it to share via the literal table. + */ - TclInitCompileEnv(interp, &compEnv, NULL, 0, NULL, 0); - CompileExprTree(interp, nodes, litObjv, NULL, NULL, &tmp, &compEnv); - TclEmitOpcode(INST_DONE, &compEnv); - Tcl_IncrRefCount(byteCodeObj); - TclInitByteCodeObj(byteCodeObj, &compEnv); - TclFreeCompileEnv(&compEnv); - byteCodePtr = (ByteCode *) byteCodeObj->internalRep.otherValuePtr; - code = TclExecuteByteCode(interp, byteCodePtr); - Tcl_DecrRefCount(byteCodeObj); - return code; + if (objPtr->bytes) { + Tcl_Obj *tableValue; + + index = TclRegisterNewLiteral(envPtr, objPtr->bytes, + objPtr->length); + tableValue = TclFetchLiteral(envPtr, index); + if ((tableValue->typePtr == NULL) && + (objPtr->typePtr != NULL)) { + /* + * Same intrep surgery as for OT_LITERAL. + */ + + tableValue->typePtr = objPtr->typePtr; + tableValue->internalRep = objPtr->internalRep; + objPtr->typePtr = NULL; + } + } else { + index = TclAddLiteralObj(envPtr, objPtr, NULL); + } + TclEmitPush(index, envPtr); + } else { + TclCompileSyntaxError(interp, envPtr); + } + Tcl_RestoreInterpState(interp, save); + convert = 0; + } else { + nodePtr = nodes + next; + } + } + } } + +/* + *---------------------------------------------------------------------- + * + * TclSingleOpCmd -- + * + * Implements the commands: ~, !, <<, >>, %, !=, ne, in, ni + * in the ::tcl::mathop namespace. These commands have no + * extension to arbitrary arguments; they accept only exactly one + * or exactly two arguments as suitable for the operator. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ int TclSingleOpCmd( @@ -2707,25 +2538,51 @@ TclSingleOpCmd( int objc, Tcl_Obj *const objv[]) { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + TclOpCmdClientData *occdPtr = clientData; unsigned char lexeme; OpNode nodes[2]; + Tcl_Obj *const *litObjv = objv + 1; - if (objc != 1+occdPtr->numArgs) { + if (objc != 1 + occdPtr->i.numArgs) { Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); return TCL_ERROR; } - ParseLexeme(occdPtr->operator, strlen(occdPtr->operator), &lexeme, NULL); + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; nodes[0].right = 1; nodes[1].lexeme = lexeme; - nodes[1].left = OT_LITERAL; + if (objc == 2) { + nodes[1].mark = MARK_RIGHT; + } else { + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + } nodes[1].right = OT_LITERAL; - nodes[1].parent = 0; + nodes[1].p.parent = 0; - return OpCmd(interp, nodes, objv+1); + return ExecConstantExprTree(interp, nodes, 0, &litObjv); } + +/* + *---------------------------------------------------------------------- + * + * TclSortingOpCmd -- + * Implements the commands: + * <, <=, >, >=, ==, eq + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by computing the AND of the base + * operator applied to all neighbor argument pairs. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ int TclSortingOpCmd( @@ -2739,49 +2596,73 @@ TclSortingOpCmd( if (objc < 3) { Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); } else { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - Tcl_Obj **litObjv = (Tcl_Obj **) ckalloc(2*(objc-2)*sizeof(Tcl_Obj *)); - OpNode *nodes = (OpNode *) ckalloc(2*(objc-2)*sizeof(OpNode)); + TclOpCmdClientData *occdPtr = clientData; + Tcl_Obj **litObjv = TclStackAlloc(interp, + 2 * (objc-2) * sizeof(Tcl_Obj *)); + OpNode *nodes = TclStackAlloc(interp, 2 * (objc-2) * sizeof(OpNode)); unsigned char lexeme; int i, lastAnd = 1; + Tcl_Obj *const *litObjPtrPtr = litObjv; - ParseLexeme(occdPtr->operator, strlen(occdPtr->operator), - &lexeme, NULL); + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); litObjv[0] = objv[1]; nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; for (i=2; i<objc-1; i++) { litObjv[2*(i-1)-1] = objv[i]; nodes[2*(i-1)-1].lexeme = lexeme; + nodes[2*(i-1)-1].mark = MARK_LEFT; nodes[2*(i-1)-1].left = OT_LITERAL; nodes[2*(i-1)-1].right = OT_LITERAL; litObjv[2*(i-1)] = objv[i]; nodes[2*(i-1)].lexeme = AND; + nodes[2*(i-1)].mark = MARK_LEFT; nodes[2*(i-1)].left = lastAnd; - nodes[lastAnd].parent = 2*(i-1); + nodes[lastAnd].p.parent = 2*(i-1); nodes[2*(i-1)].right = 2*(i-1)+1; - nodes[2*(i-1)+1].parent= 2*(i-1); + nodes[2*(i-1)+1].p.parent= 2*(i-1); lastAnd = 2*(i-1); } litObjv[2*(objc-2)-1] = objv[objc-1]; nodes[2*(objc-2)-1].lexeme = lexeme; + nodes[2*(objc-2)-1].mark = MARK_LEFT; nodes[2*(objc-2)-1].left = OT_LITERAL; nodes[2*(objc-2)-1].right = OT_LITERAL; nodes[0].right = lastAnd; - nodes[lastAnd].parent = 0; + nodes[lastAnd].p.parent = 0; - code = OpCmd(interp, nodes, litObjv); + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); - ckfree((char *) nodes); - ckfree((char *) litObjv); + TclStackFree(interp, nodes); + TclStackFree(interp, litObjv); } return code; } + +/* + *---------------------------------------------------------------------- + * + * TclVariadicOpCmd -- + * Implements the commands: +, *, &, |, ^, ** + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by repeatedly applying the base + * operator with suitable associative rules. When fewer than two + * arguments are provided, suitable identity values are returned. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ int TclVariadicOpCmd( @@ -2790,570 +2671,133 @@ TclVariadicOpCmd( int objc, Tcl_Obj *const objv[]) { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + TclOpCmdClientData *occdPtr = clientData; unsigned char lexeme; int code; if (objc < 2) { - Tcl_SetObjResult(interp, Tcl_NewIntObj(occdPtr->numArgs)); + Tcl_SetObjResult(interp, Tcl_NewIntObj(occdPtr->i.identity)); return TCL_OK; } - ParseLexeme(occdPtr->operator, strlen(occdPtr->operator), &lexeme, NULL); + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); lexeme |= BINARY; if (objc == 2) { Tcl_Obj *litObjv[2]; OpNode nodes[2]; int decrMe = 0; + Tcl_Obj *const *litObjPtrPtr = litObjv; if (lexeme == EXPON) { - litObjv[1] = Tcl_NewIntObj(occdPtr->numArgs); + litObjv[1] = Tcl_NewIntObj(occdPtr->i.identity); Tcl_IncrRefCount(litObjv[1]); decrMe = 1; litObjv[0] = objv[1]; nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; nodes[0].right = 1; nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; nodes[1].left = OT_LITERAL; nodes[1].right = OT_LITERAL; - nodes[1].parent = 0; + nodes[1].p.parent = 0; } else { if (lexeme == DIVIDE) { litObjv[0] = Tcl_NewDoubleObj(1.0); } else { - litObjv[0] = Tcl_NewIntObj(occdPtr->numArgs); + litObjv[0] = Tcl_NewIntObj(occdPtr->i.identity); } Tcl_IncrRefCount(litObjv[0]); litObjv[1] = objv[1]; nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; nodes[0].right = 1; nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; nodes[1].left = OT_LITERAL; nodes[1].right = OT_LITERAL; - nodes[1].parent = 0; + nodes[1].p.parent = 0; } - code = OpCmd(interp, nodes, litObjv); + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); Tcl_DecrRefCount(litObjv[decrMe]); return code; } else { - OpNode *nodes = (OpNode *) ckalloc((objc-1)*sizeof(OpNode)); + Tcl_Obj *const *litObjv = objv + 1; + OpNode *nodes = TclStackAlloc(interp, (objc-1) * sizeof(OpNode)); int i, lastOp = OT_LITERAL; nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; if (lexeme == EXPON) { - for (i=objc-2; i>0; i-- ) { + for (i=objc-2; i>0; i--) { nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; nodes[i].left = OT_LITERAL; nodes[i].right = lastOp; if (lastOp >= 0) { - nodes[lastOp].parent = i; + nodes[lastOp].p.parent = i; } lastOp = i; } } else { - for (i=1; i<objc-1; i++ ) { + for (i=1; i<objc-1; i++) { nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; nodes[i].left = lastOp; if (lastOp >= 0) { - nodes[lastOp].parent = i; + nodes[lastOp].p.parent = i; } nodes[i].right = OT_LITERAL; lastOp = i; } } nodes[0].right = lastOp; - nodes[lastOp].parent = 0; + nodes[lastOp].p.parent = 0; - code = OpCmd(interp, nodes, objv+1); - - ckfree((char *) nodes); + code = ExecConstantExprTree(interp, nodes, 0, &litObjv); + TclStackFree(interp, nodes); return code; } } - -int -TclNoIdentOpCmd( - ClientData clientData, - Tcl_Interp *interp, - int objc, - Tcl_Obj *const objv[]) -{ - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); - return TCL_ERROR; - } - return TclVariadicOpCmd(clientData, interp, objc, objv); -} /* *---------------------------------------------------------------------- * - * TclFinalizeCompilation -- - * - * Clean up the compilation environment so it can later be properly - * reinitialized. This procedure is called by Tcl_Finalize(). + * TclNoIdentOpCmd -- + * Implements the commands: -, / + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary non-zero number of arguments by repeatedly applying the base + * operator with suitable associative rules. When no arguments are + * provided, an error is raised. * * Results: - * None. + * A standard Tcl return code and result left in interp. * * Side effects: - * Cleans up the compilation environment. At the moment, just the table - * of expression operators is freed. - * - *---------------------------------------------------------------------- - */ - -void -TclFinalizeCompilation(void) -{ -#ifdef USE_EXPR_TOKENS - Tcl_MutexLock(&opMutex); - if (opTableInitialized) { - Tcl_DeleteHashTable(&opHashTable); - opTableInitialized = 0; - } - Tcl_MutexUnlock(&opMutex); -#endif -} - -#ifdef USE_EXPR_TOKENS -/* - *---------------------------------------------------------------------- - * - * CompileSubExpr -- - * - * Given a pointer to a TCL_TOKEN_SUB_EXPR token describing a - * subexpression, this procedure emits instructions to evaluate the - * subexpression at runtime. - * - * Results: * None. * - * Side effects: - * Adds instructions to envPtr to evaluate the subexpression. - * *---------------------------------------------------------------------- */ -static void -CompileSubExpr( - Tcl_Interp *interp, /* Interp in which to compile expression */ - Tcl_Token *exprTokenPtr, /* Points to TCL_TOKEN_SUB_EXPR token to - * compile. */ - int *convertPtr, /* Writes 0 here if it is determined the - * final INST_TRY_CVT_TO_NUMERIC is - * not needed */ - CompileEnv *envPtr) /* Holds resulting instructions. */ -{ - /* - * Switch on the type of the first token after the subexpression token. - */ - - Tcl_Token *tokenPtr = exprTokenPtr+1; - TRACE(exprTokenPtr->start, exprTokenPtr->size, - tokenPtr->start, tokenPtr->size); - switch (tokenPtr->type) { - case TCL_TOKEN_WORD: - TclCompileTokens(interp, tokenPtr+1, tokenPtr->numComponents, envPtr); - break; - - case TCL_TOKEN_TEXT: - TclEmitPush(TclRegisterNewLiteral(envPtr, - tokenPtr->start, tokenPtr->size), envPtr); - break; - - case TCL_TOKEN_BS: { - char buffer[TCL_UTF_MAX]; - int length = Tcl_UtfBackslash(tokenPtr->start, NULL, buffer); - TclEmitPush(TclRegisterNewLiteral(envPtr, buffer, length), envPtr); - break; - } - - case TCL_TOKEN_COMMAND: - TclCompileScript(interp, tokenPtr->start+1, tokenPtr->size-2, envPtr); - break; - - case TCL_TOKEN_VARIABLE: - TclCompileTokens(interp, tokenPtr, 1, envPtr); - break; - - case TCL_TOKEN_SUB_EXPR: - CompileSubExpr(interp, tokenPtr, convertPtr, envPtr); - break; - - case TCL_TOKEN_OPERATOR: { - /* - * Look up the operator. If the operator isn't found, treat it as a - * math function. - */ - - OperatorDesc *opDescPtr; - Tcl_HashEntry *hPtr; - CONST char *operator; - Tcl_DString opBuf; - int opIndex; - - Tcl_DStringInit(&opBuf); - operator = Tcl_DStringAppend(&opBuf, tokenPtr->start, tokenPtr->size); - hPtr = Tcl_FindHashEntry(&opHashTable, operator); - if (hPtr == NULL) { - CompileMathFuncCall(interp, exprTokenPtr, operator, envPtr); - Tcl_DStringFree(&opBuf); - break; - } - Tcl_DStringFree(&opBuf); - opIndex = PTR2INT(Tcl_GetHashValue(hPtr)); - opDescPtr = &(operatorTable[opIndex]); - - /* - * If the operator is "normal", compile it using information from the - * operator table. - */ - - if (opDescPtr->numOperands > 0) { - tokenPtr++; - CompileSubExpr(interp, tokenPtr, convertPtr, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - - if (opDescPtr->numOperands == 2) { - CompileSubExpr(interp, tokenPtr, convertPtr, envPtr); - } - TclEmitOpcode(opDescPtr->instruction, envPtr); - *convertPtr = 0; - break; - } - - /* - * The operator requires special treatment, and is either "+" or "-", - * or one of "&&", "||" or "?". - */ - - switch (opIndex) { - case OP_PLUS: - case OP_MINUS: { - Tcl_Token *afterSubexprPtr = exprTokenPtr - + exprTokenPtr->numComponents+1; - tokenPtr++; - CompileSubExpr(interp, tokenPtr, convertPtr, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Check whether the "+" or "-" is unary. - */ - - if (tokenPtr == afterSubexprPtr) { - TclEmitOpcode(((opIndex==OP_PLUS)? INST_UPLUS : INST_UMINUS), - envPtr); - break; - } - - /* - * The "+" or "-" is binary. - */ - - CompileSubExpr(interp, tokenPtr, convertPtr, envPtr); - TclEmitOpcode(((opIndex==OP_PLUS)? INST_ADD : INST_SUB), envPtr); - *convertPtr = 0; - break; - } - - case OP_LAND: - case OP_LOR: - CompileLandOrLorExpr(interp, exprTokenPtr, opIndex, envPtr); - *convertPtr = 0; - break; - - case OP_QUESTY: - CompileCondExpr(interp, exprTokenPtr, convertPtr, envPtr); - break; - - default: - Tcl_Panic("CompileSubExpr: unexpected operator %d " - "requiring special treatment", opIndex); - } /* end switch on operator requiring special treatment */ - break; - - } - - default: - Tcl_Panic("CompileSubExpr: unexpected token type %d", tokenPtr->type); - } -} - -/* - *---------------------------------------------------------------------- - * - * CompileLandOrLorExpr -- - * - * This procedure compiles a Tcl logical and ("&&") or logical or ("||") - * subexpression. - * - * Results: - * None. - * - * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. - * - *---------------------------------------------------------------------- - */ - -static void -CompileLandOrLorExpr( - Tcl_Interp *interp, /* Interp in which compile takes place */ - Tcl_Token *exprTokenPtr, /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "&&" or "||" operator. */ - int opIndex, /* A code describing the expression operator: - * either OP_LAND or OP_LOR. */ - CompileEnv *envPtr) /* Holds resulting instructions. */ -{ - JumpFixup shortCircuitFixup;/* Used to fix up the short circuit jump after - * the first subexpression. */ - JumpFixup shortCircuitFixup2; - /* Used to fix up the second jump to the - * short-circuit target. */ - JumpFixup endFixup; /* Used to fix up jump to the end. */ - int convert = 0; - int savedStackDepth = envPtr->currStackDepth; - Tcl_Token *tokenPtr = exprTokenPtr+2; - - /* - * Emit code for the first operand. - */ - - CompileSubExpr(interp, tokenPtr, &convert, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Emit the short-circuit jump. - */ - - TclEmitForwardJump(envPtr, - ((opIndex==OP_LAND)? TCL_FALSE_JUMP : TCL_TRUE_JUMP), - &shortCircuitFixup); - - /* - * Emit code for the second operand. - */ - - CompileSubExpr(interp, tokenPtr, &convert, envPtr); - - /* - * The result is the boolean value of the second operand. We code this in - * a somewhat contorted manner to be able to reuse the shortCircuit value - * and save one INST_JUMP. - */ - - TclEmitForwardJump(envPtr, - ((opIndex==OP_LAND)? TCL_FALSE_JUMP : TCL_TRUE_JUMP), - &shortCircuitFixup2); - - if (opIndex == OP_LAND) { - TclEmitPush(TclRegisterNewLiteral(envPtr, "1", 1), envPtr); - } else { - TclEmitPush(TclRegisterNewLiteral(envPtr, "0", 1), envPtr); - } - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &endFixup); - - /* - * Fixup the short-circuit jumps and push the shortCircuit value. Note - * that shortCircuitFixup2 is always a short jump. - */ - - TclFixupForwardJumpToHere(envPtr, &shortCircuitFixup2, 127); - if (TclFixupForwardJumpToHere(envPtr, &shortCircuitFixup, 127)) { - /* - * shortCircuit jump grown by 3 bytes: update endFixup. - */ - - endFixup.codeOffset += 3; - } - - if (opIndex == OP_LAND) { - TclEmitPush(TclRegisterNewLiteral(envPtr, "0", 1), envPtr); - } else { - TclEmitPush(TclRegisterNewLiteral(envPtr, "1", 1), envPtr); - } - - TclFixupForwardJumpToHere(envPtr, &endFixup, 127); - envPtr->currStackDepth = savedStackDepth + 1; -} - -/* - *---------------------------------------------------------------------- - * - * CompileCondExpr -- - * - * This procedure compiles a Tcl conditional expression: - * condExpr ::= lorExpr ['?' condExpr ':' condExpr] - * - * Results: - * None. - * - * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. - * - *---------------------------------------------------------------------- - */ - -static void -CompileCondExpr( - Tcl_Interp *interp, /* Interp in which compile takes place */ - Tcl_Token *exprTokenPtr, /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "?" operator. */ - int *convertPtr, /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr) /* Holds resulting instructions. */ -{ - JumpFixup jumpAroundThenFixup, jumpAroundElseFixup; - /* Used to update or replace one-byte jumps - * around the then and else expressions when - * their target PCs are determined. */ - Tcl_Token *tokenPtr = exprTokenPtr+2; - int elseCodeOffset, dist, convert = 0; - int convertThen = 1, convertElse = 1; - int savedStackDepth = envPtr->currStackDepth; - - /* - * Emit code for the test. - */ - - CompileSubExpr(interp, tokenPtr, &convert, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Emit the jump to the "else" expression if the test was false. - */ - - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpAroundThenFixup); - - /* - * Compile the "then" expression. Note that if a subexpression is only a - * primary, we need to try to convert it to numeric. We do this to support - * Tcl's policy of interpreting operands if at all possible as first - * integers, else floating-point numbers. - */ - - CompileSubExpr(interp, tokenPtr, &convertThen, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Emit an unconditional jump around the "else" condExpr. - */ - - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpAroundElseFixup); - - /* - * Compile the "else" expression. - */ - - envPtr->currStackDepth = savedStackDepth; - elseCodeOffset = (envPtr->codeNext - envPtr->codeStart); - CompileSubExpr(interp, tokenPtr, &convertElse, envPtr); - - /* - * Fix up the second jump around the "else" expression. - */ - - dist = (envPtr->codeNext - envPtr->codeStart) - - jumpAroundElseFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &jumpAroundElseFixup, dist, 127)) { - /* - * Update the else expression's starting code offset since it moved - * down 3 bytes too. - */ - - elseCodeOffset += 3; - } - - /* - * Fix up the first jump to the "else" expression if the test was false. - */ - - dist = (elseCodeOffset - jumpAroundThenFixup.codeOffset); - TclFixupForwardJump(envPtr, &jumpAroundThenFixup, dist, 127); - *convertPtr = convertThen || convertElse; - - envPtr->currStackDepth = savedStackDepth + 1; -} - -/* - *---------------------------------------------------------------------- - * - * CompileMathFuncCall -- - * - * This procedure compiles a call on a math function in an expression: - * mathFuncCall ::= funcName '(' [condExpr {',' condExpr}] ')' - * - * Results: - * None. - * - * Side effects: - * Adds instructions to envPtr to evaluate the math function at - * runtime. - * - *---------------------------------------------------------------------- - */ - -static void -CompileMathFuncCall( - Tcl_Interp *interp, /* Interp in which compile takes place */ - Tcl_Token *exprTokenPtr, /* Points to TCL_TOKEN_SUB_EXPR token - * containing the math function call. */ - CONST char *funcName, /* Name of the math function. */ - CompileEnv *envPtr) /* Holds resulting instructions. */ +int +TclNoIdentOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - Tcl_DString cmdName; - int objIndex; - Tcl_Token *tokenPtr, *afterSubexprPtr; - int argCount; - - /* - * Prepend "tcl::mathfunc::" to the function name, to produce the name of - * a command that evaluates the function. Push that command name on the - * stack, in a literal registered to the namespace so that resolution can - * be cached. - */ - - Tcl_DStringInit(&cmdName); - Tcl_DStringAppend(&cmdName, "tcl::mathfunc::", -1); - Tcl_DStringAppend(&cmdName, funcName, -1); - objIndex = TclRegisterNewNSLiteral(envPtr, Tcl_DStringValue(&cmdName), - Tcl_DStringLength(&cmdName)); - TclEmitPush(objIndex, envPtr); - Tcl_DStringFree(&cmdName); - - /* - * Compile any arguments for the function. - */ - - argCount = 1; - tokenPtr = exprTokenPtr+2; - afterSubexprPtr = exprTokenPtr + (exprTokenPtr->numComponents + 1); - while (tokenPtr != afterSubexprPtr) { - int convert = 0; - - ++argCount; - CompileSubExpr(interp, tokenPtr, &convert, envPtr); - tokenPtr += (tokenPtr->numComponents + 1); - } + TclOpCmdClientData *occdPtr = clientData; - /* - * Invoke the function. - */ - - if (argCount < 255) { - TclEmitInstInt1(INST_INVOKE_STK1, argCount, envPtr); - } else { - TclEmitInstInt4(INST_INVOKE_STK4, argCount, envPtr); + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); + return TCL_ERROR; } + return TclVariadicOpCmd(clientData, interp, objc, objv); } -#endif - /* * Local Variables: * mode: c |