diff options
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r-- | generic/tclCompExpr.c | 3389 |
1 files changed, 2574 insertions, 815 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 42342b1..94c1bd6 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1,658 +1,2107 @@ -/* +/* * tclCompExpr.c -- * - * This file contains the code to compile Tcl expressions. - * - * Copyright (c) 1997 Sun Microsystems, Inc. + * This file contains the code to parse and compile Tcl expressions and + * implementations of the Tcl commands corresponding to expression + * operators, such as the command ::tcl::mathop::+ . * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * Contributions from Don Porter, NIST, 2006-2007. (not subject to US copyright) * - * RCS: @(#) $Id: tclCompExpr.c,v 1.3 1999/04/16 00:46:44 stanton Exp $ + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclCompile.h" +#include "tclCompile.h" /* CompileEnv */ + +/* + * Expression parsing takes place in the routine ParseExpr(). It takes a + * string as input, parses that string, and generates a representation of the + * expression in the form of a tree of operators, a list of literals, a list + * of function names, and an array of Tcl_Token's within a Tcl_Parse struct. + * The tree is composed of OpNodes. + */ + +typedef struct OpNode { + int left; /* "Pointer" to the left operand. */ + int right; /* "Pointer" to the right operand. */ + union { + int parent; /* "Pointer" to the parent operand. */ + int prev; /* "Pointer" joining incomplete tree stack */ + } p; + unsigned char lexeme; /* Code that identifies the operator. */ + unsigned char precedence; /* Precedence of the operator */ + unsigned char mark; /* Mark used to control traversal. */ + unsigned char constant; /* Flag marking constant subexpressions. */ +} OpNode; + +/* + * The storage for the tree is dynamically allocated array of OpNodes. The + * array is grown as parsing needs dictate according to a scheme similar to + * Tcl's string growth algorithm, so that the resizing costs are O(N) and so + * that we use at least half the memory allocated as expressions get large. + * + * Each OpNode in the tree represents an operator in the expression, either + * unary or binary. When parsing is completed successfully, a binary operator + * OpNode will have its left and right fields filled with "pointers" to its + * left and right operands. A unary operator OpNode will have its right field + * filled with a pointer to its single operand. When an operand is a + * subexpression the "pointer" takes the form of the index -- a non-negative + * integer -- into the OpNode storage array where the root of that + * subexpression parse tree is found. + * + * Non-operator elements of the expression do not get stored in the OpNode + * tree. They are stored in the other structures according to their type. + * Literal values get appended to the literal list. Elements that denote forms + * of quoting or substitution known to the Tcl parser get stored as + * Tcl_Tokens. These non-operator elements of the expression are the leaves of + * the completed parse tree. When an operand of an OpNode is one of these leaf + * elements, the following negative integer codes are used to indicate which + * kind of elements it is. + */ + +enum OperandTypes { + OT_LITERAL = -3, /* Operand is a literal in the literal list */ + OT_TOKENS = -2, /* Operand is sequence of Tcl_Tokens */ + OT_EMPTY = -1 /* "Operand" is an empty string. This is a special + * case used only to represent the EMPTY lexeme. See + * below. */ +}; + +/* + * Readable macros to test whether a "pointer" value points to an operator. + * They operate on the "non-negative integer -> operator; negative integer -> + * a non-operator OperandType" distinction. + */ + +#define IsOperator(l) ((l) >= 0) +#define NotOperator(l) ((l) < 0) + +/* + * Note that it is sufficient to store in the tree just the type of leaf + * operand, without any explicit pointer to which leaf. This is true because + * the traversals of the completed tree we perform are known to visit the + * leaves in the same order as the original parse. + * + * In a completed parse tree, those OpNodes that are themselves (roots of + * subexpression trees that are) operands of some operator store in their + * p.parent field a "pointer" to the OpNode of that operator. The p.parent + * field permits a traversal of the tree within a non-recursive routine + * (ConvertTreeToTokens() and CompileExprTree()). This means that even + * expression trees of great depth pose no risk of blowing the C stack. + * + * While the parse tree is being constructed, the same memory space is used to + * hold the p.prev field which chains together a stack of incomplete trees + * awaiting their right operands. + * + * The lexeme field is filled in with the lexeme of the operator that is + * returned by the ParseLexeme() routine. Only lexemes for unary and binary + * operators get stored in an OpNode. Other lexmes get different treatement. + * + * The precedence field provides a place to store the precedence of the + * operator, so it need not be looked up again and again. + * + * The mark field is use to control the traversal of the tree, so that it can + * be done non-recursively. The mark values are: + */ + +enum Marks { + MARK_LEFT, /* Next step of traversal is to visit left subtree */ + MARK_RIGHT, /* Next step of traversal is to visit right subtree */ + MARK_PARENT /* Next step of traversal is to return to parent */ +}; /* - * The stuff below is a bit of a hack so that this file can be used in - * environments that include no UNIX, i.e. no errno: just arrange to use - * the errno from tclExecute.c here. + * The constant field is a boolean flag marking which subexpressions are + * completely known at compile time, and are eligible for computing then + * rather than waiting until run time. */ -#ifndef TCL_GENERIC_ONLY -#include "tclPort.h" -#else -#define NO_ERRNO_H -#endif +/* + * Each lexeme belongs to one of four categories, which determine its place in + * the parse tree. We use the two high bits of the (unsigned char) value to + * store a NODE_TYPE code. + */ -#ifdef NO_ERRNO_H -extern int errno; /* Use errno from tclExecute.c. */ -#define ERANGE 34 -#endif +#define NODE_TYPE 0xC0 /* - * Boolean variable that controls whether expression compilation tracing - * is enabled. + * The four category values are LEAF, UNARY, and BINARY, explained below, and + * "uncategorized", which is used either temporarily, until context determines + * which of the other three categories is correct, or for lexemes like + * INVALID, which aren't really lexemes at all, but indicators of a parsing + * error. Note that the codes must be distinct to distinguish categories, but + * need not take the form of a bit array. */ -#ifdef TCL_COMPILE_DEBUG -static int traceExprComp = 0; -#endif /* TCL_COMPILE_DEBUG */ +#define BINARY 0x40 /* This lexeme is a binary operator. An OpNode + * representing it should go into the parse + * tree, and two operands should be parsed for + * it in the expression. */ +#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode + * representing it should go into the parse + * tree, and one operand should be parsed for + * it in the expression. */ +#define LEAF 0xC0 /* This lexeme is a leaf operand in the parse + * tree. No OpNode will be placed in the tree + * for it. Either a literal value will be + * appended to the list of literals in this + * expression, or appropriate Tcl_Tokens will + * be appended in a Tcl_Parse struct to + * represent those leaves that require some + * form of substitution. */ + +/* Uncategorized lexemes */ + +#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or + * BINARY_PLUS according to context. */ +#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or + * BINARY_MINUS according to context. */ +#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to + * FUNCTION or a parse error according to + * context and value. */ +#define INCOMPLETE 4 /* A parse error. Used only when the single + * "=" is encountered. */ +#define INVALID 5 /* A parse error. Used when any punctuation + * appears that's not a supported operator. */ + +/* Leaf lexemes */ + +#define NUMBER (LEAF | 1) + /* For literal numbers */ +#define SCRIPT (LEAF | 2) + /* Script substitution; [foo] */ +#define BOOLEAN (LEAF | BAREWORD) + /* For literal booleans */ +#define BRACED (LEAF | 4) + /* Braced string; {foo bar} */ +#define VARIABLE (LEAF | 5) + /* Variable substitution; $x */ +#define QUOTED (LEAF | 6) + /* Quoted string; "foo $bar [soom]" */ +#define EMPTY (LEAF | 7) + /* Used only for an empty argument list to a + * function. Represents the empty string + * within parens in the expression: rand() */ + +/* Unary operator lexemes */ + +#define UNARY_PLUS (UNARY | PLUS) +#define UNARY_MINUS (UNARY | MINUS) +#define FUNCTION (UNARY | BAREWORD) + /* This is a bit of "creative interpretation" + * on the part of the parser. A function call + * is parsed into the parse tree according to + * the perspective that the function name is a + * unary operator and its argument list, + * enclosed in parens, is its operand. The + * additional requirements not implied + * generally by treatment as a unary operator + * -- for example, the requirement that the + * operand be enclosed in parens -- are hard + * coded in the relevant portions of + * ParseExpr(). We trade off the need to + * include such exceptional handling in the + * code against the need we would otherwise + * have for more lexeme categories. */ +#define START (UNARY | 4) + /* This lexeme isn't parsed from the + * expression text at all. It represents the + * start of the expression and sits at the + * root of the parse tree where it serves as + * the start/end point of traversals. */ +#define OPEN_PAREN (UNARY | 5) + /* Another bit of creative interpretation, + * where we treat "(" as a unary operator with + * the sub-expression between it and its + * matching ")" as its operand. See + * CLOSE_PAREN below. */ +#define NOT (UNARY | 6) +#define BIT_NOT (UNARY | 7) + +/* Binary operator lexemes */ + +#define BINARY_PLUS (BINARY | PLUS) +#define BINARY_MINUS (BINARY | MINUS) +#define COMMA (BINARY | 3) + /* The "," operator is a low precedence binary + * operator that separates the arguments in a + * function call. The additional constraint + * that this operator can only legally appear + * at the right places within a function call + * argument list are hard coded within + * ParseExpr(). */ +#define MULT (BINARY | 4) +#define DIVIDE (BINARY | 5) +#define MOD (BINARY | 6) +#define LESS (BINARY | 7) +#define GREATER (BINARY | 8) +#define BIT_AND (BINARY | 9) +#define BIT_XOR (BINARY | 10) +#define BIT_OR (BINARY | 11) +#define QUESTION (BINARY | 12) + /* These two lexemes make up the */ +#define COLON (BINARY | 13) + /* ternary conditional operator, $x ? $y : $z. + * We treat them as two binary operators to + * avoid another lexeme category, and code the + * additional constraints directly in + * ParseExpr(). For instance, the right + * operand of a "?" operator must be a ":" + * operator. */ +#define LEFT_SHIFT (BINARY | 14) +#define RIGHT_SHIFT (BINARY | 15) +#define LEQ (BINARY | 16) +#define GEQ (BINARY | 17) +#define EQUAL (BINARY | 18) +#define NEQ (BINARY | 19) +#define AND (BINARY | 20) +#define OR (BINARY | 21) +#define STREQ (BINARY | 22) +#define STRNEQ (BINARY | 23) +#define EXPON (BINARY | 24) + /* Unlike the other binary operators, EXPON is + * right associative and this distinction is + * coded directly in ParseExpr(). */ +#define IN_LIST (BINARY | 25) +#define NOT_IN_LIST (BINARY | 26) +#define CLOSE_PAREN (BINARY | 27) + /* By categorizing the CLOSE_PAREN lexeme as a + * BINARY operator, the normal parsing rules + * for binary operators assure that a close + * paren will not directly follow another + * operator, and the machinery already in + * place to connect operands to operators + * according to precedence performs most of + * the work of matching open and close parens + * for us. In the end though, a close paren is + * not really a binary operator, and some + * special coding in ParseExpr() make sure we + * never put an actual CLOSE_PAREN node in the + * parse tree. The sub-expression between + * parens becomes the single argument of the + * matching OPEN_PAREN unary operator. */ +#define END (BINARY | 28) + /* This lexeme represents the end of the + * string being parsed. Treating it as a + * binary operator follows the same logic as + * the CLOSE_PAREN lexeme and END pairs with + * START, in the same way that CLOSE_PAREN + * pairs with OPEN_PAREN. */ /* - * The ExprInfo structure describes the state of compiling an expression. - * A pointer to an ExprInfo record is passed among the routines in - * this module. + * When ParseExpr() builds the parse tree it must choose which operands to + * connect to which operators. This is done according to operator precedence. + * The greater an operator's precedence the greater claim it has to link to an + * available operand. The Precedence enumeration lists the precedence values + * used by Tcl expression operators, from lowest to highest claim. Each + * precedence level is commented with the operators that hold that precedence. */ -typedef struct ExprInfo { - Tcl_Interp *interp; /* Used for error reporting. */ - Tcl_Parse *parsePtr; /* Structure filled with information about - * the parsed expression. */ - char *expr; /* The expression that was originally passed - * to TclCompileExpr. */ - char *lastChar; /* Points just after last byte of expr. */ - int hasOperators; /* Set 1 if the expr has operators; 0 if - * expr is only a primary. If 1 after - * compiling an expr, a tryCvtToNumeric - * instruction is emitted to convert the - * primary to a number if possible. */ - int exprIsJustVarRef; /* Set 1 if the expr consists of just a - * variable reference as in the expression - * of "if $b then...". Otherwise 0. If 1 the - * expr is compiled out-of-line in order to - * implement expr's 2 level substitution - * semantics properly. */ - int exprIsComparison; /* Set 1 if the top-level operator in the - * expr is a comparison. Otherwise 0. If 1, - * because the operands might be strings, - * the expr is compiled out-of-line in order - * to implement expr's 2 level substitution - * semantics properly. */ -} ExprInfo; +enum Precedence { + PREC_END = 1, /* END */ + PREC_START, /* START */ + PREC_CLOSE_PAREN, /* ")" */ + PREC_OPEN_PAREN, /* "(" */ + PREC_COMMA, /* "," */ + PREC_CONDITIONAL, /* "?", ":" */ + PREC_OR, /* "||" */ + PREC_AND, /* "&&" */ + PREC_BIT_OR, /* "|" */ + PREC_BIT_XOR, /* "^" */ + PREC_BIT_AND, /* "&" */ + PREC_EQUAL, /* "==", "!=", "eq", "ne", "in", "ni" */ + PREC_COMPARE, /* "<", ">", "<=", ">=" */ + PREC_SHIFT, /* "<<", ">>" */ + PREC_ADD, /* "+", "-" */ + PREC_MULT, /* "*", "/", "%" */ + PREC_EXPON, /* "**" */ + PREC_UNARY /* "+", "-", FUNCTION, "!", "~" */ +}; /* - * Definitions of numeric codes representing each expression operator. - * The order of these must match the entries in the operatorTable below. - * Also the codes for the relational operators (OP_LESS, OP_GREATER, - * OP_LE, OP_GE, OP_EQ, and OP_NE) must be consecutive and in that order. - * Note that OP_PLUS and OP_MINUS represent both unary and binary operators. + * Here the same information contained in the comments above is stored in + * inverted form, so that given a lexeme, one can quickly look up its + * precedence value. */ -#define OP_MULT 0 -#define OP_DIVIDE 1 -#define OP_MOD 2 -#define OP_PLUS 3 -#define OP_MINUS 4 -#define OP_LSHIFT 5 -#define OP_RSHIFT 6 -#define OP_LESS 7 -#define OP_GREATER 8 -#define OP_LE 9 -#define OP_GE 10 -#define OP_EQ 11 -#define OP_NEQ 12 -#define OP_BITAND 13 -#define OP_BITXOR 14 -#define OP_BITOR 15 -#define OP_LAND 16 -#define OP_LOR 17 -#define OP_QUESTY 18 -#define OP_LNOT 19 -#define OP_BITNOT 20 +static const unsigned char prec[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + PREC_ADD, /* BINARY_PLUS */ + PREC_ADD, /* BINARY_MINUS */ + PREC_COMMA, /* COMMA */ + PREC_MULT, /* MULT */ + PREC_MULT, /* DIVIDE */ + PREC_MULT, /* MOD */ + PREC_COMPARE, /* LESS */ + PREC_COMPARE, /* GREATER */ + PREC_BIT_AND, /* BIT_AND */ + PREC_BIT_XOR, /* BIT_XOR */ + PREC_BIT_OR, /* BIT_OR */ + PREC_CONDITIONAL, /* QUESTION */ + PREC_CONDITIONAL, /* COLON */ + PREC_SHIFT, /* LEFT_SHIFT */ + PREC_SHIFT, /* RIGHT_SHIFT */ + PREC_COMPARE, /* LEQ */ + PREC_COMPARE, /* GEQ */ + PREC_EQUAL, /* EQUAL */ + PREC_EQUAL, /* NEQ */ + PREC_AND, /* AND */ + PREC_OR, /* OR */ + PREC_EQUAL, /* STREQ */ + PREC_EQUAL, /* STRNEQ */ + PREC_EXPON, /* EXPON */ + PREC_EQUAL, /* IN_LIST */ + PREC_EQUAL, /* NOT_IN_LIST */ + PREC_CLOSE_PAREN, /* CLOSE_PAREN */ + PREC_END, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + PREC_UNARY, /* UNARY_PLUS */ + PREC_UNARY, /* UNARY_MINUS */ + PREC_UNARY, /* FUNCTION */ + PREC_START, /* START */ + PREC_OPEN_PAREN, /* OPEN_PAREN */ + PREC_UNARY, /* NOT*/ + PREC_UNARY, /* BIT_NOT*/ +}; /* - * Table describing the expression operators. Entries in this table must - * correspond to the definitions of numeric codes for operators just above. + * A table mapping lexemes to bytecode instructions, used by CompileExprTree(). */ -static int opTableInitialized = 0; /* 0 means not yet initialized. */ - -TCL_DECLARE_MUTEX(opMutex) - -typedef struct OperatorDesc { - char *name; /* Name of the operator. */ - int numOperands; /* Number of operands. 0 if the operator - * requires special handling. */ - int instruction; /* Instruction opcode for the operator. - * Ignored if numOperands is 0. */ -} OperatorDesc; - -OperatorDesc operatorTable[] = { - {"*", 2, INST_MULT}, - {"/", 2, INST_DIV}, - {"%", 2, INST_MOD}, - {"+", 0}, - {"-", 0}, - {"<<", 2, INST_LSHIFT}, - {">>", 2, INST_RSHIFT}, - {"<", 2, INST_LT}, - {">", 2, INST_GT}, - {"<=", 2, INST_LE}, - {">=", 2, INST_GE}, - {"==", 2, INST_EQ}, - {"!=", 2, INST_NEQ}, - {"&", 2, INST_BITAND}, - {"^", 2, INST_BITXOR}, - {"|", 2, INST_BITOR}, - {"&&", 0}, - {"||", 0}, - {"?", 0}, - {"!", 1, INST_LNOT}, - {"~", 1, INST_BITNOT}, - {NULL} +static const unsigned char instruction[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + INST_ADD, /* BINARY_PLUS */ + INST_SUB, /* BINARY_MINUS */ + 0, /* COMMA */ + INST_MULT, /* MULT */ + INST_DIV, /* DIVIDE */ + INST_MOD, /* MOD */ + INST_LT, /* LESS */ + INST_GT, /* GREATER */ + INST_BITAND, /* BIT_AND */ + INST_BITXOR, /* BIT_XOR */ + INST_BITOR, /* BIT_OR */ + 0, /* QUESTION */ + 0, /* COLON */ + INST_LSHIFT, /* LEFT_SHIFT */ + INST_RSHIFT, /* RIGHT_SHIFT */ + INST_LE, /* LEQ */ + INST_GE, /* GEQ */ + INST_EQ, /* EQUAL */ + INST_NEQ, /* NEQ */ + 0, /* AND */ + 0, /* OR */ + INST_STR_EQ, /* STREQ */ + INST_STR_NEQ, /* STRNEQ */ + INST_EXPON, /* EXPON */ + INST_LIST_IN, /* IN_LIST */ + INST_LIST_NOT_IN, /* NOT_IN_LIST */ + 0, /* CLOSE_PAREN */ + 0, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + INST_UPLUS, /* UNARY_PLUS */ + INST_UMINUS, /* UNARY_MINUS */ + 0, /* FUNCTION */ + 0, /* START */ + 0, /* OPEN_PAREN */ + INST_LNOT, /* NOT*/ + INST_BITNOT, /* BIT_NOT*/ }; /* - * Hashtable used to map the names of expression operators to the index - * of their OperatorDesc description. + * A table mapping a byte value to the corresponding lexeme for use by + * ParseLexeme(). */ -static Tcl_HashTable opHashTable; +static const unsigned char Lexeme[] = { + INVALID /* NUL */, INVALID /* SOH */, + INVALID /* STX */, INVALID /* ETX */, + INVALID /* EOT */, INVALID /* ENQ */, + INVALID /* ACK */, INVALID /* BEL */, + INVALID /* BS */, INVALID /* HT */, + INVALID /* LF */, INVALID /* VT */, + INVALID /* FF */, INVALID /* CR */, + INVALID /* SO */, INVALID /* SI */, + INVALID /* DLE */, INVALID /* DC1 */, + INVALID /* DC2 */, INVALID /* DC3 */, + INVALID /* DC4 */, INVALID /* NAK */, + INVALID /* SYN */, INVALID /* ETB */, + INVALID /* CAN */, INVALID /* EM */, + INVALID /* SUB */, INVALID /* ESC */, + INVALID /* FS */, INVALID /* GS */, + INVALID /* RS */, INVALID /* US */, + INVALID /* SPACE */, 0 /* ! or != */, + QUOTED /* " */, INVALID /* # */, + VARIABLE /* $ */, MOD /* % */, + 0 /* & or && */, INVALID /* ' */, + OPEN_PAREN /* ( */, CLOSE_PAREN /* ) */, + 0 /* * or ** */, PLUS /* + */, + COMMA /* , */, MINUS /* - */, + 0 /* . */, DIVIDE /* / */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-9 */ + COLON /* : */, INVALID /* ; */, + 0 /* < or << or <= */, + 0 /* == or INVALID */, + 0 /* > or >> or >= */, + QUESTION /* ? */, INVALID /* @ */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A-M */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* N-Z */ + SCRIPT /* [ */, INVALID /* \ */, + INVALID /* ] */, BIT_XOR /* ^ */, + INVALID /* _ */, INVALID /* ` */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a-m */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* n-z */ + BRACED /* { */, 0 /* | or || */, + INVALID /* } */, BIT_NOT /* ~ */, + INVALID /* DEL */ +}; /* - * Declarations for local procedures to this file: + * The JumpList struct is used to create a stack of data needed for the + * TclEmitForwardJump() and TclFixupForwardJump() calls that are performed + * when compiling the short-circuiting operators QUESTION/COLON, AND, and OR. + * Keeping a stack permits the CompileExprTree() routine to be non-recursive. */ -static int CompileCondExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, - CompileEnv *envPtr, Tcl_Token **endPtrPtr)); -static int CompileLandOrLorExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, int opIndex, - ExprInfo *infoPtr, CompileEnv *envPtr, - Tcl_Token **endPtrPtr)); -static int CompileMathFuncCall _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, char *funcName, - ExprInfo *infoPtr, CompileEnv *envPtr, - Tcl_Token **endPtrPtr)); -static int CompileSubExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, - CompileEnv *envPtr)); -static void LogSyntaxError _ANSI_ARGS_((ExprInfo *infoPtr)); +typedef struct JumpList { + JumpFixup jump; /* Pass this argument to matching calls of + * TclEmitForwardJump() and + * TclFixupForwardJump(). */ + struct JumpList *next; /* Point to next item on the stack */ +} JumpList; /* - * Macro used to debug the execution of the expression compiler. + * Declarations for local functions to this file: */ -#ifdef TCL_COMPILE_DEBUG -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) \ - if (traceExprComp) { \ - fprintf(stderr, "CompileSubExpr: \"%.*s\", token \"%.*s\"\n", \ - (exprLength), (exprBytes), (tokenLength), (tokenBytes)); \ - } -#else -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) -#endif /* TCL_COMPILE_DEBUG */ +static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, Tcl_Token *tokenPtr, + CompileEnv *envPtr, int optimize); +static void ConvertTreeToTokens(const char *start, int numBytes, + OpNode *nodes, Tcl_Token *tokenPtr, + Tcl_Parse *parsePtr); +static int ExecConstantExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj * const **litObjvPtr); +static int ParseExpr(Tcl_Interp *interp, const char *start, + int numBytes, OpNode **opTreePtr, + Tcl_Obj *litList, Tcl_Obj *funcList, + Tcl_Parse *parsePtr, int parseOnly); +static int ParseLexeme(const char *start, int numBytes, + unsigned char *lexemePtr, Tcl_Obj **literalPtr); /* *---------------------------------------------------------------------- * - * TclCompileExpr -- + * ParseExpr -- * - * This procedure compiles a string containing a Tcl expression into - * Tcl bytecodes. This procedure is the top-level interface to the - * the expression compilation module, and is used by such public - * procedures as Tcl_ExprString, Tcl_ExprStringObj, Tcl_ExprLong, - * Tcl_ExprDouble, Tcl_ExprBoolean, and Tcl_ExprBooleanObj. + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and constructs a tree representing the + * structure of the expression. The caller must pass in empty lists as + * the funcList and litList arguments. The elements of the parsed + * expression are returned to the caller as that tree, a list of literal + * values, a list of function names, and in Tcl_Tokens added to a + * Tcl_Parse struct passed in by the caller. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. - * - * envPtr->maxStackDepth is updated with the maximum number of stack - * elements needed to execute the expression. - * - * envPtr->exprIsJustVarRef is set 1 if the expression consisted of - * a single variable reference as in the expression of "if $b then...". - * Otherwise it is set 0. This is used to implement Tcl's two level - * expression substitution semantics properly. - * - * envPtr->exprIsComparison is set 1 if the top-level operator in the - * expr is a comparison. Otherwise it is set 0. If 1, because the - * operands might be strings, the expr is compiled out-of-line in order - * to implement expr's 2 level substitution semantics properly. + * If the string is successfully parsed as a valid Tcl expression, TCL_OK + * is returned, and data about the expression structure is written to the + * last four arguments. If the string cannot be parsed as a valid Tcl + * expression, TCL_ERROR is returned, and if interp is non-NULL, an error + * message is written to interp. * * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. + * Memory will be allocated. If TCL_OK is returned, the caller must clean + * up the returned data structures. The (OpNode *) value written to + * opTreePtr should be passed to ckfree() and the parsePtr argument + * should be passed to Tcl_FreeParse(). The elements appended to the + * litList and funcList will automatically be freed whenever the refcount + * on those lists indicates they can be freed. * *---------------------------------------------------------------------- */ -int -TclCompileExpr(interp, script, numBytes, envPtr) - Tcl_Interp *interp; /* Used for error reporting. */ - char *script; /* The source script to compile. */ - int numBytes; /* Number of bytes in script. If < 0, the - * string consists of all bytes up to the - * first null character. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ +static int +ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. */ + OpNode **opTreePtr, /* Points to space where a pointer to the + * allocated OpNode tree should go. */ + Tcl_Obj *litList, /* List to append literals to. */ + Tcl_Obj *funcList, /* List to append function names to. */ + Tcl_Parse *parsePtr, /* Structure to fill with tokens representing + * those operands that require run time + * substitutions. */ + int parseOnly) /* A boolean indicating whether the caller's + * aim is just a parse, or whether it will go + * on to compile the expression. Different + * optimizations are appropriate for the two + * scenarios. */ { - ExprInfo info; - Tcl_Parse parse; - Tcl_HashEntry *hPtr; - int maxDepth, new, i, code; + OpNode *nodes = NULL; /* Pointer to the OpNode storage array where + * we build the parse tree. */ + int nodesAvailable = 64; /* Initial size of the storage array. This + * value establishes a minimum tree memory + * cost of only about 1 kibyte, and is large + * enough for most expressions to parse with + * no need for array growth and + * reallocation. */ + int nodesUsed = 0; /* Number of OpNodes filled. */ + int scanned = 0; /* Capture number of byte scanned by parsing + * routines. */ + int lastParsed; /* Stores info about what the lexeme parsed + * the previous pass through the parsing loop + * was. If it was an operator, lastParsed is + * the index of the OpNode for that operator. + * If it was not an operator, lastParsed holds + * an OperandTypes value encoding what we need + * to know about it. */ + int incomplete; /* Index of the most recent incomplete tree in + * the OpNode array. Heads a stack of + * incomplete trees linked by p.prev. */ + int complete = OT_EMPTY; /* "Index" of the complete tree (that is, a + * complete subexpression) determined at the + * moment. OT_EMPTY is a nonsense value used + * only to silence compiler warnings. During a + * parse, complete will always hold an index + * or an OperandTypes value pointing to an + * actual leaf at the time the complete tree + * is needed. */ /* - * If this is the first time we've been called, initialize the table - * of expression operators. + * These variables control generation of the error message. */ - if (numBytes < 0) { - numBytes = (script? strlen(script) : 0); + Tcl_Obj *msg = NULL; /* The error message. */ + Tcl_Obj *post = NULL; /* In a few cases, an additional postscript + * for the error message, supplying more + * information after the error msg and + * location have been reported. */ + const char *errCode = NULL; /* The detail word of the errorCode list, or + * NULL to indicate that no changes to the + * errorCode are to be done. */ + const char *subErrCode = NULL; + /* Extra information for use in generating the + * errorCode. */ + const char *mark = "_@_"; /* In the portion of the complete error + * message where the error location is + * reported, this "mark" substring is inserted + * into the string being parsed to aid in + * pinpointing the location of the syntax + * error in the expression. */ + int insertMark = 0; /* A boolean controlling whether the "mark" + * should be inserted. */ + const int limit = 25; /* Portions of the error message are + * constructed out of substrings of the + * original expression. In order to keep the + * error message readable, we impose this + * limit on the substring size we extract. */ + + TclParseInit(interp, start, numBytes, parsePtr); + + nodes = attemptckalloc(nodesAvailable * sizeof(OpNode)); + if (nodes == NULL) { + TclNewLiteralStringObj(msg, "not enough memory to parse expression"); + errCode = "NOMEM"; + goto error; } - if (!opTableInitialized) { - Tcl_MutexLock(&opMutex); - if (!opTableInitialized) { - Tcl_InitHashTable(&opHashTable, TCL_STRING_KEYS); - for (i = 0; operatorTable[i].name != NULL; i++) { - hPtr = Tcl_CreateHashEntry(&opHashTable, - operatorTable[i].name, &new); - if (new) { - Tcl_SetHashValue(hPtr, (ClientData) i); - } + + /* + * Initialize the parse tree with the special "START" node. + */ + + nodes->lexeme = START; + nodes->precedence = prec[START]; + nodes->mark = MARK_RIGHT; + nodes->constant = 1; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + + /* + * Main parsing loop parses one lexeme per iteration. We exit the loop + * only when there's a syntax error with a "goto error" which takes us to + * the error handling code following the loop, or when we've successfully + * completed the parse and we return to the caller. + */ + + while (1) { + OpNode *nodePtr; /* Points to the OpNode we may fill this pass + * through the loop. */ + unsigned char lexeme; /* The lexeme we parse this iteration. */ + Tcl_Obj *literal; /* Filled by the ParseLexeme() call when a + * literal is parsed that has a Tcl_Obj rep + * worth preserving. */ + + /* + * Each pass through this loop adds up to one more OpNode. Allocate + * space for one if required. + */ + + if (nodesUsed >= nodesAvailable) { + int size = nodesUsed * 2; + OpNode *newPtr; + + do { + newPtr = attemptckrealloc(nodes, size * sizeof(OpNode)); + } while ((newPtr == NULL) + && ((size -= (size - nodesUsed) / 2) > nodesUsed)); + if (newPtr == NULL) { + TclNewLiteralStringObj(msg, + "not enough memory to parse expression"); + errCode = "NOMEM"; + goto error; } - opTableInitialized = 1; + nodesAvailable = size; + nodes = newPtr; } - Tcl_MutexUnlock(&opMutex); - } + nodePtr = nodes + nodesUsed; + + /* + * Skip white space between lexemes. + */ + + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + scanned = ParseLexeme(start, numBytes, &lexeme, &literal); + + /* + * Use context to categorize the lexemes that are ambiguous. + */ + + if ((NODE_TYPE & lexeme) == 0) { + int b; + + switch (lexeme) { + case INVALID: + msg = Tcl_ObjPrintf("invalid character \"%.*s\"", + scanned, start); + errCode = "BADCHAR"; + goto error; + case INCOMPLETE: + msg = Tcl_ObjPrintf("incomplete operator \"%.*s\"", + scanned, start); + errCode = "PARTOP"; + goto error; + case BAREWORD: + + /* + * Most barewords in an expression are a syntax error. The + * exceptions are that when a bareword is followed by an open + * paren, it might be a function call, and when the bareword + * is a legal literal boolean value, we accept that as well. + */ + + if (start[scanned+TclParseAllWhiteSpace( + start+scanned, numBytes-scanned)] == '(') { + lexeme = FUNCTION; + + /* + * When we compile the expression we'll need the function + * name, and there's no place in the parse tree to store + * it, so we keep a separate list of all the function + * names we've parsed in the order we found them. + */ + + Tcl_ListObjAppendElement(NULL, funcList, literal); + } else if (Tcl_GetBooleanFromObj(NULL,literal,&b) == TCL_OK) { + lexeme = BOOLEAN; + } else { + Tcl_DecrRefCount(literal); + msg = Tcl_ObjPrintf("invalid bareword \"%.*s%s\"", + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "..."); + post = Tcl_ObjPrintf( + "should be \"$%.*s%s\" or \"{%.*s%s}\"", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + Tcl_AppendPrintfToObj(post, " or \"%.*s%s(...)\" or ...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + errCode = "BAREWORD"; + if (start[0] == '0') { + const char *stop; + TclParseNumber(NULL, NULL, NULL, start, scanned, + &stop, TCL_PARSE_NO_WHITESPACE); + + if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { + switch (start[1]) { + case 'b': + Tcl_AppendToObj(post, + " (invalid binary number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "BINARY"; + break; + case 'o': + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; + break; + default: + if (isdigit(UCHAR(start[1]))) { + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; + } + break; + } + } + } + goto error; + } + break; + case PLUS: + case MINUS: + if (IsOperator(lastParsed)) { + /* + * A "+" or "-" coming just after another operator must be + * interpreted as a unary operator. + */ + + lexeme |= UNARY; + } else { + lexeme |= BINARY; + } + } + } /* Uncategorized lexemes */ + + /* + * Handle lexeme based on its category. + */ + + switch (NODE_TYPE & lexeme) { + case LEAF: { + /* + * Each LEAF results in either a literal getting appended to the + * litList, or a sequence of Tcl_Tokens representing a Tcl word + * getting appended to the parsePtr->tokens. No OpNode is filled + * for this lexeme. + */ + + Tcl_Token *tokenPtr; + const char *end = start; + int wordIndex; + int code = TCL_OK; + + /* + * A leaf operand appearing just after something that's not an + * operator is a syntax error. + */ + + if (NotOperator(lastParsed)) { + msg = Tcl_ObjPrintf("missing operator at %s", mark); + errCode = "MISSING"; + scanned = 0; + insertMark = 1; + + /* + * Free any literal to avoid a memleak. + */ + + if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) { + Tcl_DecrRefCount(literal); + } + goto error; + } + + switch (lexeme) { + case NUMBER: + case BOOLEAN: + /* + * TODO: Consider using a dict or hash to collapse all + * duplicate literals into a single representative value. + * (Like what is done with [split $s {}]). + * Pro: ~75% memory saving on expressions like + * {1+1+1+1+1+.....+1} (Convert "pointer + Tcl_Obj" cost + * to "pointer" cost only) + * Con: Cost of the dict store/retrieve on every literal in + * every expression when expressions like the above tend + * to be uncommon. + * The memory savings is temporary; Compiling to bytecode + * will collapse things as literals are registered + * anyway, so the savings applies only to the time + * between parsing and compiling. Possibly important due + * to high-water mark nature of memory allocation. + */ + + Tcl_ListObjAppendElement(NULL, litList, literal); + complete = lastParsed = OT_LITERAL; + start += scanned; + numBytes -= scanned; + continue; + + default: + break; + } + + /* + * Remaining LEAF cases may involve filling Tcl_Tokens, so make + * room for at least 2 more tokens. + */ + + TclGrowParseTokenArray(parsePtr, 2); + wordIndex = parsePtr->numTokens; + tokenPtr = parsePtr->tokenPtr + wordIndex; + tokenPtr->type = TCL_TOKEN_WORD; + tokenPtr->start = start; + parsePtr->numTokens++; + + switch (lexeme) { + case QUOTED: + code = Tcl_ParseQuotedString(NULL, start, numBytes, + parsePtr, 1, &end); + scanned = end - start; + break; + + case BRACED: + code = Tcl_ParseBraces(NULL, start, numBytes, + parsePtr, 1, &end); + scanned = end - start; + break; + + case VARIABLE: + code = Tcl_ParseVarName(NULL, start, numBytes, parsePtr, 1); + + /* + * Handle the quirk that Tcl_ParseVarName reports a successful + * parse even when it gets only a "$" with no variable name. + */ + + tokenPtr = parsePtr->tokenPtr + wordIndex + 1; + if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) { + TclNewLiteralStringObj(msg, "invalid character \"$\""); + errCode = "BADCHAR"; + goto error; + } + scanned = tokenPtr->size; + break; + + case SCRIPT: { + Tcl_Parse *nestedPtr = + TclStackAlloc(interp, sizeof(Tcl_Parse)); + + tokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + tokenPtr->type = TCL_TOKEN_COMMAND; + tokenPtr->start = start; + tokenPtr->numComponents = 0; + + end = start + numBytes; + start++; + while (1) { + code = Tcl_ParseCommand(interp, start, end - start, 1, + nestedPtr); + if (code != TCL_OK) { + parsePtr->term = nestedPtr->term; + parsePtr->errorType = nestedPtr->errorType; + parsePtr->incomplete = nestedPtr->incomplete; + break; + } + start = nestedPtr->commandStart + nestedPtr->commandSize; + Tcl_FreeParse(nestedPtr); + if ((nestedPtr->term < end) && (nestedPtr->term[0] == ']') + && !nestedPtr->incomplete) { + break; + } + + if (start == end) { + TclNewLiteralStringObj(msg, "missing close-bracket"); + parsePtr->term = tokenPtr->start; + parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; + parsePtr->incomplete = 1; + code = TCL_ERROR; + errCode = "UNBALANCED"; + break; + } + } + TclStackFree(interp, nestedPtr); + end = start; + start = tokenPtr->start; + scanned = end - start; + tokenPtr->size = scanned; + parsePtr->numTokens++; + break; + } /* SCRIPT case */ + } + if (code != TCL_OK) { + /* + * Here we handle all the syntax errors generated by the + * Tcl_Token generating parsing routines called in the switch + * just above. If the value of parsePtr->incomplete is 1, then + * the error was an unbalanced '[', '(', '{', or '"' and + * parsePtr->term is pointing to that unbalanced character. If + * the value of parsePtr->incomplete is 0, then the error is + * one of lacking whitespace following a quoted word, for + * example: expr {[an error {foo}bar]}, and parsePtr->term + * points to where the whitespace is missing. We reset our + * values of start and scanned so that when our error message + * is constructed, the location of the syntax error is sure to + * appear in it, even if the quoted expression is truncated. + */ + + start = parsePtr->term; + scanned = parsePtr->incomplete; + if (parsePtr->incomplete) { + errCode = "UNBALANCED"; + } + goto error; + } + + tokenPtr = parsePtr->tokenPtr + wordIndex; + tokenPtr->size = scanned; + tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1; + if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) { + /* + * When this expression is destined to be compiled, and a + * braced or quoted word within an expression is known at + * compile time (no runtime substitutions in it), we can store + * it as a literal rather than in its tokenized form. This is + * an advantage since the compiled bytecode is going to need + * the argument in Tcl_Obj form eventually, so it's just as + * well to get there now. Another advantage is that with this + * conversion, larger constant expressions might be grown and + * optimized. + * + * On the contrary, if the end goal of this parse is to fill a + * Tcl_Parse for a caller of Tcl_ParseExpr(), then it's + * wasteful to convert to a literal only to convert back again + * later. + */ + + literal = Tcl_NewObj(); + if (TclWordKnownAtCompileTime(tokenPtr, literal)) { + Tcl_ListObjAppendElement(NULL, litList, literal); + complete = lastParsed = OT_LITERAL; + parsePtr->numTokens = wordIndex; + break; + } + Tcl_DecrRefCount(literal); + } + complete = lastParsed = OT_TOKENS; + break; + } /* case LEAF */ + + case UNARY: + + /* + * A unary operator appearing just after something that's not an + * operator is a syntax error -- something trying to be the left + * operand of an operator that doesn't take one. + */ + + if (NotOperator(lastParsed)) { + msg = Tcl_ObjPrintf("missing operator at %s", mark); + scanned = 0; + insertMark = 1; + errCode = "MISSING"; + goto error; + } + + /* + * Create an OpNode for the unary operator. + */ + + nodePtr->lexeme = lexeme; + nodePtr->precedence = prec[lexeme]; + nodePtr->mark = MARK_RIGHT; + + /* + * A FUNCTION cannot be a constant expression, because Tcl allows + * functions to return variable results with the same arguments; + * for example, rand(). Other unary operators can root a constant + * expression, so long as the argument is a constant expression. + */ + + nodePtr->constant = (lexeme != FUNCTION); + + /* + * This unary operator is a new incomplete tree, so push it onto + * our stack of incomplete trees. Also remember it as the last + * lexeme we parsed. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + break; + + case BINARY: { + OpNode *incompletePtr; + unsigned char precedence = prec[lexeme]; + + /* + * A binary operator appearing just after another operator is a + * syntax error -- one of the two operators is missing an operand. + */ + + if (IsOperator(lastParsed)) { + if ((lexeme == CLOSE_PAREN) + && (nodePtr[-1].lexeme == OPEN_PAREN)) { + if (nodePtr[-2].lexeme == FUNCTION) { + /* + * Normally, "()" is a syntax error, but as a special + * case accept it as an argument list for a function. + * Treat this as a special LEAF lexeme, and restart + * the parsing loop with zero characters scanned. We + * will parse the ")" again the next time through, but + * with the OT_EMPTY leaf as the subexpression between + * the parens. + */ + + scanned = 0; + complete = lastParsed = OT_EMPTY; + break; + } + msg = Tcl_ObjPrintf("empty subexpression at %s", mark); + scanned = 0; + insertMark = 1; + errCode = "EMPTY"; + goto error; + } + + if (nodePtr[-1].precedence > precedence) { + if (nodePtr[-1].lexeme == OPEN_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; + } else if (nodePtr[-1].lexeme == COMMA) { + msg = Tcl_ObjPrintf( + "missing function argument at %s", mark); + scanned = 0; + insertMark = 1; + errCode = "MISSING"; + } else if (nodePtr[-1].lexeme == START) { + TclNewLiteralStringObj(msg, "empty expression"); + errCode = "EMPTY"; + } + } else if (lexeme == CLOSE_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; + } else if ((lexeme == COMMA) + && (nodePtr[-1].lexeme == OPEN_PAREN) + && (nodePtr[-2].lexeme == FUNCTION)) { + msg = Tcl_ObjPrintf("missing function argument at %s", + mark); + scanned = 0; + insertMark = 1; + errCode = "UNBALANCED"; + } + if (msg == NULL) { + msg = Tcl_ObjPrintf("missing operand at %s", mark); + scanned = 0; + insertMark = 1; + errCode = "MISSING"; + } + goto error; + } + + /* + * Here is where the tree comes together. At this point, we have a + * stack of incomplete trees corresponding to substrings that are + * incomplete expressions, followed by a complete tree + * corresponding to a substring that is itself a complete + * expression, followed by the binary operator we have just + * parsed. The incomplete trees can each be completed by adding a + * right operand. + * + * To illustrate with an example, when we parse the expression + * "1+2*3-4" and we reach this point having just parsed the "-" + * operator, we have these incomplete trees: START, "1+", and + * "2*". Next we have the complete subexpression "3". Last is the + * "-" we've just parsed. + * + * The next step is to join our complete tree to an operator. The + * choice is governed by the precedence and associativity of the + * competing operators. If we connect it as the right operand of + * our most recent incomplete tree, we get a new complete tree, + * and we can repeat the process. The while loop following repeats + * this until precedence indicates it is time to join the complete + * tree as the left operand of the just parsed binary operator. + * + * Continuing the example, the first pass through the loop will + * join "3" to "2*"; the next pass will join "2*3" to "1+". Then + * we'll exit the loop and join "1+2*3" to "-". When we return to + * parse another lexeme, our stack of incomplete trees is START + * and "1+2*3-". + */ + + while (1) { + incompletePtr = nodes + incomplete; + + if (incompletePtr->precedence < precedence) { + break; + } + + if (incompletePtr->precedence == precedence) { + /* + * Right association rules for exponentiation. + */ + + if (lexeme == EXPON) { + break; + } + + /* + * Special association rules for the conditional + * operators. The "?" and ":" operators have equal + * precedence, but must be linked up in sensible pairs. + */ + + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { + break; + } + if ((incompletePtr->lexeme == COLON) + && (lexeme == QUESTION)) { + break; + } + } + + /* + * Some special syntax checks... + */ + + /* Parens must balance */ + if ((incompletePtr->lexeme == OPEN_PAREN) + && (lexeme != CLOSE_PAREN)) { + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; + goto error; + } + + /* Right operand of "?" must be ":" */ + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { + msg = Tcl_ObjPrintf("missing operator \":\" at %s", mark); + scanned = 0; + insertMark = 1; + errCode = "MISSING"; + goto error; + } + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) + && (nodes[complete].lexeme == COLON) + && (incompletePtr->lexeme != QUESTION)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" " + "without preceding \"?\""); + errCode = "SURPRISE"; + goto error; + } + + /* + * Attach complete tree as right operand of most recent + * incomplete tree. + */ + + incompletePtr->right = complete; + if (IsOperator(complete)) { + nodes[complete].p.parent = incomplete; + incompletePtr->constant = incompletePtr->constant + && nodes[complete].constant; + } else { + incompletePtr->constant = incompletePtr->constant + && (complete == OT_LITERAL); + } + + /* + * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations + * each make up a single operator. Force them to agree whether + * they have a constant expression. + */ + + if ((incompletePtr->lexeme == QUESTION) + || (incompletePtr->lexeme == FUNCTION)) { + nodes[complete].constant = incompletePtr->constant; + } + + if (incompletePtr->lexeme == START) { + /* + * Completing the START tree indicates we're done. + * Transfer the parse tree to the caller and return. + */ + + *opTreePtr = nodes; + return TCL_OK; + } + + /* + * With a right operand attached, last incomplete tree has + * become the complete tree. Pop it from the incomplete tree + * stack. + */ + + complete = incomplete; + incomplete = incompletePtr->p.prev; + + /* CLOSE_PAREN can only close one OPEN_PAREN. */ + if (incompletePtr->lexeme == OPEN_PAREN) { + break; + } + } + + /* + * More syntax checks... + */ + + /* Parens must balance. */ + if (lexeme == CLOSE_PAREN) { + if (incompletePtr->lexeme != OPEN_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; + goto error; + } + } + + /* Commas must appear only in function argument lists. */ + if (lexeme == COMMA) { + if ((incompletePtr->lexeme != OPEN_PAREN) + || (incompletePtr[-1].lexeme != FUNCTION)) { + TclNewLiteralStringObj(msg, + "unexpected \",\" outside function argument list"); + errCode = "SURPRISE"; + goto error; + } + } + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" without preceding \"?\""); + errCode = "SURPRISE"; + goto error; + } + + /* + * Create no node for a CLOSE_PAREN lexeme. + */ + + if (lexeme == CLOSE_PAREN) { + break; + } + + /* + * Link complete tree as left operand of new node. + */ + + nodePtr->lexeme = lexeme; + nodePtr->precedence = precedence; + nodePtr->mark = MARK_LEFT; + nodePtr->left = complete; + + /* + * The COMMA operator cannot be optimized, since the function + * needs all of its arguments, and optimization would reduce the + * number. Other binary operators root constant expressions when + * both arguments are constant expressions. + */ + + nodePtr->constant = (lexeme != COMMA); + + if (IsOperator(complete)) { + nodes[complete].p.parent = nodesUsed; + nodePtr->constant = nodePtr->constant + && nodes[complete].constant; + } else { + nodePtr->constant = nodePtr->constant + && (complete == OT_LITERAL); + } + + /* + * With a left operand attached and a right operand missing, the + * just-parsed binary operator is root of a new incomplete tree. + * Push it onto the stack of incomplete trees. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + break; + } /* case BINARY */ + } /* lexeme handler */ + + /* Advance past the just-parsed lexeme */ + start += scanned; + numBytes -= scanned; + } /* main parsing loop */ /* - * Initialize the structure containing information abvout this - * expression compilation. + * We only get here if there's been an error. Any errors that didn't get a + * suitable parsePtr->errorType, get recorded as syntax errors. */ - info.interp = interp; - info.parsePtr = &parse; - info.expr = script; - info.lastChar = (script + numBytes); - info.hasOperators = 0; - info.exprIsJustVarRef = 1; /* will be set 0 if anything else is seen */ - info.exprIsComparison = 0; + error: + if (parsePtr->errorType == TCL_PARSE_SUCCESS) { + parsePtr->errorType = TCL_PARSE_SYNTAX; + } /* - * Parse the expression then compile it. + * Free any partial parse tree we've built. */ - maxDepth = 0; - code = Tcl_ParseExpr(interp, script, numBytes, &parse); - if (code != TCL_OK) { - goto done; + if (nodes != NULL) { + ckfree(nodes); } - code = CompileSubExpr(parse.tokenPtr, &info, envPtr); - if (code != TCL_OK) { - Tcl_FreeParse(&parse); - goto done; - } - maxDepth = envPtr->maxStackDepth; - - if (!info.hasOperators) { + if (interp == NULL) { /* - * Attempt to convert the primary's object to an int or double. - * This is done in order to support Tcl's policy of interpreting - * operands if at all possible as first integers, else - * floating-point numbers. + * Nowhere to report an error message, so just free it. */ - - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + + if (msg) { + Tcl_DecrRefCount(msg); + } + } else { + /* + * Construct the complete error message. Start with the simple error + * message, pulled from the interp result if necessary... + */ + + if (msg == NULL) { + msg = Tcl_GetObjResult(interp); + } + + /* + * Add a detailed quote from the bad expression, displaying and + * sometimes marking the precise location of the syntax error. + */ + + Tcl_AppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", + ((start - limit) < parsePtr->string) ? "" : "...", + ((start - limit) < parsePtr->string) + ? (int) (start - parsePtr->string) : limit - 3, + ((start - limit) < parsePtr->string) + ? parsePtr->string : start - limit + 3, + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "...", insertMark ? mark : "", + (start + scanned + limit > parsePtr->end) + ? (int) (parsePtr->end - start) - scanned : limit-3, + start + scanned, + (start + scanned + limit > parsePtr->end) ? "" : "..."); + + /* + * Next, append any postscript message. + */ + + if (post != NULL) { + Tcl_AppendToObj(msg, ";\n", -1); + Tcl_AppendObjToObj(msg, post); + Tcl_DecrRefCount(post); + } + Tcl_SetObjResult(interp, msg); + + /* + * Finally, place context information in the errorInfo. + */ + + numBytes = parsePtr->end - parsePtr->string; + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (parsing expression \"%.*s%s\")", + (numBytes < limit) ? numBytes : limit - 3, + parsePtr->string, (numBytes < limit) ? "" : "...")); + if (errCode) { + Tcl_SetErrorCode(interp, "TCL", "PARSE", "EXPR", errCode, + subErrCode, NULL); + } } - Tcl_FreeParse(&parse); - done: - envPtr->maxStackDepth = maxDepth; - envPtr->exprIsJustVarRef = info.exprIsJustVarRef; - envPtr->exprIsComparison = info.exprIsComparison; - return code; + return TCL_ERROR; } /* *---------------------------------------------------------------------- * - * TclFinalizeCompilation -- + * ConvertTreeToTokens -- * - * Clean up the compilation environment so it can later be - * properly reinitialized. This procedure is called by - * TclFinalizeCompExecEnv() in tclObj.c, which in turn is called - * by Tcl_Finalize(). + * Given a string, the numBytes bytes starting at start, and an OpNode + * tree and Tcl_Token array created by passing that same string to + * ParseExpr(), this function writes into *parsePtr the sequence of + * Tcl_Tokens needed so to satisfy the historical interface provided by + * Tcl_ParseExpr(). Note that this routine exists only for the sake of + * the public Tcl_ParseExpr() routine. It is not used by Tcl itself at + * all. * * Results: * None. * * Side effects: - * Cleans up the compilation environment. At the moment, just the - * table of expression operators is freed. + * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the + * parsed expression. * *---------------------------------------------------------------------- */ -void -TclFinalizeCompilation() +static void +ConvertTreeToTokens( + const char *start, + int numBytes, + OpNode *nodes, + Tcl_Token *tokenPtr, + Tcl_Parse *parsePtr) { - Tcl_MutexLock(&opMutex); - if (opTableInitialized) { - Tcl_DeleteHashTable(&opHashTable); - opTableInitialized = 0; + int subExprTokenIdx = 0; + OpNode *nodePtr = nodes; + int next = nodePtr->right; + + while (1) { + Tcl_Token *subExprTokenPtr; + int scanned, parentIdx; + unsigned char lexeme; + + /* + * Advance the mark so the next exit from this node won't retrace + * steps over ground already covered. + */ + + nodePtr->mark++; + + /* + * Handle next child node or leaf. + */ + + switch (next) { + case OT_EMPTY: + + /* No tokens and no characters for the OT_EMPTY leaf. */ + break; + + case OT_LITERAL: + + /* + * Skip any white space that comes before the literal. + */ + + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + /* + * Reparse the literal to get pointers into source string. + */ + + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->start = start; + subExprTokenPtr->size = scanned; + subExprTokenPtr->numComponents = 1; + subExprTokenPtr[1].type = TCL_TOKEN_TEXT; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + subExprTokenPtr[1].numComponents = 0; + + parsePtr->numTokens += 2; + start += scanned; + numBytes -= scanned; + break; + + case OT_TOKENS: { + /* + * tokenPtr points to a token sequence that came from parsing a + * Tcl word. A Tcl word is made up of a sequence of one or more + * elements. When the word is only a single element, it's been the + * historical practice to replace the TCL_TOKEN_WORD token + * directly with a TCL_TOKEN_SUB_EXPR token. However, when the + * word has multiple elements, a TCL_TOKEN_WORD token is kept as a + * grouping device so that TCL_TOKEN_SUB_EXPR always has only one + * element. Wise or not, these are the rules the Tcl expr parser + * has followed, and for the sake of those few callers of + * Tcl_ParseExpr() we do not change them now. Internally, we can + * do better. + */ + + int toCopy = tokenPtr->numComponents + 1; + + if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { + /* + * Single element word. Copy tokens and convert the leading + * token to TCL_TOKEN_SUB_EXPR. + */ + + TclGrowParseTokenArray(parsePtr, toCopy); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + parsePtr->numTokens += toCopy; + } else { + /* + * Multiple element word. Create a TCL_TOKEN_SUB_EXPR token to + * lead, with fields initialized from the leading token, then + * copy entire set of word tokens. + */ + + TclGrowParseTokenArray(parsePtr, toCopy+1); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *subExprTokenPtr = *tokenPtr; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->numComponents++; + subExprTokenPtr++; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + parsePtr->numTokens += toCopy + 1; + } + + scanned = tokenPtr->start + tokenPtr->size - start; + start += scanned; + numBytes -= scanned; + tokenPtr += toCopy; + break; + } + + default: + + /* + * Advance to the child node, which is an operator. + */ + + nodePtr = nodes + next; + + /* + * Skip any white space that comes before the subexpression. + */ + + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + /* + * Generate tokens for the operator / subexpression... + */ + + switch (nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + + /* + * Historical practice has been to have no Tcl_Tokens for + * these operators. + */ + + break; + + default: { + + /* + * Remember the index of the last subexpression we were + * working on -- that of our parent. We'll stack it later. + */ + + parentIdx = subExprTokenIdx; + + /* + * Verify space for the two leading Tcl_Tokens representing + * the subexpression rooted by this operator. The first + * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second of + * type TCL_TOKEN_OPERATOR. + */ + + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenIdx = parsePtr->numTokens; + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + parsePtr->numTokens += 2; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr[1].type = TCL_TOKEN_OPERATOR; + + /* + * Our current position scanning the string is the starting + * point for this subexpression. + */ + + subExprTokenPtr->start = start; + + /* + * Eventually, we know that the numComponents field of the + * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means + * we can make other use of this field for now to track the + * stack of subexpressions we have pending. + */ + + subExprTokenPtr[1].numComponents = parentIdx; + break; + } + } + break; + } + + /* Determine which way to exit the node on this pass. */ + router: + switch (nodePtr->mark) { + case MARK_LEFT: + next = nodePtr->left; + break; + + case MARK_RIGHT: + next = nodePtr->right; + + /* + * Skip any white space that comes before the operator. + */ + + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + /* + * Here we scan from the string the operator corresponding to + * nodePtr->lexeme. + */ + + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + switch(nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + + /* + * No tokens for these lexemes -> nothing to do. + */ + + break; + + default: + + /* + * Record in the TCL_TOKEN_OPERATOR token the pointers into + * the string marking where the operator is. + */ + + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + break; + } + + start += scanned; + numBytes -= scanned; + break; + + case MARK_PARENT: + switch (nodePtr->lexeme) { + case START: + + /* When we get back to the START node, we're done. */ + return; + + case COMMA: + case COLON: + + /* No tokens for these lexemes -> nothing to do. */ + break; + + case OPEN_PAREN: + + /* + * Skip past matching close paren. + */ + + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + start += scanned; + numBytes -= scanned; + break; + + default: + + /* + * Before we leave this node/operator/subexpression for the + * last time, finish up its tokens.... + * + * Our current position scanning the string is where the + * substring for the subexpression ends. + */ + + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr->size = start - subExprTokenPtr->start; + + /* + * All the Tcl_Tokens allocated and filled belong to + * this subexpresion. The first token is the leading + * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) + * are its components. + */ + + subExprTokenPtr->numComponents = + (parsePtr->numTokens - subExprTokenIdx) - 1; + + /* + * Finally, as we return up the tree to our parent, pop the + * parent subexpression off our subexpression stack, and + * fill in the zero numComponents for the operator Tcl_Token. + */ + + parentIdx = subExprTokenPtr[1].numComponents; + subExprTokenPtr[1].numComponents = 0; + subExprTokenIdx = parentIdx; + break; + } + + /* + * Since we're returning to parent, skip child handling code. + */ + + nodePtr = nodes + nodePtr->p.parent; + goto router; + } } - Tcl_MutexUnlock(&opMutex); } /* *---------------------------------------------------------------------- * - * CompileSubExpr -- + * Tcl_ParseExpr -- * - * Given a pointer to a TCL_TOKEN_SUB_EXPR token describing a - * subexpression, this procedure emits instructions to evaluate the - * subexpression at runtime. + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and stores information about the + * structure of the expression in the Tcl_Parse struct indicated by the + * caller. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * If the string is successfully parsed as a valid Tcl expression, TCL_OK + * is returned, and data about the expression structure is written to + * *parsePtr. If the string cannot be parsed as a valid Tcl expression, + * TCL_ERROR is returned, and if interp is non-NULL, an error message is + * written to interp. * - * envPtr->maxStackDepth is updated with the maximum number of stack - * elements needed to execute the subexpression. + * Side effects: + * If there is insufficient space in parsePtr to hold all the information + * about the expression, then additional space is malloc-ed. If the + * function returns TCL_OK then the caller must eventually invoke + * Tcl_FreeParse to release any additional space that was allocated. * - * envPtr->exprIsJustVarRef is set 1 if the subexpression consisted of - * a single variable reference as in the expression of "if $b then...". - * Otherwise it is set 0. This is used to implement Tcl's two level - * expression substitution semantics properly. + *---------------------------------------------------------------------- + */ + +int +Tcl_ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. If < 0, the + * string consists of all bytes up to the + * first null character. */ + Tcl_Parse *parsePtr) /* Structure to fill with information about + * the parsed expression; any previous + * information in the structure is ignored. */ +{ + int code; + OpNode *opTree = NULL; /* Will point to the tree of operators. */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals. */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names. */ + Tcl_Parse *exprParsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions. */ + + if (numBytes < 0) { + numBytes = (start ? strlen(start) : 0); + } + + code = ParseExpr(interp, start, numBytes, &opTree, litList, funcList, + exprParsePtr, 1 /* parseOnly */); + Tcl_DecrRefCount(funcList); + Tcl_DecrRefCount(litList); + + TclParseInit(interp, start, numBytes, parsePtr); + if (code == TCL_OK) { + ConvertTreeToTokens(start, numBytes, + opTree, exprParsePtr->tokenPtr, parsePtr); + } else { + parsePtr->term = exprParsePtr->term; + parsePtr->errorType = exprParsePtr->errorType; + } + + Tcl_FreeParse(exprParsePtr); + TclStackFree(interp, exprParsePtr); + ckfree(opTree); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * ParseLexeme -- * - * envPtr->exprIsComparison is set 1 if the top-level operator in the - * subexpression is a comparison. Otherwise it is set 0. If 1, because - * the operands might be strings, the expr is compiled out-of-line in - * order to implement expr's 2 level substitution semantics properly. + * Parse a single lexeme from the start of a string, scanning no more + * than numBytes bytes. + * + * Results: + * Returns the number of bytes scanned to produce the lexeme. * * Side effects: - * Adds instructions to envPtr to evaluate the subexpression. + * Code identifying lexeme parsed is writen to *lexemePtr. * *---------------------------------------------------------------------- */ static int -CompileSubExpr(exprTokenPtr, infoPtr, envPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * to compile. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ +ParseLexeme( + const char *start, /* Start of lexeme to parse. */ + int numBytes, /* Number of bytes in string. */ + unsigned char *lexemePtr, /* Write code of parsed lexeme to this + * storage. */ + Tcl_Obj **literalPtr) /* Write corresponding literal value to this + storage, if non-NULL. */ { - Tcl_Interp *interp = infoPtr->interp; - Tcl_Token *tokenPtr, *endPtr, *afterSubexprPtr; - OperatorDesc *opDescPtr; - Tcl_HashEntry *hPtr; - char *operator; - char savedChar; - int maxDepth, objIndex, opIndex, length, code; - char buffer[TCL_UTF_MAX]; - - if (exprTokenPtr->type != TCL_TOKEN_SUB_EXPR) { - panic("CompileSubExpr: token type %d not TCL_TOKEN_SUB_EXPR\n", - exprTokenPtr->type); + const char *end; + int scanned; + Tcl_UniChar ch; + Tcl_Obj *literal = NULL; + unsigned char byte; + + if (numBytes == 0) { + *lexemePtr = END; + return 0; + } + byte = UCHAR(*start); + if (byte < sizeof(Lexeme) && Lexeme[byte] != 0) { + *lexemePtr = Lexeme[byte]; + return 1; } - maxDepth = 0; - code = TCL_OK; + switch (byte) { + case '*': + if ((numBytes > 1) && (start[1] == '*')) { + *lexemePtr = EXPON; + return 2; + } + *lexemePtr = MULT; + return 1; - /* - * Switch on the type of the first token after the subexpression token. - * After processing it, advance tokenPtr to point just after the - * subexpression's last token. - */ - - tokenPtr = exprTokenPtr+1; - TRACE(exprTokenPtr->start, exprTokenPtr->size, - tokenPtr->start, tokenPtr->size); - switch (tokenPtr->type) { - case TCL_TOKEN_WORD: - code = TclCompileTokens(interp, tokenPtr+1, - tokenPtr->numComponents, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - infoPtr->exprIsJustVarRef = 0; - break; - - case TCL_TOKEN_TEXT: - if (tokenPtr->size > 0) { - objIndex = TclRegisterLiteral(envPtr, tokenPtr->start, - tokenPtr->size, /*onHeap*/ 0); - } else { - objIndex = TclRegisterLiteral(envPtr, "", 0, /*onHeap*/ 0); - } - TclEmitPush(objIndex, envPtr); - maxDepth = 1; - tokenPtr += 1; - infoPtr->exprIsJustVarRef = 0; - break; - - case TCL_TOKEN_BS: - length = Tcl_UtfBackslash(tokenPtr->start, (int *) NULL, - buffer); - if (length > 0) { - objIndex = TclRegisterLiteral(envPtr, buffer, length, - /*onHeap*/ 0); - } else { - objIndex = TclRegisterLiteral(envPtr, "", 0, /*onHeap*/ 0); - } - TclEmitPush(objIndex, envPtr); - maxDepth = 1; - tokenPtr += 1; - infoPtr->exprIsJustVarRef = 0; - break; - - case TCL_TOKEN_COMMAND: - code = TclCompileScript(interp, tokenPtr->start+1, - tokenPtr->size-2, /*nested*/ 1, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += 1; - infoPtr->exprIsJustVarRef = 0; - break; - - case TCL_TOKEN_VARIABLE: - code = TclCompileTokens(interp, tokenPtr, 1, envPtr); - if (code != TCL_OK) { - goto done; + case '=': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = EQUAL; + return 2; + } + *lexemePtr = INCOMPLETE; + return 1; + + case '!': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = NEQ; + return 2; + } + *lexemePtr = NOT; + return 1; + + case '&': + if ((numBytes > 1) && (start[1] == '&')) { + *lexemePtr = AND; + return 2; + } + *lexemePtr = BIT_AND; + return 1; + + case '|': + if ((numBytes > 1) && (start[1] == '|')) { + *lexemePtr = OR; + return 2; + } + *lexemePtr = BIT_OR; + return 1; + + case '<': + if (numBytes > 1) { + switch (start[1]) { + case '<': + *lexemePtr = LEFT_SHIFT; + return 2; + case '=': + *lexemePtr = LEQ; + return 2; } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - break; - - case TCL_TOKEN_SUB_EXPR: - infoPtr->exprIsComparison = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + } + *lexemePtr = LESS; + return 1; + + case '>': + if (numBytes > 1) { + switch (start[1]) { + case '>': + *lexemePtr = RIGHT_SHIFT; + return 2; + case '=': + *lexemePtr = GEQ; + return 2; } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - break; - - case TCL_TOKEN_OPERATOR: - /* - * Look up the operator. Temporarily overwrite the character - * just after the end of the operator with a 0 byte. If the - * operator isn't found, treat it as a math function. - */ + } + *lexemePtr = GREATER; + return 1; + case 'i': + if ((numBytes > 1) && (start[1] == 'n') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { /* - * TODO: Note that the string is modified in place. This is unsafe - * and will break if any of the routines called while the string is - * modified have side effects that depend on the original string - * being unmodified (e.g. adding an entry to the literal table). + * Must make this check so we can tell the difference between the + * "in" operator and the "int" function name and the "infinity" + * numeric value. */ - operator = tokenPtr->start; - savedChar = operator[tokenPtr->size]; - operator[tokenPtr->size] = 0; - hPtr = Tcl_FindHashEntry(&opHashTable, operator); - if (hPtr == NULL) { - code = CompileMathFuncCall(exprTokenPtr, operator, infoPtr, - envPtr, &endPtr); - operator[tokenPtr->size] = (char) savedChar; - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr = endPtr; - infoPtr->exprIsJustVarRef = 0; - infoPtr->exprIsComparison = 0; - break; + *lexemePtr = IN_LIST; + return 2; + } + break; + + case 'e': + if ((numBytes > 1) && (start[1] == 'q') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + *lexemePtr = STREQ; + return 2; + } + break; + + case 'n': + if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + switch (start[1]) { + case 'e': + *lexemePtr = STRNEQ; + return 2; + case 'i': + *lexemePtr = NOT_IN_LIST; + return 2; } - operator[tokenPtr->size] = (char) savedChar; - opIndex = (int) Tcl_GetHashValue(hPtr); - opDescPtr = &(operatorTable[opIndex]); + } + } + + literal = Tcl_NewObj(); + if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end, + TCL_PARSE_NO_WHITESPACE) == TCL_OK) { + if (end < start + numBytes && !isalnum(UCHAR(*end)) + && UCHAR(*end) != '_') { + + number: + TclInitStringRep(literal, start, end-start); + *lexemePtr = NUMBER; + if (literalPtr) { + *literalPtr = literal; + } else { + Tcl_DecrRefCount(literal); + } + return (end-start); + } else { + unsigned char lexeme; /* - * If the operator is "normal", compile it using information - * from the operator table. + * We have a number followed directly by bareword characters + * (alpha, digit, underscore). Is this a number followed by + * bareword syntax error? Or should we join into one bareword? + * Example: Inf + luence + () becomes a valid function call. + * [Bug 3401704] */ - - if (opDescPtr->numOperands > 0) { - tokenPtr++; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - - if (opDescPtr->numOperands == 2) { - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + if (literal->typePtr == &tclDoubleType) { + const char *p = start; + + while (p < end) { + if (!isalnum(UCHAR(*p++))) { + /* + * The number has non-bareword characters, so we + * must treat it as a number. + */ + goto number; } - maxDepth = TclMax((envPtr->maxStackDepth + 1), - maxDepth); - tokenPtr += (tokenPtr->numComponents + 1); } - TclEmitOpcode(opDescPtr->instruction, envPtr); - infoPtr->hasOperators = 1; - infoPtr->exprIsJustVarRef = 0; - infoPtr->exprIsComparison = - ((opIndex >= OP_LESS) && (opIndex <= OP_NEQ)); - break; } - + ParseLexeme(end, numBytes-(end-start), &lexeme, NULL); + if ((NODE_TYPE & lexeme) == BINARY) { + /* + * The bareword characters following the number take the + * form of an operator (eq, ne, in, ni, ...) so we treat + * as number + operator. + */ + goto number; + } + /* - * The operator requires special treatment, and is either - * "+" or "-", or one of "&&", "||" or "?". + * Otherwise, fall through and parse the whole as a bareword. */ - - switch (opIndex) { - case OP_PLUS: - case OP_MINUS: - tokenPtr++; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Check whether the "+" or "-" is unary. - */ - - afterSubexprPtr = exprTokenPtr - + exprTokenPtr->numComponents+1; - if (tokenPtr == afterSubexprPtr) { - TclEmitOpcode(((opIndex==OP_PLUS)? - INST_UPLUS : INST_UMINUS), - envPtr); - break; - } - - /* - * The "+" or "-" is binary. - */ - - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = TclMax((envPtr->maxStackDepth + 1), - maxDepth); - tokenPtr += (tokenPtr->numComponents + 1); - TclEmitOpcode(((opIndex==OP_PLUS)? INST_ADD : INST_SUB), - envPtr); - break; + } + } - case OP_LAND: - case OP_LOR: - code = CompileLandOrLorExpr(exprTokenPtr, opIndex, - infoPtr, envPtr, &endPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr = endPtr; - break; - - case OP_QUESTY: - code = CompileCondExpr(exprTokenPtr, infoPtr, - envPtr, &endPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr = endPtr; - break; - - default: - panic("CompileSubExpr: unexpected operator %d requiring special treatment\n", - opIndex); - } /* end switch on operator requiring special treatment */ - infoPtr->hasOperators = 1; - infoPtr->exprIsJustVarRef = 0; - infoPtr->exprIsComparison = 0; - break; + if (Tcl_UtfCharComplete(start, numBytes)) { + scanned = Tcl_UtfToUniChar(start, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; - default: - panic("CompileSubExpr: unexpected token type %d\n", - tokenPtr->type); + memcpy(utfBytes, start, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); } - - /* - * Verify that the subexpression token had the required number of - * subtokens: that we've advanced tokenPtr just beyond the - * subexpression's last token. For example, a "*" subexpression must - * contain the tokens for exactly two operands. - */ - - if (tokenPtr != (exprTokenPtr + exprTokenPtr->numComponents+1)) { - LogSyntaxError(infoPtr); - code = TCL_ERROR; + if (!isalnum(UCHAR(ch))) { + *lexemePtr = INVALID; + Tcl_DecrRefCount(literal); + return scanned; } - - done: - envPtr->maxStackDepth = maxDepth; - return code; + end = start; + while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { + end += scanned; + numBytes -= scanned; + if (Tcl_UtfCharComplete(end, numBytes)) { + scanned = Tcl_UtfToUniChar(end, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + + memcpy(utfBytes, end, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + } + *lexemePtr = BAREWORD; + if (literalPtr) { + Tcl_SetStringObj(literal, start, (int) (end-start)); + *literalPtr = literal; + } else { + Tcl_DecrRefCount(literal); + } + return (end-start); } /* *---------------------------------------------------------------------- * - * CompileLandOrLorExpr -- + * TclCompileExpr -- * - * This procedure compiles a Tcl logical and ("&&") or logical or - * ("||") subexpression. + * This procedure compiles a string containing a Tcl expression into Tcl + * bytecodes. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. - * - * envPtr->maxStackDepth is updated with the maximum number of stack - * elements needed to execute the expression. + * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. @@ -660,389 +2109,699 @@ CompileSubExpr(exprTokenPtr, infoPtr, envPtr) *---------------------------------------------------------------------- */ -static int -CompileLandOrLorExpr(exprTokenPtr, opIndex, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "&&" or "||" operator. */ - int opIndex; /* A code describing the expression - * operator: either OP_LAND or OP_LOR. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +void +TclCompileExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *script, /* The source script to compile. */ + int numBytes, /* Number of bytes in script. */ + CompileEnv *envPtr, /* Holds resulting instructions. */ + int optimize) /* 0 for one-off expressions. */ { - JumpFixup shortCircuitFixup; /* Used to fix up the short circuit jump - * after the first subexpression. */ - JumpFixup lhsTrueFixup, lhsEndFixup; - /* Used to fix up jumps used to convert the - * first operand to 0 or 1. */ - Tcl_Token *tokenPtr; - int dist, maxDepth, code; + OpNode *opTree = NULL; /* Will point to the tree of operators */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ + Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions */ - /* - * Emit code for the first operand. - */ - - maxDepth = 0; - tokenPtr = exprTokenPtr+2; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); + int code = ParseExpr(interp, script, numBytes, &opTree, litList, + funcList, parsePtr, 0 /* parseOnly */); - /* - * Convert the first operand to the result that Tcl requires: - * "0" or "1". Eventually we'll use a new instruction for this. - */ - - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &lhsTrueFixup); - TclEmitPush(TclRegisterLiteral(envPtr, "0", 1, /*onHeap*/ 0), envPtr); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &lhsEndFixup); - dist = (envPtr->codeNext - envPtr->codeStart) - lhsTrueFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &lhsTrueFixup, dist, 127)) { - badDist: - panic("CompileLandOrLorExpr: bad jump distance %d\n", dist); - } - TclEmitPush(TclRegisterLiteral(envPtr, "1", 1, /*onHeap*/ 0), envPtr); - dist = (envPtr->codeNext - envPtr->codeStart) - lhsEndFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &lhsEndFixup, dist, 127)) { - goto badDist; - } + if (code == TCL_OK) { + /* + * Valid parse; compile the tree. + */ - /* - * Emit the "short circuit" jump around the rest of the expression. - * Duplicate the "0" or "1" on top of the stack first to keep the - * jump from consuming it. - */ + int objc; + Tcl_Obj *const *litObjv; + Tcl_Obj **funcObjv; - TclEmitOpcode(INST_DUP, envPtr); - TclEmitForwardJump(envPtr, - ((opIndex==OP_LAND)? TCL_FALSE_JUMP : TCL_TRUE_JUMP), - &shortCircuitFixup); + /* TIP #280 : Track Lines within the expression */ + TclAdvanceLines(&envPtr->line, script, + script + TclParseAllWhiteSpace(script, numBytes)); - /* - * Emit code for the second operand. - */ - - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + TclListObjGetElements(NULL, litList, &objc, (Tcl_Obj ***)&litObjv); + TclListObjGetElements(NULL, funcList, &objc, &funcObjv); + CompileExprTree(interp, opTree, 0, &litObjv, funcObjv, + parsePtr->tokenPtr, envPtr, optimize); + } else { + TclCompileSyntaxError(interp, envPtr); } - maxDepth = TclMax((envPtr->maxStackDepth + 1), maxDepth); - tokenPtr += (tokenPtr->numComponents + 1); - /* - * Emit a "logical and" or "logical or" instruction. This does not try - * to "short- circuit" the evaluation of both operands, but instead - * ensures that we either have a "1" or a "0" result. - */ + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); + Tcl_DecrRefCount(funcList); + Tcl_DecrRefCount(litList); + ckfree(opTree); +} + +/* + *---------------------------------------------------------------------- + * + * ExecConstantExprTree -- + * Compiles and executes bytecode for the subexpression tree at index + * in the nodes array. This subexpression must be constant, made up + * of only constant operators (not functions) and literals. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. + * + *---------------------------------------------------------------------- + */ - TclEmitOpcode(((opIndex==OP_LAND)? INST_LAND : INST_LOR), envPtr); +static int +ExecConstantExprTree( + Tcl_Interp *interp, + OpNode *nodes, + int index, + Tcl_Obj *const **litObjvPtr) +{ + CompileEnv *envPtr; + ByteCode *byteCodePtr; + int code; + Tcl_Obj *byteCodeObj = Tcl_NewObj(); + NRE_callback *rootPtr = TOP_CB(interp); /* - * Now that we know the target of the forward jump, update it with the - * correct distance. + * Note we are compiling an expression with literal arguments. This means + * there can be no [info frame] calls when we execute the resulting + * bytecode, so there's no need to tend to TIP 280 issues. */ - dist = (envPtr->codeNext - envPtr->codeStart) - - shortCircuitFixup.codeOffset; - TclFixupForwardJump(envPtr, &shortCircuitFixup, dist, 127); - *endPtrPtr = tokenPtr; - - done: - envPtr->maxStackDepth = maxDepth; + envPtr = TclStackAlloc(interp, sizeof(CompileEnv)); + TclInitCompileEnv(interp, envPtr, NULL, 0, NULL, 0); + CompileExprTree(interp, nodes, index, litObjvPtr, NULL, NULL, envPtr, + 0 /* optimize */); + TclEmitOpcode(INST_DONE, envPtr); + Tcl_IncrRefCount(byteCodeObj); + TclInitByteCodeObj(byteCodeObj, envPtr); + TclFreeCompileEnv(envPtr); + TclStackFree(interp, envPtr); + byteCodePtr = byteCodeObj->internalRep.twoPtrValue.ptr1; + TclNRExecuteByteCode(interp, byteCodePtr); + code = TclNRRunCallbacks(interp, TCL_OK, rootPtr); + Tcl_DecrRefCount(byteCodeObj); return code; } /* *---------------------------------------------------------------------- * - * CompileCondExpr -- + * CompileExprTree -- * - * This procedure compiles a Tcl conditional expression: - * condExpr ::= lorExpr ['?' condExpr ':' condExpr] + * Compiles and writes to envPtr instructions for the subexpression tree + * at index in the nodes array. (*litObjvPtr) must point to the proper + * location in a corresponding literals list. Likewise, when non-NULL, + * funcObjv and tokenPtr must point into matching arrays of function + * names and Tcl_Token's derived from earlier call to ParseExpr(). When + * optimize is true, any constant subexpressions will be precomputed. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. - * - * envPtr->maxStackDepth is updated with the maximum number of stack - * elements needed to execute the expression. + * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. * *---------------------------------------------------------------------- */ -static int -CompileCondExpr(exprTokenPtr, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "?" operator. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +static void +CompileExprTree( + Tcl_Interp *interp, + OpNode *nodes, + int index, + Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, + Tcl_Token *tokenPtr, + CompileEnv *envPtr, + int optimize) { - JumpFixup jumpAroundThenFixup, jumpAroundElseFixup; - /* Used to update or replace one-byte jumps - * around the then and else expressions when - * their target PCs are determined. */ - Tcl_Token *tokenPtr; - int elseCodeOffset, dist, maxDepth, code; + OpNode *nodePtr = nodes + index; + OpNode *rootPtr = nodePtr; + int numWords = 0; + JumpList *jumpPtr = NULL; + int convert = 1; - /* - * Emit code for the test. - */ + while (1) { + int next; + JumpList *freePtr, *newJump; - maxDepth = 0; - tokenPtr = exprTokenPtr+2; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = envPtr->maxStackDepth; - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Emit the jump to the "else" expression if the test was false. - */ - - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpAroundThenFixup); + if (nodePtr->mark == MARK_LEFT) { + next = nodePtr->left; - /* - * Compile the "then" expression. Note that if a subexpression is only - * a primary, we need to try to convert it to numeric. We do this to - * support Tcl's policy of interpreting operands if at all possible as - * first integers, else floating-point numbers. - */ + if (nodePtr->lexeme == QUESTION) { + convert = 1; + } + } else if (nodePtr->mark == MARK_RIGHT) { + next = nodePtr->right; + + switch (nodePtr->lexeme) { + case FUNCTION: { + Tcl_DString cmdName; + const char *p; + int length; + + Tcl_DStringInit(&cmdName); + TclDStringAppendLiteral(&cmdName, "tcl::mathfunc::"); + p = TclGetStringFromObj(*funcObjv, &length); + funcObjv++; + Tcl_DStringAppend(&cmdName, p, length); + TclEmitPush(TclRegisterNewCmdLiteral(envPtr, + Tcl_DStringValue(&cmdName), + Tcl_DStringLength(&cmdName)), envPtr); + Tcl_DStringFree(&cmdName); + + /* + * Start a count of the number of words in this function + * command invocation. In case there's already a count in + * progress (nested functions), save it in our unused "left" + * field for restoring later. + */ + + nodePtr->left = numWords; + numWords = 2; /* Command plus one argument */ + break; + } + case QUESTION: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump); + break; + case COLON: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, + &jumpPtr->jump); + TclAdjustStackDepth(-1, envPtr); + if (convert) { + jumpPtr->jump.jumpType = TCL_TRUE_JUMP; + } + convert = 1; + break; + case AND: + case OR: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) + ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, &jumpPtr->jump); + break; + } + } else { + int pc1, pc2, target; + + switch (nodePtr->lexeme) { + case START: + case QUESTION: + if (convert && (nodePtr == rootPtr)) { + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + } + break; + case OPEN_PAREN: - infoPtr->hasOperators = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = TclMax(envPtr->maxStackDepth, maxDepth); - tokenPtr += (tokenPtr->numComponents + 1); - if (!infoPtr->hasOperators) { - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } + /* do nothing */ + break; + case FUNCTION: + /* + * Use the numWords count we've kept to invoke the function + * command with the correct number of arguments. + */ + + if (numWords < 255) { + TclEmitInvoke(envPtr, INST_INVOKE_STK1, numWords); + } else { + TclEmitInvoke(envPtr, INST_INVOKE_STK4, numWords); + } - /* - * Emit an unconditional jump around the "else" condExpr. - */ - - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &jumpAroundElseFixup); + /* + * Restore any saved numWords value. + */ - /* - * Compile the "else" expression. - */ + numWords = nodePtr->left; + convert = 1; + break; + case COMMA: + /* + * Each comma implies another function argument. + */ - elseCodeOffset = (envPtr->codeNext - envPtr->codeStart); - infoPtr->hasOperators = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - maxDepth = TclMax(envPtr->maxStackDepth, maxDepth); - tokenPtr += (tokenPtr->numComponents + 1); - if (!infoPtr->hasOperators) { - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } + numWords++; + break; + case COLON: + CLANG_ASSERT(jumpPtr); + if (jumpPtr->jump.jumpType == TCL_TRUE_JUMP) { + jumpPtr->jump.jumpType = TCL_UNCONDITIONAL_JUMP; + convert = 1; + } + target = jumpPtr->jump.codeOffset + 2; + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + target += 3; + } + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + TclFixupForwardJump(envPtr, &jumpPtr->jump, + target - jumpPtr->jump.codeOffset, 127); + + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + case AND: + case OR: + CLANG_ASSERT(jumpPtr); + pc1 = CurrentOffset(envPtr); + TclEmitInstInt1((nodePtr->lexeme == AND) ? INST_JUMP_FALSE1 + : INST_JUMP_TRUE1, 0, envPtr); + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); + pc2 = CurrentOffset(envPtr); + TclEmitInstInt1(INST_JUMP1, 0, envPtr); + TclAdjustStackDepth(-1, envPtr); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc1, + envPtr->codeStart + pc1 + 1); + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + pc2 += 3; + } + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc2, + envPtr->codeStart + pc2 + 1); + convert = 0; + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + default: + TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); + convert = 0; + break; + } + if (nodePtr == rootPtr) { + /* We're done */ - /* - * Fix up the second jump around the "else" expression. - */ + return; + } + nodePtr = nodes + nodePtr->p.parent; + continue; + } - dist = (envPtr->codeNext - envPtr->codeStart) - - jumpAroundElseFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &jumpAroundElseFixup, dist, 127)) { - /* - * Update the else expression's starting code offset since it - * moved down 3 bytes too. - */ - - elseCodeOffset += 3; - } - - /* - * Fix up the first jump to the "else" expression if the test was false. - */ - - dist = (elseCodeOffset - jumpAroundThenFixup.codeOffset); - TclFixupForwardJump(envPtr, &jumpAroundThenFixup, dist, 127); - *endPtrPtr = tokenPtr; + nodePtr->mark++; + switch (next) { + case OT_EMPTY: + numWords = 1; /* No arguments, so just the command */ + break; + case OT_LITERAL: { + Tcl_Obj *const *litObjv = *litObjvPtr; + Tcl_Obj *literal = *litObjv; + + if (optimize) { + int length; + const char *bytes = TclGetStringFromObj(literal, &length); + int index = TclRegisterNewLiteral(envPtr, bytes, length); + Tcl_Obj *objPtr = TclFetchLiteral(envPtr, index); + + if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { + /* + * Would like to do this: + * + * lePtr->objPtr = literal; + * Tcl_IncrRefCount(literal); + * Tcl_DecrRefCount(objPtr); + * + * However, the design of the "global" and "local" + * LiteralTable does not permit the value of lePtr->objPtr + * to change. So rather than replace lePtr->objPtr, we do + * surgery to transfer our desired intrep into it. + */ - done: - envPtr->maxStackDepth = maxDepth; - return code; + objPtr->typePtr = literal->typePtr; + objPtr->internalRep = literal->internalRep; + literal->typePtr = NULL; + } + TclEmitPush(index, envPtr); + } else { + /* + * When optimize==0, we know the expression is a one-off and + * there's nothing to be gained from sharing literals when + * they won't live long, and the copies we have already have + * an appropriate intrep. In this case, skip literal + * registration that would enable sharing, and use the routine + * that preserves intreps. + */ + + TclEmitPush(TclAddLiteralObj(envPtr, literal, NULL), envPtr); + } + (*litObjvPtr)++; + break; + } + case OT_TOKENS: + CompileTokens(envPtr, tokenPtr, interp); + tokenPtr += tokenPtr->numComponents + 1; + break; + default: + if (optimize && nodes[next].constant) { + Tcl_InterpState save = Tcl_SaveInterpState(interp, TCL_OK); + + if (ExecConstantExprTree(interp, nodes, next, litObjvPtr) + == TCL_OK) { + int index; + Tcl_Obj *objPtr = Tcl_GetObjResult(interp); + + /* + * Don't generate a string rep, but if we have one + * already, then use it to share via the literal table. + */ + + if (objPtr->bytes) { + Tcl_Obj *tableValue; + + index = TclRegisterNewLiteral(envPtr, objPtr->bytes, + objPtr->length); + tableValue = TclFetchLiteral(envPtr, index); + if ((tableValue->typePtr == NULL) && + (objPtr->typePtr != NULL)) { + /* + * Same intrep surgery as for OT_LITERAL. + */ + + tableValue->typePtr = objPtr->typePtr; + tableValue->internalRep = objPtr->internalRep; + objPtr->typePtr = NULL; + } + } else { + index = TclAddLiteralObj(envPtr, objPtr, NULL); + } + TclEmitPush(index, envPtr); + } else { + TclCompileSyntaxError(interp, envPtr); + } + Tcl_RestoreInterpState(interp, save); + convert = 0; + } else { + nodePtr = nodes + next; + } + } + } } /* *---------------------------------------------------------------------- * - * CompileMathFuncCall -- + * TclSingleOpCmd -- * - * This procedure compiles a call on a math function in an expression: - * mathFuncCall ::= funcName '(' [condExpr {',' condExpr}] ')' + * Implements the commands: ~, !, <<, >>, %, !=, ne, in, ni + * in the ::tcl::mathop namespace. These commands have no + * extension to arbitrary arguments; they accept only exactly one + * or exactly two arguments as suitable for the operator. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * A standard Tcl return code and result left in interp. * - * envPtr->maxStackDepth is updated with the maximum number of stack - * elements needed to execute the function. + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclSingleOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + TclOpCmdClientData *occdPtr = clientData; + unsigned char lexeme; + OpNode nodes[2]; + Tcl_Obj *const *litObjv = objv + 1; + + if (objc != 1 + occdPtr->i.numArgs) { + Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); + return TCL_ERROR; + } + + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + if (objc == 2) { + nodes[1].mark = MARK_RIGHT; + } else { + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + } + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; + + return ExecConstantExprTree(interp, nodes, 0, &litObjv); +} + +/* + *---------------------------------------------------------------------- + * + * TclSortingOpCmd -- + * Implements the commands: + * <, <=, >, >=, ==, eq + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by computing the AND of the base + * operator applied to all neighbor argument pairs. + * + * Results: + * A standard Tcl return code and result left in interp. * * Side effects: - * Adds instructions to envPtr to evaluate the math function at - * runtime. + * None. * *---------------------------------------------------------------------- */ -static int -CompileMathFuncCall(exprTokenPtr, funcName, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the math function call. */ - char *funcName; /* Name of the math function. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +int +TclSortingOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - Tcl_Interp *interp = infoPtr->interp; - Interp *iPtr = (Interp *) interp; - MathFunc *mathFuncPtr; - Tcl_HashEntry *hPtr; - Tcl_Token *tokenPtr, *afterSubexprPtr; - int maxDepth, code, i; + int code = TCL_OK; - /* - * Look up the MathFunc record for the function. - */ + if (objc < 3) { + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); + } else { + TclOpCmdClientData *occdPtr = clientData; + Tcl_Obj **litObjv = TclStackAlloc(interp, + 2 * (objc-2) * sizeof(Tcl_Obj *)); + OpNode *nodes = TclStackAlloc(interp, 2 * (objc-2) * sizeof(OpNode)); + unsigned char lexeme; + int i, lastAnd = 1; + Tcl_Obj *const *litObjPtrPtr = litObjv; + + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + + litObjv[0] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + for (i=2; i<objc-1; i++) { + litObjv[2*(i-1)-1] = objv[i]; + nodes[2*(i-1)-1].lexeme = lexeme; + nodes[2*(i-1)-1].mark = MARK_LEFT; + nodes[2*(i-1)-1].left = OT_LITERAL; + nodes[2*(i-1)-1].right = OT_LITERAL; + + litObjv[2*(i-1)] = objv[i]; + nodes[2*(i-1)].lexeme = AND; + nodes[2*(i-1)].mark = MARK_LEFT; + nodes[2*(i-1)].left = lastAnd; + nodes[lastAnd].p.parent = 2*(i-1); + + nodes[2*(i-1)].right = 2*(i-1)+1; + nodes[2*(i-1)+1].p.parent= 2*(i-1); + + lastAnd = 2*(i-1); + } + litObjv[2*(objc-2)-1] = objv[objc-1]; - code = TCL_OK; - maxDepth = 0; - hPtr = Tcl_FindHashEntry(&iPtr->mathFuncTable, funcName); - if (hPtr == NULL) { - Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), - "unknown math function \"", funcName, "\"", (char *) NULL); - code = TCL_ERROR; - goto done; + nodes[2*(objc-2)-1].lexeme = lexeme; + nodes[2*(objc-2)-1].mark = MARK_LEFT; + nodes[2*(objc-2)-1].left = OT_LITERAL; + nodes[2*(objc-2)-1].right = OT_LITERAL; + + nodes[0].right = lastAnd; + nodes[lastAnd].p.parent = 0; + + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); + + TclStackFree(interp, nodes); + TclStackFree(interp, litObjv); } - mathFuncPtr = (MathFunc *) Tcl_GetHashValue(hPtr); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * TclVariadicOpCmd -- + * Implements the commands: +, *, &, |, ^, ** + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by repeatedly applying the base + * operator with suitable associative rules. When fewer than two + * arguments are provided, suitable identity values are returned. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ - /* - * If not a builtin function, push an object with the function's name. - */ +int +TclVariadicOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + TclOpCmdClientData *occdPtr = clientData; + unsigned char lexeme; + int code; - if (mathFuncPtr->builtinFuncIndex < 0) { - TclEmitPush(TclRegisterLiteral(envPtr, funcName, -1, /*onHeap*/ 0), - envPtr); - maxDepth = 1; + if (objc < 2) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(occdPtr->i.identity)); + return TCL_OK; } - /* - * Compile any arguments for the function. - */ + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + lexeme |= BINARY; + + if (objc == 2) { + Tcl_Obj *litObjv[2]; + OpNode nodes[2]; + int decrMe = 0; + Tcl_Obj *const *litObjPtrPtr = litObjv; + + if (lexeme == EXPON) { + litObjv[1] = Tcl_NewIntObj(occdPtr->i.identity); + Tcl_IncrRefCount(litObjv[1]); + decrMe = 1; + litObjv[0] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; + } else { + if (lexeme == DIVIDE) { + litObjv[0] = Tcl_NewDoubleObj(1.0); + } else { + litObjv[0] = Tcl_NewIntObj(occdPtr->i.identity); + } + Tcl_IncrRefCount(litObjv[0]); + litObjv[1] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; + } - tokenPtr = exprTokenPtr+2; - afterSubexprPtr = exprTokenPtr + (exprTokenPtr->numComponents + 1); - if (mathFuncPtr->numArgs > 0) { - for (i = 0; i < mathFuncPtr->numArgs; i++) { - if (tokenPtr == afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too few arguments for math function", -1); - code = TCL_ERROR; - goto done; + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); + + Tcl_DecrRefCount(litObjv[decrMe]); + return code; + } else { + Tcl_Obj *const *litObjv = objv + 1; + OpNode *nodes = TclStackAlloc(interp, (objc-1) * sizeof(OpNode)); + int i, lastOp = OT_LITERAL; + + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + if (lexeme == EXPON) { + for (i=objc-2; i>0; i--) { + nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; + nodes[i].left = OT_LITERAL; + nodes[i].right = lastOp; + if (lastOp >= 0) { + nodes[lastOp].p.parent = i; + } + lastOp = i; } - infoPtr->exprIsComparison = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + } else { + for (i=1; i<objc-1; i++) { + nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; + nodes[i].left = lastOp; + if (lastOp >= 0) { + nodes[lastOp].p.parent = i; + } + nodes[i].right = OT_LITERAL; + lastOp = i; } - tokenPtr += (tokenPtr->numComponents + 1); - maxDepth++; } - if (tokenPtr != afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too many arguments for math function", -1); - code = TCL_ERROR; - goto done; - } - } else if (tokenPtr != afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too many arguments for math function", -1); - code = TCL_ERROR; - goto done; - } - - /* - * Compile the call on the math function. Note that the "objc" argument - * count for non-builtin functions is incremented by 1 to include the - * function name itself. - */ + nodes[0].right = lastOp; + nodes[lastOp].p.parent = 0; - if (mathFuncPtr->builtinFuncIndex >= 0) { /* a builtin function */ - TclEmitInstInt1(INST_CALL_BUILTIN_FUNC1, - mathFuncPtr->builtinFuncIndex, envPtr); - } else { - TclEmitInstInt1(INST_CALL_FUNC1, (mathFuncPtr->numArgs+1), envPtr); - } - *endPtrPtr = afterSubexprPtr; + code = ExecConstantExprTree(interp, nodes, 0, &litObjv); - done: - envPtr->maxStackDepth = maxDepth; - return code; + TclStackFree(interp, nodes); + return code; + } } /* *---------------------------------------------------------------------- * - * LogSyntaxError -- - * - * This procedure is invoked after an error occurs when compiling an - * expression. It sets the interpreter result to an error message - * describing the error. + * TclNoIdentOpCmd -- + * Implements the commands: -, / + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary non-zero number of arguments by repeatedly applying the base + * operator with suitable associative rules. When no arguments are + * provided, an error is raised. * * Results: - * None. + * A standard Tcl return code and result left in interp. * * Side effects: - * Sets the interpreter result to an error message describing the - * expression that was being compiled when the error occurred. + * None. * *---------------------------------------------------------------------- */ -static void -LogSyntaxError(infoPtr) - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ +int +TclNoIdentOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - int numBytes = (infoPtr->lastChar - infoPtr->expr); - char buffer[100]; + TclOpCmdClientData *occdPtr = clientData; - sprintf(buffer, "syntax error in expression \"%.*s\"", - ((numBytes > 60)? 60 : numBytes), infoPtr->expr); - Tcl_AppendStringsToObj(Tcl_GetObjResult(infoPtr->interp), - buffer, (char *) NULL); + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); + return TCL_ERROR; + } + return TclVariadicOpCmd(clientData, interp, objc, objv); } +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |