diff options
Diffstat (limited to 'generic/tclCompExpr.c')
| -rw-r--r-- | generic/tclCompExpr.c | 3229 |
1 files changed, 747 insertions, 2482 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 999fe0a..6bac221 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1,2020 +1,578 @@ -/* +/* * tclCompExpr.c -- * - * This file contains the code to parse and compile Tcl expressions - * and implementations of the Tcl commands corresponding to expression - * operators, such as the command ::tcl::mathop::+ . + * This file contains the code to compile Tcl expressions. * - * Contributions from Don Porter, NIST, 2006-2007. (not subject to US copyright) + * Copyright (c) 1997 Sun Microsystems, Inc. + * Copyright (c) 1998-2000 by Scriptics Corporation. * - * See the file "license.terms" for information on usage and redistribution of - * this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclCompile.h" /* CompileEnv */ +#include "tclCompile.h" /* - * Expression parsing takes place in the routine ParseExpr(). It takes a - * string as input, parses that string, and generates a representation of - * the expression in the form of a tree of operators, a list of literals, - * a list of function names, and an array of Tcl_Token's within a Tcl_Parse - * struct. The tree is composed of OpNodes. + * The stuff below is a bit of a hack so that this file can be used in + * environments that include no UNIX, i.e. no errno: just arrange to use + * the errno from tclExecute.c here. */ -typedef struct OpNode { - int left; /* "Pointer" to the left operand. */ - int right; /* "Pointer" to the right operand. */ - union { - int parent; /* "Pointer" to the parent operand. */ - int prev; /* "Pointer" joining incomplete tree stack */ - } p; - unsigned char lexeme; /* Code that identifies the operator. */ - unsigned char precedence; /* Precedence of the operator */ - unsigned char mark; /* Mark used to control traversal. */ - unsigned char constant; /* Flag marking constant subexpressions. */ -} OpNode; - -/* - * The storage for the tree is dynamically allocated array of OpNodes. The - * array is grown as parsing needs dictate according to a scheme similar to - * Tcl's string growth algorithm, so that the resizing costs are O(N) and so - * that we use at least half the memory allocated as expressions get large. - * - * Each OpNode in the tree represents an operator in the expression, either - * unary or binary. When parsing is completed successfully, a binary operator - * OpNode will have its left and right fields filled with "pointers" to its - * left and right operands. A unary operator OpNode will have its right field - * filled with a pointer to its single operand. When an operand is a - * subexpression the "pointer" takes the form of the index -- a non-negative - * integer -- into the OpNode storage array where the root of that - * subexpression parse tree is found. - * - * Non-operator elements of the expression do not get stored in the OpNode - * tree. They are stored in the other structures according to their type. - * Literal values get appended to the literal list. Elements that denote - * forms of quoting or substitution known to the Tcl parser get stored as - * Tcl_Tokens. These non-operator elements of the expression are the - * leaves of the completed parse tree. When an operand of an OpNode is - * one of these leaf elements, the following negative integer codes are used - * to indicate which kind of elements it is. - */ +#ifndef TCL_GENERIC_ONLY +#include "tclPort.h" +#else +#define NO_ERRNO_H +#endif -enum OperandTypes { - OT_LITERAL = -3, /* Operand is a literal in the literal list */ - OT_TOKENS = -2, /* Operand is sequence of Tcl_Tokens */ - OT_EMPTY = -1 /* "Operand" is an empty string. This is a - * special case used only to represent the - * EMPTY lexeme. See below. */ -}; +#ifdef NO_ERRNO_H +extern int errno; /* Use errno from tclExecute.c. */ +#define ERANGE 34 +#endif /* - * Readable macros to test whether a "pointer" value points to an operator. - * They operate on the "non-negative integer -> operator; negative integer -> - * a non-operator OperandType" distinction. + * Boolean variable that controls whether expression compilation tracing + * is enabled. */ -#define IsOperator(l) ((l) >= 0) -#define NotOperator(l) ((l) < 0) +#ifdef TCL_COMPILE_DEBUG +static int traceExprComp = 0; +#endif /* TCL_COMPILE_DEBUG */ /* - * Note that it is sufficient to store in the tree just the type of leaf - * operand, without any explicit pointer to which leaf. This is true because - * the traversals of the completed tree we perform are known to visit - * the leaves in the same order as the original parse. - * - * In a completed parse tree, those OpNodes that are themselves (roots of - * subexpression trees that are) operands of some operator store in their - * p.parent field a "pointer" to the OpNode of that operator. The p.parent - * field permits a traversal of the tree within a * non-recursive routine - * (ConvertTreeToTokens() and CompileExprTree()). This means that even - * expression trees of great depth pose no risk of blowing the C stack. - * - * While the parse tree is being constructed, the same memory space is used - * to hold the p.prev field which chains together a stack of incomplete - * trees awaiting their right operands. - * - * The lexeme field is filled in with the lexeme of the operator that is - * returned by the ParseLexeme() routine. Only lexemes for unary and - * binary operators get stored in an OpNode. Other lexmes get different - * treatement. - * - * The precedence field provides a place to store the precedence of the - * operator, so it need not be looked up again and again. - * - * The mark field is use to control the traversal of the tree, so - * that it can be done non-recursively. The mark values are: + * The ExprInfo structure describes the state of compiling an expression. + * A pointer to an ExprInfo record is passed among the routines in + * this module. */ -enum Marks { - MARK_LEFT, /* Next step of traversal is to visit left subtree */ - MARK_RIGHT, /* Next step of traversal is to visit right subtree */ - MARK_PARENT /* Next step of traversal is to return to parent */ -}; +typedef struct ExprInfo { + Tcl_Interp *interp; /* Used for error reporting. */ + Tcl_Parse *parsePtr; /* Structure filled with information about + * the parsed expression. */ + CONST char *expr; /* The expression that was originally passed + * to TclCompileExpr. */ + CONST char *lastChar; /* Points just after last byte of expr. */ + int hasOperators; /* Set 1 if the expr has operators; 0 if + * expr is only a primary. If 1 after + * compiling an expr, a tryCvtToNumeric + * instruction is emitted to convert the + * primary to a number if possible. */ +} ExprInfo; /* - * The constant field is a boolean flag marking which subexpressions are - * completely known at compile time, and are eligible for computing then - * rather than waiting until run time. + * Definitions of numeric codes representing each expression operator. + * The order of these must match the entries in the operatorTable below. + * Also the codes for the relational operators (OP_LESS, OP_GREATER, + * OP_LE, OP_GE, OP_EQ, and OP_NE) must be consecutive and in that order. + * Note that OP_PLUS and OP_MINUS represent both unary and binary operators. */ -/* - * Each lexeme belongs to one of four categories, which determine - * its place in the parse tree. We use the two high bits of the - * (unsigned char) value to store a NODE_TYPE code. - */ - -#define NODE_TYPE 0xC0 - -/* - * The four category values are LEAF, UNARY, and BINARY, explained below, - * and "uncategorized", which is used either temporarily, until context - * determines which of the other three categories is correct, or for - * lexemes like INVALID, which aren't really lexemes at all, but indicators - * of a parsing error. Note that the codes must be distinct to distinguish - * categories, but need not take the form of a bit array. - */ +#define OP_MULT 0 +#define OP_DIVIDE 1 +#define OP_MOD 2 +#define OP_PLUS 3 +#define OP_MINUS 4 +#define OP_LSHIFT 5 +#define OP_RSHIFT 6 +#define OP_LESS 7 +#define OP_GREATER 8 +#define OP_LE 9 +#define OP_GE 10 +#define OP_EQ 11 +#define OP_NEQ 12 +#define OP_BITAND 13 +#define OP_BITXOR 14 +#define OP_BITOR 15 +#define OP_LAND 16 +#define OP_LOR 17 +#define OP_QUESTY 18 +#define OP_LNOT 19 +#define OP_BITNOT 20 +#define OP_STREQ 21 +#define OP_STRNEQ 22 -#define BINARY 0x40 /* This lexeme is a binary operator. An - * OpNode representing it should go into the - * parse tree, and two operands should be - * parsed for it in the expression. */ -#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode - * representing it should go into the parse - * tree, and one operand should be parsed for - * it in the expression. */ -#define LEAF 0xC0 /* This lexeme is a leaf operand in the parse - * tree. No OpNode will be placed in the tree - * for it. Either a literal value will be - * appended to the list of literals in this - * expression, or appropriate Tcl_Tokens will - * be appended in a Tcl_Parse struct to - * represent those leaves that require some - * form of substitution. - */ - -/* Uncategorized lexemes */ - -#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or - * BINARY_PLUS according to context. */ -#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or - * BINARY_MINUS according to context. */ -#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to - * FUNCTION or a parse error according to - * context and value. */ -#define INCOMPLETE 4 /* A parse error. Used only when the single - * "=" is encountered. */ -#define INVALID 5 /* A parse error. Used when any punctuation - * appears that's not a supported operator. */ - -/* Leaf lexemes */ - -#define NUMBER ( LEAF | 1) /* For literal numbers */ -#define SCRIPT ( LEAF | 2) /* Script substitution; [foo] */ -#define BOOLEAN ( LEAF | BAREWORD) /* For literal booleans */ -#define BRACED ( LEAF | 4) /* Braced string; {foo bar} */ -#define VARIABLE ( LEAF | 5) /* Variable substitution; $x */ -#define QUOTED ( LEAF | 6) /* Quoted string; "foo $bar [soom]" */ -#define EMPTY ( LEAF | 7) /* Used only for an empty argument - * list to a function. Represents - * the empty string within parens in - * the expression: rand() */ - -/* Unary operator lexemes */ - -#define UNARY_PLUS ( UNARY | PLUS) -#define UNARY_MINUS ( UNARY | MINUS) -#define FUNCTION ( UNARY | BAREWORD) /* This is a bit of "creative - * interpretation" on the part of the - * parser. A function call is parsed - * into the parse tree according to - * the perspective that the function - * name is a unary operator and its - * argument list, enclosed in parens, - * is its operand. The additional - * requirements not implied generally - * by treatment as a unary operator -- - * for example, the requirement that - * the operand be enclosed in parens -- - * are hard coded in the relevant - * portions of ParseExpr(). We trade - * off the need to include such - * exceptional handling in the code - * against the need we would otherwise - * have for more lexeme categories. */ -#define START ( UNARY | 4) /* This lexeme isn't parsed from the - * expression text at all. It - * represents the start of the - * expression and sits at the root of - * the parse tree where it serves as - * the start/end point of traversals. */ -#define OPEN_PAREN ( UNARY | 5) /* Another bit of creative - * interpretation, where we treat "(" - * as a unary operator with the - * sub-expression between it and its - * matching ")" as its operand. See - * CLOSE_PAREN below. */ -#define NOT ( UNARY | 6) -#define BIT_NOT ( UNARY | 7) - -/* Binary operator lexemes */ - -#define BINARY_PLUS ( BINARY | PLUS) -#define BINARY_MINUS ( BINARY | MINUS) -#define COMMA ( BINARY | 3) /* The "," operator is a low precedence - * binary operator that separates the - * arguments in a function call. The - * additional constraint that this - * operator can only legally appear - * at the right places within a - * function call argument list are - * hard coded within ParseExpr(). */ -#define MULT ( BINARY | 4) -#define DIVIDE ( BINARY | 5) -#define MOD ( BINARY | 6) -#define LESS ( BINARY | 7) -#define GREATER ( BINARY | 8) -#define BIT_AND ( BINARY | 9) -#define BIT_XOR ( BINARY | 10) -#define BIT_OR ( BINARY | 11) -#define QUESTION ( BINARY | 12) /* These two lexemes make up the */ -#define COLON ( BINARY | 13) /* ternary conditional operator, - * $x ? $y : $z . We treat them as - * two binary operators to avoid - * another lexeme category, and - * code the additional constraints - * directly in ParseExpr(). For - * instance, the right operand of - * a "?" operator must be a ":" - * operator. */ -#define LEFT_SHIFT ( BINARY | 14) -#define RIGHT_SHIFT ( BINARY | 15) -#define LEQ ( BINARY | 16) -#define GEQ ( BINARY | 17) -#define EQUAL ( BINARY | 18) -#define NEQ ( BINARY | 19) -#define AND ( BINARY | 20) -#define OR ( BINARY | 21) -#define STREQ ( BINARY | 22) -#define STRNEQ ( BINARY | 23) -#define EXPON ( BINARY | 24) /* Unlike the other binary operators, - * EXPON is right associative and this - * distinction is coded directly in - * ParseExpr(). */ -#define IN_LIST ( BINARY | 25) -#define NOT_IN_LIST ( BINARY | 26) -#define CLOSE_PAREN ( BINARY | 27) /* By categorizing the CLOSE_PAREN - * lexeme as a BINARY operator, the - * normal parsing rules for binary - * operators assure that a close paren - * will not directly follow another - * operator, and the machinery already - * in place to connect operands to - * operators according to precedence - * performs most of the work of - * matching open and close parens for - * us. In the end though, a close - * paren is not really a binary - * operator, and some special coding - * in ParseExpr() make sure we never - * put an actual CLOSE_PAREN node - * in the parse tree. The - * sub-expression between parens - * becomes the single argument of - * the matching OPEN_PAREN unary - * operator. */ -#define END ( BINARY | 28) /* This lexeme represents the end of - * the string being parsed. Treating - * it as a binary operator follows the - * same logic as the CLOSE_PAREN lexeme - * and END pairs with START, in the - * same way that CLOSE_PAREN pairs with - * OPEN_PAREN. */ /* - * When ParseExpr() builds the parse tree it must choose which operands to - * connect to which operators. This is done according to operator precedence. - * The greater an operator's precedence the greater claim it has to link to - * an available operand. The Precedence enumeration lists the precedence - * values used by Tcl expression operators, from lowest to highest claim. - * Each precedence level is commented with the operators that hold that - * precedence. + * Table describing the expression operators. Entries in this table must + * correspond to the definitions of numeric codes for operators just above. */ -enum Precedence { - PREC_END = 1, /* END */ - PREC_START, /* START */ - PREC_CLOSE_PAREN, /* ")" */ - PREC_OPEN_PAREN, /* "(" */ - PREC_COMMA, /* "," */ - PREC_CONDITIONAL, /* "?", ":" */ - PREC_OR, /* "||" */ - PREC_AND, /* "&&" */ - PREC_BIT_OR, /* "|" */ - PREC_BIT_XOR, /* "^" */ - PREC_BIT_AND, /* "&" */ - PREC_EQUAL, /* "==", "!=", "eq", "ne", "in", "ni" */ - PREC_COMPARE, /* "<", ">", "<=", ">=" */ - PREC_SHIFT, /* "<<", ">>" */ - PREC_ADD, /* "+", "-" */ - PREC_MULT, /* "*", "/", "%" */ - PREC_EXPON, /* "**" */ - PREC_UNARY /* "+", "-", FUNCTION, "!", "~" */ +static int opTableInitialized = 0; /* 0 means not yet initialized. */ + +TCL_DECLARE_MUTEX(opMutex) + +typedef struct OperatorDesc { + CONST char *name; /* Name of the operator. */ + int numOperands; /* Number of operands. 0 if the operator + * requires special handling. */ + int instruction; /* Instruction opcode for the operator. + * Ignored if numOperands is 0. */ +} OperatorDesc; + +static CONST OperatorDesc operatorTable[] = { + {"*", 2, INST_MULT}, + {"/", 2, INST_DIV}, + {"%", 2, INST_MOD}, + {"+", 0, 0}, + {"-", 0, 0}, + {"<<", 2, INST_LSHIFT}, + {">>", 2, INST_RSHIFT}, + {"<", 2, INST_LT}, + {">", 2, INST_GT}, + {"<=", 2, INST_LE}, + {">=", 2, INST_GE}, + {"==", 2, INST_EQ}, + {"!=", 2, INST_NEQ}, + {"&", 2, INST_BITAND}, + {"^", 2, INST_BITXOR}, + {"|", 2, INST_BITOR}, + {"&&", 0, 0}, + {"||", 0, 0}, + {"?", 0, 0}, + {"!", 1, INST_LNOT}, + {"~", 1, INST_BITNOT}, + {"eq", 2, INST_STR_EQ}, + {"ne", 2, INST_STR_NEQ}, + {NULL, 0, 0} }; /* - * Here the same information contained in the comments above is stored - * in inverted form, so that given a lexeme, one can quickly look up - * its precedence value. + * Hashtable used to map the names of expression operators to the index + * of their OperatorDesc description. */ -static const unsigned char prec[] = { - /* Non-operator lexemes */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, - /* Binary operator lexemes */ - PREC_ADD, /* BINARY_PLUS */ - PREC_ADD, /* BINARY_MINUS */ - PREC_COMMA, /* COMMA */ - PREC_MULT, /* MULT */ - PREC_MULT, /* DIVIDE */ - PREC_MULT, /* MOD */ - PREC_COMPARE, /* LESS */ - PREC_COMPARE, /* GREATER */ - PREC_BIT_AND, /* BIT_AND */ - PREC_BIT_XOR, /* BIT_XOR */ - PREC_BIT_OR, /* BIT_OR */ - PREC_CONDITIONAL, /* QUESTION */ - PREC_CONDITIONAL, /* COLON */ - PREC_SHIFT, /* LEFT_SHIFT */ - PREC_SHIFT, /* RIGHT_SHIFT */ - PREC_COMPARE, /* LEQ */ - PREC_COMPARE, /* GEQ */ - PREC_EQUAL, /* EQUAL */ - PREC_EQUAL, /* NEQ */ - PREC_AND, /* AND */ - PREC_OR, /* OR */ - PREC_EQUAL, /* STREQ */ - PREC_EQUAL, /* STRNEQ */ - PREC_EXPON, /* EXPON */ - PREC_EQUAL, /* IN_LIST */ - PREC_EQUAL, /* NOT_IN_LIST */ - PREC_CLOSE_PAREN, /* CLOSE_PAREN */ - PREC_END, /* END */ - /* Expansion room for more binary operators */ - 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, - /* Unary operator lexemes */ - PREC_UNARY, /* UNARY_PLUS */ - PREC_UNARY, /* UNARY_MINUS */ - PREC_UNARY, /* FUNCTION */ - PREC_START, /* START */ - PREC_OPEN_PAREN, /* OPEN_PAREN */ - PREC_UNARY, /* NOT*/ - PREC_UNARY, /* BIT_NOT*/ -}; - -/* - * A table mapping lexemes to bytecode instructions, used by CompileExprTree(). - */ - -static const unsigned char instruction[] = { - /* Non-operator lexemes */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, - /* Binary operator lexemes */ - INST_ADD, /* BINARY_PLUS */ - INST_SUB, /* BINARY_MINUS */ - 0, /* COMMA */ - INST_MULT, /* MULT */ - INST_DIV, /* DIVIDE */ - INST_MOD, /* MOD */ - INST_LT, /* LESS */ - INST_GT, /* GREATER */ - INST_BITAND, /* BIT_AND */ - INST_BITXOR, /* BIT_XOR */ - INST_BITOR, /* BIT_OR */ - 0, /* QUESTION */ - 0, /* COLON */ - INST_LSHIFT, /* LEFT_SHIFT */ - INST_RSHIFT, /* RIGHT_SHIFT */ - INST_LE, /* LEQ */ - INST_GE, /* GEQ */ - INST_EQ, /* EQUAL */ - INST_NEQ, /* NEQ */ - 0, /* AND */ - 0, /* OR */ - INST_STR_EQ, /* STREQ */ - INST_STR_NEQ, /* STRNEQ */ - INST_EXPON, /* EXPON */ - INST_LIST_IN, /* IN_LIST */ - INST_LIST_NOT_IN, /* NOT_IN_LIST */ - 0, /* CLOSE_PAREN */ - 0, /* END */ - /* Expansion room for more binary operators */ - 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, - /* Unary operator lexemes */ - INST_UPLUS, /* UNARY_PLUS */ - INST_UMINUS, /* UNARY_MINUS */ - 0, /* FUNCTION */ - 0, /* START */ - 0, /* OPEN_PAREN */ - INST_LNOT, /* NOT*/ - INST_BITNOT, /* BIT_NOT*/ -}; - -/* - * A table mapping a byte value to the corresponding lexeme for use by - * ParseLexeme(). - */ - -static const unsigned char Lexeme[] = { - INVALID /* NUL */, INVALID /* SOH */, - INVALID /* STX */, INVALID /* ETX */, - INVALID /* EOT */, INVALID /* ENQ */, - INVALID /* ACK */, INVALID /* BEL */, - INVALID /* BS */, INVALID /* HT */, - INVALID /* LF */, INVALID /* VT */, - INVALID /* FF */, INVALID /* CR */, - INVALID /* SO */, INVALID /* SI */, - INVALID /* DLE */, INVALID /* DC1 */, - INVALID /* DC2 */, INVALID /* DC3 */, - INVALID /* DC4 */, INVALID /* NAK */, - INVALID /* SYN */, INVALID /* ETB */, - INVALID /* CAN */, INVALID /* EM */, - INVALID /* SUB */, INVALID /* ESC */, - INVALID /* FS */, INVALID /* GS */, - INVALID /* RS */, INVALID /* US */, - INVALID /* SPACE */, 0 /* ! or != */, - QUOTED /* " */, INVALID /* # */, - VARIABLE /* $ */, MOD /* % */, - 0 /* & or && */, INVALID /* ' */, - OPEN_PAREN /* ( */, CLOSE_PAREN /* ) */, - 0 /* * or ** */, PLUS /* + */, - COMMA /* , */, MINUS /* - */, - 0 /* . */, DIVIDE /* / */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-9 */ - COLON /* : */, INVALID /* ; */, - 0 /* < or << or <= */, - 0 /* == or INVALID */, - 0 /* > or >> or >= */, - QUESTION /* ? */, INVALID /* @ */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A-M */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* N-Z */ - SCRIPT /* [ */, INVALID /* \ */, - INVALID /* ] */, BIT_XOR /* ^ */, - INVALID /* _ */, INVALID /* ` */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a-m */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* n-z */ - BRACED /* { */, 0 /* | or || */, - INVALID /* } */, BIT_NOT /* ~ */, - INVALID /* DEL */ -}; +static Tcl_HashTable opHashTable; /* - * The JumpList struct is used to create a stack of data needed for the - * TclEmitForwardJump() and TclFixupForwardJump() calls that are performed - * when compiling the short-circuiting operators QUESTION/COLON, AND, and OR. - * Keeping a stack permits the CompileExprTree() routine to be non-recursive. + * Declarations for local procedures to this file: */ -typedef struct JumpList { - JumpFixup jump; /* Pass this argument to matching calls of - * TclEmitForwardJump() and - * TclFixupForwardJump(). */ - int depth; /* Remember the currStackDepth of the - * CompileEnv here. */ - int offset; /* Data used to compute jump lengths to pass - * to TclFixupForwardJump() */ - int convert; /* Temporary storage used to compute whether - * numeric conversion will be needed following - * the operator we're compiling. */ - struct JumpList *next; /* Point to next item on the stack */ -} JumpList; +static int CompileCondExpr _ANSI_ARGS_(( + Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, + CompileEnv *envPtr, Tcl_Token **endPtrPtr)); +static int CompileLandOrLorExpr _ANSI_ARGS_(( + Tcl_Token *exprTokenPtr, int opIndex, + ExprInfo *infoPtr, CompileEnv *envPtr, + Tcl_Token **endPtrPtr)); +static int CompileMathFuncCall _ANSI_ARGS_(( + Tcl_Token *exprTokenPtr, CONST char *funcName, + ExprInfo *infoPtr, CompileEnv *envPtr, + Tcl_Token **endPtrPtr)); +static int CompileSubExpr _ANSI_ARGS_(( + Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, + CompileEnv *envPtr)); +static void LogSyntaxError _ANSI_ARGS_((ExprInfo *infoPtr)); /* - * Declarations for local functions to this file: + * Macro used to debug the execution of the expression compiler. */ -static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, - int index, Tcl_Obj *const **litObjvPtr, - Tcl_Obj *const *funcObjv, Tcl_Token *tokenPtr, - CompileEnv *envPtr, int optimize); -static void ConvertTreeToTokens(const char *start, int numBytes, - OpNode *nodes, Tcl_Token *tokenPtr, - Tcl_Parse *parsePtr); -static int ExecConstantExprTree(Tcl_Interp *interp, OpNode *nodes, - int index, Tcl_Obj * const **litObjvPtr); -static int ParseExpr(Tcl_Interp *interp, const char *start, - int numBytes, OpNode **opTreePtr, - Tcl_Obj *litList, Tcl_Obj *funcList, - Tcl_Parse *parsePtr, int parseOnly); -static int ParseLexeme(const char *start, int numBytes, - unsigned char *lexemePtr, Tcl_Obj **literalPtr); - +#ifdef TCL_COMPILE_DEBUG +#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) \ + if (traceExprComp) { \ + fprintf(stderr, "CompileSubExpr: \"%.*s\", token \"%.*s\"\n", \ + (exprLength), (exprBytes), (tokenLength), (tokenBytes)); \ + } +#else +#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) +#endif /* TCL_COMPILE_DEBUG */ /* *---------------------------------------------------------------------- * - * ParseExpr -- + * TclCompileExpr -- * - * Given a string, the numBytes bytes starting at start, this function - * parses it as a Tcl expression and constructs a tree representing - * the structure of the expression. The caller must pass in empty - * lists as the funcList and litList arguments. The elements of the - * parsed expression are returned to the caller as that tree, a list of - * literal values, a list of function names, and in Tcl_Tokens - * added to a Tcl_Parse struct passed in by the caller. + * This procedure compiles a string containing a Tcl expression into + * Tcl bytecodes. This procedure is the top-level interface to the + * the expression compilation module, and is used by such public + * procedures as Tcl_ExprString, Tcl_ExprStringObj, Tcl_ExprLong, + * Tcl_ExprDouble, Tcl_ExprBoolean, and Tcl_ExprBooleanObj. * * Results: - * If the string is successfully parsed as a valid Tcl expression, TCL_OK - * is returned, and data about the expression structure is written to - * the last four arguments. If the string cannot be parsed as a valid - * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL, an - * error message is written to interp. + * The return value is TCL_OK on a successful compilation and TCL_ERROR + * on failure. If TCL_ERROR is returned, then the interpreter's result + * contains an error message. * * Side effects: - * Memory will be allocated. If TCL_OK is returned, the caller must - * clean up the returned data structures. The (OpNode *) value written - * to opTreePtr should be passed to ckfree() and the parsePtr argument - * should be passed to Tcl_FreeParse(). The elements appended to the - * litList and funcList will automatically be freed whenever the - * refcount on those lists indicates they can be freed. + * Adds instructions to envPtr to evaluate the expression at runtime. * *---------------------------------------------------------------------- */ -static int -ParseExpr( - Tcl_Interp *interp, /* Used for error reporting. */ - const char *start, /* Start of source string to parse. */ - int numBytes, /* Number of bytes in string. */ - OpNode **opTreePtr, /* Points to space where a pointer to the - * allocated OpNode tree should go. */ - Tcl_Obj *litList, /* List to append literals to. */ - Tcl_Obj *funcList, /* List to append function names to. */ - Tcl_Parse *parsePtr, /* Structure to fill with tokens representing - * those operands that require run time - * substitutions. */ - int parseOnly) /* A boolean indicating whether the caller's - * aim is just a parse, or whether it will go - * on to compile the expression. Different - * optimizations are appropriate for the - * two scenarios. */ +int +TclCompileExpr(interp, script, numBytes, envPtr) + Tcl_Interp *interp; /* Used for error reporting. */ + CONST char *script; /* The source script to compile. */ + int numBytes; /* Number of bytes in script. If < 0, the + * string consists of all bytes up to the + * first null character. */ + CompileEnv *envPtr; /* Holds resulting instructions. */ { - OpNode *nodes = NULL; /* Pointer to the OpNode storage array where - * we build the parse tree. */ - int nodesAvailable = 64; /* Initial size of the storage array. This - * value establishes a minimum tree memory cost - * of only about 1 kibyte, and is large enough - * for most expressions to parse with no need - * for array growth and reallocation. */ - int nodesUsed = 0; /* Number of OpNodes filled. */ - int scanned = 0; /* Capture number of byte scanned by - * parsing routines. */ - int lastParsed; /* Stores info about what the lexeme parsed - * the previous pass through the parsing loop - * was. If it was an operator, lastParsed is - * the index of the OpNode for that operator. - * If it was not an operator, lastParsed holds - * an OperandTypes value encoding what we - * need to know about it. */ - int incomplete; /* Index of the most recent incomplete tree - * in the OpNode array. Heads a stack of - * incomplete trees linked by p.prev. */ - int complete = OT_EMPTY; /* "Index" of the complete tree (that is, a - * complete subexpression) determined at the - * moment. OT_EMPTY is a nonsense value - * used only to silence compiler warnings. - * During a parse, complete will always hold - * an index or an OperandTypes value pointing - * to an actual leaf at the time the complete - * tree is needed. */ - - /* These variables control generation of the error message. */ - Tcl_Obj *msg = NULL; /* The error message. */ - Tcl_Obj *post = NULL; /* In a few cases, an additional postscript - * for the error message, supplying more - * information after the error msg and - * location have been reported. */ - const char *mark = "_@_"; /* In the portion of the complete error message - * where the error location is reported, this - * "mark" substring is inserted into the - * string being parsed to aid in pinpointing - * the location of the syntax error in the - * expression. */ - int insertMark = 0; /* A boolean controlling whether the "mark" - * should be inserted. */ - const int limit = 25; /* Portions of the error message are - * constructed out of substrings of the - * original expression. In order to keep the - * error message readable, we impose this limit - * on the substring size we extract. */ - - TclParseInit(interp, start, numBytes, parsePtr); - - nodes = (OpNode *) attemptckalloc(nodesAvailable * sizeof(OpNode)); - if (nodes == NULL) { - TclNewLiteralStringObj(msg, "not enough memory to parse expression"); - goto error; - } - - /* Initialize the parse tree with the special "START" node. */ - nodes->lexeme = START; - nodes->precedence = prec[START]; - nodes->mark = MARK_RIGHT; - nodes->constant = 1; - incomplete = lastParsed = nodesUsed; - nodesUsed++; + ExprInfo info; + Tcl_Parse parse; + Tcl_HashEntry *hPtr; + int new, i, code; /* - * Main parsing loop parses one lexeme per iteration. We exit the - * loop only when there's a syntax error with a "goto error" which - * takes us to the error handling code following the loop, or when - * we've successfully completed the parse and we return to the caller. + * If this is the first time we've been called, initialize the table + * of expression operators. */ - while (1) { - OpNode *nodePtr; /* Points to the OpNode we may fill this - * pass through the loop. */ - unsigned char lexeme; /* The lexeme we parse this iteration. */ - Tcl_Obj *literal; /* Filled by the ParseLexeme() call when - * a literal is parsed that has a Tcl_Obj - * rep worth preserving. */ - - /* - * Each pass through this loop adds up to one more OpNode. Allocate - * space for one if required. - */ - - if (nodesUsed >= nodesAvailable) { - int size = nodesUsed * 2; - OpNode *newPtr; - - do { - newPtr = (OpNode *) attemptckrealloc((char *) nodes, - (unsigned int) size * sizeof(OpNode)); - } while ((newPtr == NULL) - && ((size -= (size - nodesUsed) / 2) > nodesUsed)); - if (newPtr == NULL) { - TclNewLiteralStringObj(msg, - "not enough memory to parse expression"); - goto error; - } - nodesAvailable = size; - nodes = newPtr; - } - nodePtr = nodes + nodesUsed; - - /* Skip white space between lexemes. */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start += scanned; - numBytes -= scanned; - - scanned = ParseLexeme(start, numBytes, &lexeme, &literal); - - /* Use context to categorize the lexemes that are ambiguous. */ - if ((NODE_TYPE & lexeme) == 0) { - switch (lexeme) { - case INVALID: - msg = Tcl_ObjPrintf( - "invalid character \"%.*s\"", scanned, start); - goto error; - case INCOMPLETE: - msg = Tcl_ObjPrintf( - "incomplete operator \"%.*s\"", scanned, start); - goto error; - case BAREWORD: - - /* - * Most barewords in an expression are a syntax error. - * The exceptions are that when a bareword is followed by - * an open paren, it might be a function call, and when the - * bareword is a legal literal boolean value, we accept that - * as well. - */ - - if (start[scanned+TclParseAllWhiteSpace( - start+scanned, numBytes-scanned)] == '(') { - lexeme = FUNCTION; - - /* - * When we compile the expression we'll need the function - * name, and there's no place in the parse tree to store - * it, so we keep a separate list of all the function - * names we've parsed in the order we found them. - */ - - Tcl_ListObjAppendElement(NULL, funcList, literal); - } else { - int b; - if (Tcl_GetBooleanFromObj(NULL, literal, &b) == TCL_OK) { - lexeme = BOOLEAN; - } else { - Tcl_DecrRefCount(literal); - msg = Tcl_ObjPrintf( - "invalid bareword \"%.*s%s\"", - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "..."); - post = Tcl_ObjPrintf( - "should be \"$%.*s%s\" or \"{%.*s%s}\"", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - Tcl_AppendPrintfToObj(post, - " or \"%.*s%s(...)\" or ...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - if (start[0] == '0') { - const char *stop; - TclParseNumber(NULL, NULL, NULL, start, scanned, - &stop, TCL_PARSE_NO_WHITESPACE); - - if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { - parsePtr->errorType = TCL_PARSE_BAD_NUMBER; - - switch (start[1]) { - case 'b': - Tcl_AppendToObj(post, - " (invalid binary number?)", -1); - break; - case 'o': - Tcl_AppendToObj(post, - " (invalid octal number?)", -1); - break; - default: - if (isdigit(UCHAR(start[1]))) { - Tcl_AppendToObj(post, - " (invalid octal number?)", -1); - } - break; - } - } - } - goto error; - } - } - break; - case PLUS: - case MINUS: - if (IsOperator(lastParsed)) { - - /* - * A "+" or "-" coming just after another operator - * must be interpreted as a unary operator. - */ - - lexeme |= UNARY; - } else { - lexeme |= BINARY; - } - } - } /* Uncategorized lexemes */ - - /* Handle lexeme based on its category. */ - switch (NODE_TYPE & lexeme) { - - /* - * Each LEAF results in either a literal getting appended to the - * litList, or a sequence of Tcl_Tokens representing a Tcl word - * getting appended to the parsePtr->tokens. No OpNode is filled - * for this lexeme. - */ - - case LEAF: { - Tcl_Token *tokenPtr; - const char *end = start; - int wordIndex; - int code = TCL_OK; - - /* - * A leaf operand appearing just after something that's not an - * operator is a syntax error. - */ - - if (NotOperator(lastParsed)) { - msg = Tcl_ObjPrintf("missing operator at %s", mark); - scanned = 0; - insertMark = 1; - - /* Free any literal to avoid a memleak. */ - if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) { - Tcl_DecrRefCount(literal); - } - goto error; - } - - switch (lexeme) { - case NUMBER: - case BOOLEAN: - /* - * TODO: Consider using a dict or hash to collapse all - * duplicate literals into a single representative value. - * (Like what is done with [split $s {}]). - * Pro: ~75% memory saving on expressions like - * {1+1+1+1+1+.....+1} (Convert "pointer + Tcl_Obj" cost - * to "pointer" cost only) - * Con: Cost of the dict store/retrieve on every literal - * in every expression when expressions like the above - * tend to be uncommon. - * The memory savings is temporary; Compiling to bytecode - * will collapse things as literals are registered - * anyway, so the savings applies only to the time - * between parsing and compiling. Possibly important - * due to high-water mark nature of memory allocation. - */ - Tcl_ListObjAppendElement(NULL, litList, literal); - complete = lastParsed = OT_LITERAL; - start += scanned; - numBytes -= scanned; - continue; - - default: - break; - } - - /* - * Remaining LEAF cases may involve filling Tcl_Tokens, so - * make room for at least 2 more tokens. - */ - - TclGrowParseTokenArray(parsePtr, 2); - wordIndex = parsePtr->numTokens; - tokenPtr = parsePtr->tokenPtr + wordIndex; - tokenPtr->type = TCL_TOKEN_WORD; - tokenPtr->start = start; - parsePtr->numTokens++; - - switch (lexeme) { - case QUOTED: - code = Tcl_ParseQuotedString(NULL, start, numBytes, - parsePtr, 1, &end); - scanned = end - start; - break; - - case BRACED: - code = Tcl_ParseBraces(NULL, start, numBytes, - parsePtr, 1, &end); - scanned = end - start; - break; - - case VARIABLE: - code = Tcl_ParseVarName(NULL, start, numBytes, parsePtr, 1); - - /* - * Handle the quirk that Tcl_ParseVarName reports a successful - * parse even when it gets only a "$" with no variable name. - */ - - tokenPtr = parsePtr->tokenPtr + wordIndex + 1; - if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) { - TclNewLiteralStringObj(msg, "invalid character \"$\""); - goto error; - } - scanned = tokenPtr->size; - break; - - case SCRIPT: { - Tcl_Parse *nestedPtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); - - tokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - tokenPtr->type = TCL_TOKEN_COMMAND; - tokenPtr->start = start; - tokenPtr->numComponents = 0; - - end = start + numBytes; - start++; - while (1) { - code = Tcl_ParseCommand(interp, start, (end - start), 1, - nestedPtr); - if (code != TCL_OK) { - parsePtr->term = nestedPtr->term; - parsePtr->errorType = nestedPtr->errorType; - parsePtr->incomplete = nestedPtr->incomplete; - break; - } - start = (nestedPtr->commandStart + nestedPtr->commandSize); - Tcl_FreeParse(nestedPtr); - if ((nestedPtr->term < end) && (*(nestedPtr->term) == ']') - && !(nestedPtr->incomplete)) { - break; - } - - if (start == end) { - TclNewLiteralStringObj(msg, "missing close-bracket"); - parsePtr->term = tokenPtr->start; - parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; - parsePtr->incomplete = 1; - code = TCL_ERROR; - break; - } - } - TclStackFree(interp, nestedPtr); - end = start; - start = tokenPtr->start; - scanned = end - start; - tokenPtr->size = scanned; - parsePtr->numTokens++; - break; - } - } - if (code != TCL_OK) { - - /* - * Here we handle all the syntax errors generated by - * the Tcl_Token generating parsing routines called in the - * switch just above. If the value of parsePtr->incomplete - * is 1, then the error was an unbalanced '[', '(', '{', - * or '"' and parsePtr->term is pointing to that unbalanced - * character. If the value of parsePtr->incomplete is 0, - * then the error is one of lacking whitespace following a - * quoted word, for example: expr {[an error {foo}bar]}, - * and parsePtr->term points to where the whitespace is - * missing. We reset our values of start and scanned so that - * when our error message is constructed, the location of - * the syntax error is sure to appear in it, even if the - * quoted expression is truncated. - */ - - start = parsePtr->term; - scanned = parsePtr->incomplete; - goto error; - } - - tokenPtr = parsePtr->tokenPtr + wordIndex; - tokenPtr->size = scanned; - tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1; - if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) { - - /* - * When this expression is destined to be compiled, and a - * braced or quoted word within an expression is known at - * compile time (no runtime substitutions in it), we can - * store it as a literal rather than in its tokenized form. - * This is an advantage since the compiled bytecode is going - * to need the argument in Tcl_Obj form eventually, so it's - * just as well to get there now. Another advantage is that - * with this conversion, larger constant expressions might - * be grown and optimized. - * - * On the contrary, if the end goal of this parse is to - * fill a Tcl_Parse for a caller of Tcl_ParseExpr(), then it's - * wasteful to convert to a literal only to convert back again - * later. - */ - - literal = Tcl_NewObj(); - if (TclWordKnownAtCompileTime(tokenPtr, literal)) { - Tcl_ListObjAppendElement(NULL, litList, literal); - complete = lastParsed = OT_LITERAL; - parsePtr->numTokens = wordIndex; - break; - } - Tcl_DecrRefCount(literal); - } - complete = lastParsed = OT_TOKENS; - break; - } /* case LEAF */ - - case UNARY: - - /* - * A unary operator appearing just after something that's not an - * operator is a syntax error -- something trying to be the left - * operand of an operator that doesn't take one. - */ - - if (NotOperator(lastParsed)) { - msg = Tcl_ObjPrintf("missing operator at %s", mark); - scanned = 0; - insertMark = 1; - goto error; - } - - /* Create an OpNode for the unary operator */ - nodePtr->lexeme = lexeme; - nodePtr->precedence = prec[lexeme]; - nodePtr->mark = MARK_RIGHT; - - /* - * A FUNCTION cannot be a constant expression, because Tcl allows - * functions to return variable results with the same arguments; - * for example, rand(). Other unary operators can root a constant - * expression, so long as the argument is a constant expression. - */ - - nodePtr->constant = (lexeme != FUNCTION); - - /* - * This unary operator is a new incomplete tree, so push it - * onto our stack of incomplete trees. Also remember it as - * the last lexeme we parsed. - */ - - nodePtr->p.prev = incomplete; - incomplete = lastParsed = nodesUsed; - nodesUsed++; - break; - - case BINARY: { - OpNode *incompletePtr; - unsigned char precedence = prec[lexeme]; - - /* - * A binary operator appearing just after another operator is a - * syntax error -- one of the two operators is missing an operand. - */ - - if (IsOperator(lastParsed)) { - if ((lexeme == CLOSE_PAREN) - && (nodePtr[-1].lexeme == OPEN_PAREN)) { - if (nodePtr[-2].lexeme == FUNCTION) { - - /* - * Normally, "()" is a syntax error, but as a special - * case accept it as an argument list for a function. - * Treat this as a special LEAF lexeme, and restart - * the parsing loop with zero characters scanned. - * We'll parse the ")" again the next time through, - * but with the OT_EMPTY leaf as the subexpression - * between the parens. - */ - - scanned = 0; - complete = lastParsed = OT_EMPTY; - break; - } - msg = Tcl_ObjPrintf("empty subexpression at %s", mark); - scanned = 0; - insertMark = 1; - goto error; - } - - if (nodePtr[-1].precedence > precedence) { - if (nodePtr[-1].lexeme == OPEN_PAREN) { - TclNewLiteralStringObj(msg, "unbalanced open paren"); - parsePtr->errorType = TCL_PARSE_MISSING_PAREN; - } else if (nodePtr[-1].lexeme == COMMA) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; - } else if (nodePtr[-1].lexeme == START) { - TclNewLiteralStringObj(msg, "empty expression"); - } - } else { - if (lexeme == CLOSE_PAREN) { - TclNewLiteralStringObj(msg, "unbalanced close paren"); - } else if ((lexeme == COMMA) - && (nodePtr[-1].lexeme == OPEN_PAREN) - && (nodePtr[-2].lexeme == FUNCTION)) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; - } - } - if (msg == NULL) { - msg = Tcl_ObjPrintf("missing operand at %s", mark); - scanned = 0; - insertMark = 1; - } - goto error; - } - - /* - * Here is where the tree comes together. At this point, we - * have a stack of incomplete trees corresponding to - * substrings that are incomplete expressions, followed by - * a complete tree corresponding to a substring that is itself - * a complete expression, followed by the binary operator we have - * just parsed. The incomplete trees can each be completed by - * adding a right operand. - * - * To illustrate with an example, when we parse the expression - * "1+2*3-4" and we reach this point having just parsed the "-" - * operator, we have these incomplete trees: START, "1+", and - * "2*". Next we have the complete subexpression "3". Last is - * the "-" we've just parsed. - * - * The next step is to join our complete tree to an operator. - * The choice is governed by the precedence and associativity - * of the competing operators. If we connect it as the right - * operand of our most recent incomplete tree, we get a new - * complete tree, and we can repeat the process. The while - * loop following repeats this until precedence indicates it - * is time to join the complete tree as the left operand of - * the just parsed binary operator. - * - * Continuing the example, the first pass through the loop - * will join "3" to "2*"; the next pass will join "2*3" to - * "1+". Then we'll exit the loop and join "1+2*3" to "-". - * When we return to parse another lexeme, our stack of - * incomplete trees is START and "1+2*3-". - */ - - while (1) { - incompletePtr = nodes + incomplete; - - if (incompletePtr->precedence < precedence) { - break; - } - - if (incompletePtr->precedence == precedence) { - - /* Right association rules for exponentiation. */ - if (lexeme == EXPON) { - break; - } - - /* - * Special association rules for the conditional operators. - * The "?" and ":" operators have equal precedence, but - * must be linked up in sensible pairs. - */ - - if ((incompletePtr->lexeme == QUESTION) - && (NotOperator(complete) - || (nodes[complete].lexeme != COLON))) { - break; - } - if ((incompletePtr->lexeme == COLON) - && (lexeme == QUESTION)) { - break; - } - } - - /* Some special syntax checks... */ - - /* Parens must balance */ - if ((incompletePtr->lexeme == OPEN_PAREN) - && (lexeme != CLOSE_PAREN)) { - TclNewLiteralStringObj(msg, "unbalanced open paren"); - parsePtr->errorType = TCL_PARSE_MISSING_PAREN; - goto error; - } - - /* Right operand of "?" must be ":" */ - if ((incompletePtr->lexeme == QUESTION) - && (NotOperator(complete) - || (nodes[complete].lexeme != COLON))) { - msg = Tcl_ObjPrintf( - "missing operator \":\" at %s", mark); - scanned = 0; - insertMark = 1; - goto error; - } - - /* Operator ":" may only be right operand of "?" */ - if (IsOperator(complete) - && (nodes[complete].lexeme == COLON) - && (incompletePtr->lexeme != QUESTION)) { - TclNewLiteralStringObj(msg, - "unexpected operator \":\" " - "without preceding \"?\""); - goto error; - } - - /* - * Attach complete tree as right operand of most recent - * incomplete tree. - */ - - incompletePtr->right = complete; - if (IsOperator(complete)) { - nodes[complete].p.parent = incomplete; - incompletePtr->constant = incompletePtr->constant - && nodes[complete].constant; - } else { - incompletePtr->constant = incompletePtr->constant - && (complete == OT_LITERAL); - } - - /* - * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations each - * make up a single operator. Force them to agree whether they - * have a constant expression. - */ - - if ((incompletePtr->lexeme == QUESTION) - || (incompletePtr->lexeme == FUNCTION)) { - nodes[complete].constant = incompletePtr->constant; - } - - if (incompletePtr->lexeme == START) { - - /* - * Completing the START tree indicates we're done. - * Transfer the parse tree to the caller and return. - */ - - *opTreePtr = nodes; - return TCL_OK; - } - - /* - * With a right operand attached, last incomplete tree has - * become the complete tree. Pop it from the incomplete - * tree stack. - */ - - complete = incomplete; - incomplete = incompletePtr->p.prev; - - /* CLOSE_PAREN can only close one OPEN_PAREN. */ - if (incompletePtr->lexeme == OPEN_PAREN) { - break; - } - } - - /* More syntax checks... */ - - /* Parens must balance. */ - if (lexeme == CLOSE_PAREN) { - if (incompletePtr->lexeme != OPEN_PAREN) { - TclNewLiteralStringObj(msg, "unbalanced close paren"); - goto error; - } - } - - /* Commas must appear only in function argument lists. */ - if (lexeme == COMMA) { - if ((incompletePtr->lexeme != OPEN_PAREN) - || (incompletePtr[-1].lexeme != FUNCTION)) { - TclNewLiteralStringObj(msg, - "unexpected \",\" outside function argument list"); - goto error; + if (numBytes < 0) { + numBytes = (script? strlen(script) : 0); + } + if (!opTableInitialized) { + Tcl_MutexLock(&opMutex); + if (!opTableInitialized) { + Tcl_InitHashTable(&opHashTable, TCL_STRING_KEYS); + for (i = 0; operatorTable[i].name != NULL; i++) { + hPtr = Tcl_CreateHashEntry(&opHashTable, + operatorTable[i].name, &new); + if (new) { + Tcl_SetHashValue(hPtr, (ClientData) i); } } + opTableInitialized = 1; + } + Tcl_MutexUnlock(&opMutex); + } - /* Operator ":" may only be right operand of "?" */ - if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) { - TclNewLiteralStringObj(msg, - "unexpected operator \":\" without preceding \"?\""); - goto error; - } - - /* Create no node for a CLOSE_PAREN lexeme. */ - if (lexeme == CLOSE_PAREN) { - break; - } - - /* Link complete tree as left operand of new node. */ - nodePtr->lexeme = lexeme; - nodePtr->precedence = precedence; - nodePtr->mark = MARK_LEFT; - nodePtr->left = complete; - - /* - * The COMMA operator cannot be optimized, since the function - * needs all of its arguments, and optimization would reduce - * the number. Other binary operators root constant expressions - * when both arguments are constant expressions. - */ - - nodePtr->constant = (lexeme != COMMA); - - if (IsOperator(complete)) { - nodes[complete].p.parent = nodesUsed; - nodePtr->constant = nodePtr->constant - && nodes[complete].constant; - } else { - nodePtr->constant = nodePtr->constant - && (complete == OT_LITERAL); - } - - /* - * With a left operand attached and a right operand missing, - * the just-parsed binary operator is root of a new incomplete - * tree. Push it onto the stack of incomplete trees. - */ - - nodePtr->p.prev = incomplete; - incomplete = lastParsed = nodesUsed; - nodesUsed++; - break; - } /* case BINARY */ - } /* lexeme handler */ - - /* Advance past the just-parsed lexeme */ - start += scanned; - numBytes -= scanned; - } /* main parsing loop */ + /* + * Initialize the structure containing information abvout this + * expression compilation. + */ - error: + info.interp = interp; + info.parsePtr = &parse; + info.expr = script; + info.lastChar = (script + numBytes); + info.hasOperators = 0; /* - * We only get here if there's been an error. - * Any errors that didn't get a suitable parsePtr->errorType, - * get recorded as syntax errors. + * Parse the expression then compile it. */ - if (parsePtr->errorType == TCL_PARSE_SUCCESS) { - parsePtr->errorType = TCL_PARSE_SYNTAX; - } - - /* Free any partial parse tree we've built. */ - if (nodes != NULL) { - ckfree((char*) nodes); + code = Tcl_ParseExpr(interp, script, numBytes, &parse); + if (code != TCL_OK) { + goto done; } - if (interp == NULL) { - - /* Nowhere to report an error message, so just free it */ - if (msg) { - Tcl_DecrRefCount(msg); - } - } else { - - /* - * Construct the complete error message. Start with the simple - * error message, pulled from the interp result if necessary... - */ - - if (msg == NULL) { - msg = Tcl_GetObjResult(interp); - } +#ifdef TCL_TIP280 + /* TIP #280 : Track Lines within the expression */ + TclAdvanceLines (&envPtr->line, script, parse.tokenPtr->start); +#endif + code = CompileSubExpr(parse.tokenPtr, &info, envPtr); + if (code != TCL_OK) { + Tcl_FreeParse(&parse); + goto done; + } + + if (!info.hasOperators) { /* - * Add a detailed quote from the bad expression, displaying and - * sometimes marking the precise location of the syntax error. + * Attempt to convert the primary's object to an int or double. + * This is done in order to support Tcl's policy of interpreting + * operands if at all possible as first integers, else + * floating-point numbers. */ - - Tcl_AppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", - ((start - limit) < parsePtr->string) ? "" : "...", - ((start - limit) < parsePtr->string) - ? (int) (start - parsePtr->string) : limit - 3, - ((start - limit) < parsePtr->string) - ? parsePtr->string : start - limit + 3, - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "...", insertMark ? mark : "", - (start + scanned + limit > parsePtr->end) - ? (int) (parsePtr->end - start) - scanned : limit-3, - start + scanned, - (start + scanned + limit > parsePtr->end) ? "" : "..."); - - /* Next, append any postscript message. */ - if (post != NULL) { - Tcl_AppendToObj(msg, ";\n", -1); - Tcl_AppendObjToObj(msg, post); - Tcl_DecrRefCount(post); - } - Tcl_SetObjResult(interp, msg); - - /* Finally, place context information in the errorInfo. */ - numBytes = parsePtr->end - parsePtr->string; - Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( - "\n (parsing expression \"%.*s%s\")", - (numBytes < limit) ? numBytes : limit - 3, - parsePtr->string, (numBytes < limit) ? "" : "...")); + + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); } + Tcl_FreeParse(&parse); - return TCL_ERROR; + done: + return code; } /* *---------------------------------------------------------------------- * - * ConvertTreeToTokens -- + * TclFinalizeCompilation -- * - * Given a string, the numBytes bytes starting at start, and an OpNode - * tree and Tcl_Token array created by passing that same string to - * ParseExpr(), this function writes into *parsePtr the sequence of - * Tcl_Tokens needed so to satisfy the historical interface provided - * by Tcl_ParseExpr(). Note that this routine exists only for the sake - * of the public Tcl_ParseExpr() routine. It is not used by Tcl itself - * at all. + * Clean up the compilation environment so it can later be + * properly reinitialized. This procedure is called by Tcl_Finalize(). * * Results: * None. * * Side effects: - * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the - * parsed expression. + * Cleans up the compilation environment. At the moment, just the + * table of expression operators is freed. * *---------------------------------------------------------------------- */ -static void -ConvertTreeToTokens( - const char *start, - int numBytes, - OpNode *nodes, - Tcl_Token *tokenPtr, - Tcl_Parse *parsePtr) -{ - int subExprTokenIdx = 0; - OpNode *nodePtr = nodes; - int next = nodePtr->right; - - while (1) { - Tcl_Token *subExprTokenPtr; - int scanned, parentIdx; - unsigned char lexeme; - - /* - * Advance the mark so the next exit from this node won't retrace - * steps over ground already covered. - */ - - nodePtr->mark++; - - /* Handle next child node or leaf */ - switch (next) { - case OT_EMPTY: - - /* No tokens and no characters for the OT_EMPTY leaf. */ - break; - - case OT_LITERAL: - - /* Skip any white space that comes before the literal */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - - /* Reparse the literal to get pointers into source string */ - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - - TclGrowParseTokenArray(parsePtr, 2); - subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; - subExprTokenPtr->start = start; - subExprTokenPtr->size = scanned; - subExprTokenPtr->numComponents = 1; - subExprTokenPtr[1].type = TCL_TOKEN_TEXT; - subExprTokenPtr[1].start = start; - subExprTokenPtr[1].size = scanned; - subExprTokenPtr[1].numComponents = 0; - - parsePtr->numTokens += 2; - start +=scanned; - numBytes -= scanned; - break; - - case OT_TOKENS: { - - /* - * tokenPtr points to a token sequence that came from parsing - * a Tcl word. A Tcl word is made up of a sequence of one or - * more elements. When the word is only a single element, it's - * been the historical practice to replace the TCL_TOKEN_WORD - * token directly with a TCL_TOKEN_SUB_EXPR token. However, - * when the word has multiple elements, a TCL_TOKEN_WORD token - * is kept as a grouping device so that TCL_TOKEN_SUB_EXPR - * always has only one element. Wise or not, these are the - * rules the Tcl expr parser has followed, and for the sake - * of those few callers of Tcl_ParseExpr() we do not change - * them now. Internally, we can do better. - */ - - int toCopy = tokenPtr->numComponents + 1; - - if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { - - /* - * Single element word. Copy tokens and convert the leading - * token to TCL_TOKEN_SUB_EXPR. - */ - - TclGrowParseTokenArray(parsePtr, toCopy); - subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - memcpy(subExprTokenPtr, tokenPtr, - (size_t) toCopy * sizeof(Tcl_Token)); - subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; - parsePtr->numTokens += toCopy; - } else { - - /* - * Multiple element word. Create a TCL_TOKEN_SUB_EXPR - * token to lead, with fields initialized from the leading - * token, then copy entire set of word tokens. - */ - - TclGrowParseTokenArray(parsePtr, toCopy+1); - subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - *subExprTokenPtr = *tokenPtr; - subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; - subExprTokenPtr->numComponents++; - subExprTokenPtr++; - memcpy(subExprTokenPtr, tokenPtr, - (size_t) toCopy * sizeof(Tcl_Token)); - parsePtr->numTokens += toCopy + 1; - } - - scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; - numBytes -= scanned; - tokenPtr += toCopy; - break; - } - - default: - - /* Advance to the child node, which is an operator. */ - nodePtr = nodes + next; - - /* Skip any white space that comes before the subexpression */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - - /* Generate tokens for the operator / subexpression... */ - switch (nodePtr->lexeme) { - case OPEN_PAREN: - case COMMA: - case COLON: - - /* - * Historical practice has been to have no Tcl_Tokens for - * these operators. - */ - - break; - - default: { - - /* - * Remember the index of the last subexpression we were - * working on -- that of our parent. We'll stack it later. - */ - - parentIdx = subExprTokenIdx; - - /* - * Verify space for the two leading Tcl_Tokens representing - * the subexpression rooted by this operator. The first - * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second - * of type TCL_TOKEN_OPERATOR. - */ - - TclGrowParseTokenArray(parsePtr, 2); - subExprTokenIdx = parsePtr->numTokens; - subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; - parsePtr->numTokens += 2; - subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; - subExprTokenPtr[1].type = TCL_TOKEN_OPERATOR; - - /* - * Our current position scanning the string is the starting - * point for this subexpression. - */ - - subExprTokenPtr->start = start; - - /* - * Eventually, we know that the numComponents field of the - * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means - * we can make other use of this field for now to track the - * stack of subexpressions we have pending. - */ - - subExprTokenPtr[1].numComponents = parentIdx; - break; - } - } - break; - } - - /* Determine which way to exit the node on this pass. */ - router: - switch (nodePtr->mark) { - case MARK_LEFT: - next = nodePtr->left; - break; - - case MARK_RIGHT: - next = nodePtr->right; - - /* Skip any white space that comes before the operator */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - - /* - * Here we scan from the string the operator corresponding to - * nodePtr->lexeme. - */ - - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - - switch(nodePtr->lexeme) { - case OPEN_PAREN: - case COMMA: - case COLON: - - /* No tokens for these lexemes -> nothing to do. */ - break; - - default: - - /* - * Record in the TCL_TOKEN_OPERATOR token the pointers into - * the string marking where the operator is. - */ - - subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; - subExprTokenPtr[1].start = start; - subExprTokenPtr[1].size = scanned; - break; - } - - start +=scanned; - numBytes -= scanned; - break; - - case MARK_PARENT: - switch (nodePtr->lexeme) { - case START: - - /* When we get back to the START node, we're done. */ - return; - - case COMMA: - case COLON: - - /* No tokens for these lexemes -> nothing to do. */ - break; - - case OPEN_PAREN: - - /* Skip past matching close paren. */ - scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; - numBytes -= scanned; - scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - start +=scanned; - numBytes -= scanned; - break; - - default: { - - /* - * Before we leave this node/operator/subexpression for the - * last time, finish up its tokens.... - * - * Our current position scanning the string is where the - * substring for the subexpression ends. - */ - - subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; - subExprTokenPtr->size = start - subExprTokenPtr->start; - - /* - * All the Tcl_Tokens allocated and filled belong to - * this subexpresion. The first token is the leading - * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) - * are its components. - */ - - subExprTokenPtr->numComponents = - (parsePtr->numTokens - subExprTokenIdx) - 1; - - /* - * Finally, as we return up the tree to our parent, pop the - * parent subexpression off our subexpression stack, and - * fill in the zero numComponents for the operator Tcl_Token. - */ - - parentIdx = subExprTokenPtr[1].numComponents; - subExprTokenPtr[1].numComponents = 0; - subExprTokenIdx = parentIdx; - break; - } - } - - /* Since we're returning to parent, skip child handling code. */ - nodePtr = nodes + nodePtr->p.parent; - goto router; - } - } -} - -/* - *---------------------------------------------------------------------- - * - * Tcl_ParseExpr -- - * - * Given a string, the numBytes bytes starting at start, this function - * parses it as a Tcl expression and stores information about the - * structure of the expression in the Tcl_Parse struct indicated by the - * caller. - * - * Results: - * If the string is successfully parsed as a valid Tcl expression, TCL_OK - * is returned, and data about the expression structure is written to - * *parsePtr. If the string cannot be parsed as a valid Tcl expression, - * TCL_ERROR is returned, and if interp is non-NULL, an error message is - * written to interp. - * - * Side effects: - * If there is insufficient space in parsePtr to hold all the information - * about the expression, then additional space is malloc-ed. If the - * function returns TCL_OK then the caller must eventually invoke - * Tcl_FreeParse to release any additional space that was allocated. - * - *---------------------------------------------------------------------- - */ - -int -Tcl_ParseExpr( - Tcl_Interp *interp, /* Used for error reporting. */ - const char *start, /* Start of source string to parse. */ - int numBytes, /* Number of bytes in string. If < 0, the - * string consists of all bytes up to the - * first null character. */ - Tcl_Parse *parsePtr) /* Structure to fill with information about - * the parsed expression; any previous - * information in the structure is ignored. */ +void +TclFinalizeCompilation() { - int code; - OpNode *opTree = NULL; /* Will point to the tree of operators */ - Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ - Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse *exprParsePtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); - /* Holds the Tcl_Tokens of substitutions */ - - if (numBytes < 0) { - numBytes = (start ? strlen(start) : 0); + Tcl_MutexLock(&opMutex); + if (opTableInitialized) { + Tcl_DeleteHashTable(&opHashTable); + opTableInitialized = 0; } - - code = ParseExpr(interp, start, numBytes, &opTree, litList, - funcList, exprParsePtr, 1 /* parseOnly */); - Tcl_DecrRefCount(funcList); - Tcl_DecrRefCount(litList); - - TclParseInit(interp, start, numBytes, parsePtr); - if (code == TCL_OK) { - ConvertTreeToTokens(start, numBytes, - opTree, exprParsePtr->tokenPtr, parsePtr); - } else { - parsePtr->term = exprParsePtr->term; - parsePtr->errorType = exprParsePtr->errorType; - } - - Tcl_FreeParse(exprParsePtr); - TclStackFree(interp, exprParsePtr); - ckfree((char *) opTree); - return code; + Tcl_MutexUnlock(&opMutex); } /* *---------------------------------------------------------------------- * - * ParseLexeme -- + * CompileSubExpr -- * - * Parse a single lexeme from the start of a string, scanning no more - * than numBytes bytes. + * Given a pointer to a TCL_TOKEN_SUB_EXPR token describing a + * subexpression, this procedure emits instructions to evaluate the + * subexpression at runtime. * * Results: - * Returns the number of bytes scanned to produce the lexeme. + * The return value is TCL_OK on a successful compilation and TCL_ERROR + * on failure. If TCL_ERROR is returned, then the interpreter's result + * contains an error message. * * Side effects: - * Code identifying lexeme parsed is writen to *lexemePtr. + * Adds instructions to envPtr to evaluate the subexpression. * *---------------------------------------------------------------------- */ static int -ParseLexeme( - const char *start, /* Start of lexeme to parse. */ - int numBytes, /* Number of bytes in string. */ - unsigned char *lexemePtr, /* Write code of parsed lexeme to this - * storage. */ - Tcl_Obj **literalPtr) /* Write corresponding literal value to this - storage, if non-NULL. */ +CompileSubExpr(exprTokenPtr, infoPtr, envPtr) + Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token + * to compile. */ + ExprInfo *infoPtr; /* Describes the compilation state for the + * expression being compiled. */ + CompileEnv *envPtr; /* Holds resulting instructions. */ { - const char *end; - int scanned; - Tcl_UniChar ch; - Tcl_Obj *literal = NULL; - unsigned char byte; - - if (numBytes == 0) { - *lexemePtr = END; - return 0; + Tcl_Interp *interp = infoPtr->interp; + Tcl_Token *tokenPtr, *endPtr = NULL; /* silence gcc 4 warning */ + Tcl_Token *afterSubexprPtr; + CONST OperatorDesc *opDescPtr; + Tcl_HashEntry *hPtr; + CONST char *operator; + Tcl_DString opBuf; + int objIndex, opIndex, length, code; + char buffer[TCL_UTF_MAX]; + + if (exprTokenPtr->type != TCL_TOKEN_SUB_EXPR) { + panic("CompileSubExpr: token type %d not TCL_TOKEN_SUB_EXPR\n", + exprTokenPtr->type); } - byte = (unsigned char)(*start); - if (byte < sizeof(Lexeme) && Lexeme[byte] != 0) { - *lexemePtr = Lexeme[byte]; - return 1; - } - switch (byte) { - case '*': - if ((numBytes > 1) && (start[1] == '*')) { - *lexemePtr = EXPON; - return 2; - } - *lexemePtr = MULT; - return 1; - - case '=': - if ((numBytes > 1) && (start[1] == '=')) { - *lexemePtr = EQUAL; - return 2; - } - *lexemePtr = INCOMPLETE; - return 1; - - case '!': - if ((numBytes > 1) && (start[1] == '=')) { - *lexemePtr = NEQ; - return 2; - } - *lexemePtr = NOT; - return 1; + code = TCL_OK; - case '&': - if ((numBytes > 1) && (start[1] == '&')) { - *lexemePtr = AND; - return 2; - } - *lexemePtr = BIT_AND; - return 1; - - case '|': - if ((numBytes > 1) && (start[1] == '|')) { - *lexemePtr = OR; - return 2; - } - *lexemePtr = BIT_OR; - return 1; - - case '<': - if (numBytes > 1) { - switch (start[1]) { - case '<': - *lexemePtr = LEFT_SHIFT; - return 2; - case '=': - *lexemePtr = LEQ; - return 2; + /* + * Switch on the type of the first token after the subexpression token. + * After processing it, advance tokenPtr to point just after the + * subexpression's last token. + */ + + tokenPtr = exprTokenPtr+1; + TRACE(exprTokenPtr->start, exprTokenPtr->size, + tokenPtr->start, tokenPtr->size); + switch (tokenPtr->type) { + case TCL_TOKEN_WORD: + code = TclCompileTokens(interp, tokenPtr+1, + tokenPtr->numComponents, envPtr); + if (code != TCL_OK) { + goto done; } - } - *lexemePtr = LESS; - return 1; - - case '>': - if (numBytes > 1) { - switch (start[1]) { - case '>': - *lexemePtr = RIGHT_SHIFT; - return 2; - case '=': - *lexemePtr = GEQ; - return 2; + tokenPtr += (tokenPtr->numComponents + 1); + break; + + case TCL_TOKEN_TEXT: + if (tokenPtr->size > 0) { + objIndex = TclRegisterNewLiteral(envPtr, tokenPtr->start, + tokenPtr->size); + } else { + objIndex = TclRegisterNewLiteral(envPtr, "", 0); } - } - *lexemePtr = GREATER; - return 1; - - case 'i': - if ((numBytes > 1) && (start[1] == 'n') - && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { - + TclEmitPush(objIndex, envPtr); + tokenPtr += 1; + break; + + case TCL_TOKEN_BS: + length = TclParseBackslash(tokenPtr->start, tokenPtr->size, + (int *) NULL, buffer); + if (length > 0) { + objIndex = TclRegisterNewLiteral(envPtr, buffer, length); + } else { + objIndex = TclRegisterNewLiteral(envPtr, "", 0); + } + TclEmitPush(objIndex, envPtr); + tokenPtr += 1; + break; + + case TCL_TOKEN_COMMAND: + code = TclCompileScript(interp, tokenPtr->start+1, + tokenPtr->size-2, /*nested*/ 0, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += 1; + break; + + case TCL_TOKEN_VARIABLE: + code = TclCompileTokens(interp, tokenPtr, 1, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + break; + + case TCL_TOKEN_SUB_EXPR: + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + break; + + case TCL_TOKEN_OPERATOR: /* - * Must make this check so we can tell the difference between - * the "in" operator and the "int" function name and the - * "infinity" numeric value. + * Look up the operator. If the operator isn't found, treat it + * as a math function. */ - - *lexemePtr = IN_LIST; - return 2; - } - break; - - case 'e': - if ((numBytes > 1) && (start[1] == 'q') - && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { - *lexemePtr = STREQ; - return 2; - } - break; - - case 'n': - if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { - switch (start[1]) { - case 'e': - *lexemePtr = STRNEQ; - return 2; - case 'i': - *lexemePtr = NOT_IN_LIST; - return 2; - } - } - } - - literal = Tcl_NewObj(); - if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end, - TCL_PARSE_NO_WHITESPACE) == TCL_OK) { - if (end < start + numBytes && !isalnum(UCHAR(*end)) - && UCHAR(*end) != '_') { - - number: - TclInitStringRep(literal, start, end-start); - *lexemePtr = NUMBER; - if (literalPtr) { - *literalPtr = literal; - } else { - Tcl_DecrRefCount(literal); + Tcl_DStringInit(&opBuf); + operator = Tcl_DStringAppend(&opBuf, + tokenPtr->start, tokenPtr->size); + hPtr = Tcl_FindHashEntry(&opHashTable, operator); + if (hPtr == NULL) { + code = CompileMathFuncCall(exprTokenPtr, operator, infoPtr, + envPtr, &endPtr); + Tcl_DStringFree(&opBuf); + if (code != TCL_OK) { + goto done; + } + tokenPtr = endPtr; + break; } - return (end-start); - } else { - unsigned char lexeme; + Tcl_DStringFree(&opBuf); + opIndex = (int) Tcl_GetHashValue(hPtr); + opDescPtr = &(operatorTable[opIndex]); /* - * We have a number followed directly by bareword characters - * (alpha, digit, underscore). Is this a number followed by - * bareword syntax error? Or should we join into one bareword? - * Example: Inf + luence + () becomes a valid function call. - * [Bug 3401704] + * If the operator is "normal", compile it using information + * from the operator table. */ - if (literal->typePtr == &tclDoubleType) { - const char *p = start; - while (p < end) { - if (!isalnum(UCHAR(*p++))) { - /* - * The number has non-bareword characters, so we - * must treat it as a number. - */ - goto number; + + if (opDescPtr->numOperands > 0) { + tokenPtr++; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + + if (opDescPtr->numOperands == 2) { + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; } + tokenPtr += (tokenPtr->numComponents + 1); } + TclEmitOpcode(opDescPtr->instruction, envPtr); + infoPtr->hasOperators = 1; + break; } - ParseLexeme(end, numBytes-(end-start), &lexeme, NULL); - if ((NODE_TYPE & lexeme) == BINARY) { - /* - * The bareword characters following the number take the - * form of an operator (eq, ne, in, ni, ...) so we treat - * as number + operator. - */ - goto number; - } + /* - * Otherwise, fall through and parse the whole as a bareword. + * The operator requires special treatment, and is either + * "+" or "-", or one of "&&", "||" or "?". */ - } - } + + switch (opIndex) { + case OP_PLUS: + case OP_MINUS: + tokenPtr++; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + + /* + * Check whether the "+" or "-" is unary. + */ + + afterSubexprPtr = exprTokenPtr + + exprTokenPtr->numComponents+1; + if (tokenPtr == afterSubexprPtr) { + TclEmitOpcode(((opIndex==OP_PLUS)? + INST_UPLUS : INST_UMINUS), + envPtr); + break; + } + + /* + * The "+" or "-" is binary. + */ + + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + TclEmitOpcode(((opIndex==OP_PLUS)? INST_ADD : INST_SUB), + envPtr); + break; - if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = Tcl_UtfToUniChar(start, &ch); - } else { - char utfBytes[TCL_UTF_MAX]; - memcpy(utfBytes, start, (size_t) numBytes); - utfBytes[numBytes] = '\0'; - scanned = Tcl_UtfToUniChar(utfBytes, &ch); - } - if (!isalnum(UCHAR(ch))) { - *lexemePtr = INVALID; - Tcl_DecrRefCount(literal); - return scanned; - } - end = start; - while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { - end += scanned; - numBytes -= scanned; - if (Tcl_UtfCharComplete(end, numBytes)) { - scanned = Tcl_UtfToUniChar(end, &ch); - } else { - char utfBytes[TCL_UTF_MAX]; - memcpy(utfBytes, end, (size_t) numBytes); - utfBytes[numBytes] = '\0'; - scanned = Tcl_UtfToUniChar(utfBytes, &ch); - } + case OP_LAND: + case OP_LOR: + code = CompileLandOrLorExpr(exprTokenPtr, opIndex, + infoPtr, envPtr, &endPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr = endPtr; + break; + + case OP_QUESTY: + code = CompileCondExpr(exprTokenPtr, infoPtr, + envPtr, &endPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr = endPtr; + break; + + default: + panic("CompileSubExpr: unexpected operator %d requiring special treatment\n", + opIndex); + } /* end switch on operator requiring special treatment */ + infoPtr->hasOperators = 1; + break; + + default: + panic("CompileSubExpr: unexpected token type %d\n", + tokenPtr->type); } - *lexemePtr = BAREWORD; - if (literalPtr) { - Tcl_SetStringObj(literal, start, (int) (end-start)); - *literalPtr = literal; - } else { - Tcl_DecrRefCount(literal); + + /* + * Verify that the subexpression token had the required number of + * subtokens: that we've advanced tokenPtr just beyond the + * subexpression's last token. For example, a "*" subexpression must + * contain the tokens for exactly two operands. + */ + + if (tokenPtr != (exprTokenPtr + exprTokenPtr->numComponents+1)) { + LogSyntaxError(infoPtr); + code = TCL_ERROR; } - return (end-start); + + done: + return code; } /* *---------------------------------------------------------------------- * - * TclCompileExpr -- + * CompileLandOrLorExpr -- * - * This procedure compiles a string containing a Tcl expression into Tcl - * bytecodes. + * This procedure compiles a Tcl logical and ("&&") or logical or + * ("||") subexpression. * * Results: - * None. + * The return value is TCL_OK on a successful compilation and TCL_ERROR + * on failure. If TCL_OK is returned, a pointer to the token just after + * the last one in the subexpression is stored at the address in + * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result + * contains an error message. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. @@ -2022,680 +580,387 @@ ParseLexeme( *---------------------------------------------------------------------- */ -void -TclCompileExpr( - Tcl_Interp *interp, /* Used for error reporting. */ - const char *script, /* The source script to compile. */ - int numBytes, /* Number of bytes in script. */ - CompileEnv *envPtr, /* Holds resulting instructions. */ - int optimize) /* 0 for one-off expressions */ +static int +CompileLandOrLorExpr(exprTokenPtr, opIndex, infoPtr, envPtr, endPtrPtr) + Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token + * containing the "&&" or "||" operator. */ + int opIndex; /* A code describing the expression + * operator: either OP_LAND or OP_LOR. */ + ExprInfo *infoPtr; /* Describes the compilation state for the + * expression being compiled. */ + CompileEnv *envPtr; /* Holds resulting instructions. */ + Tcl_Token **endPtrPtr; /* If successful, a pointer to the token + * just after the last token in the + * subexpression is stored here. */ { - OpNode *opTree = NULL; /* Will point to the tree of operators */ - Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ - Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse *parsePtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); - /* Holds the Tcl_Tokens of substitutions */ - - int code = ParseExpr(interp, script, numBytes, &opTree, litList, - funcList, parsePtr, 0 /* parseOnly */); - - if (code == TCL_OK) { - - /* Valid parse; compile the tree. */ - int objc; - Tcl_Obj *const *litObjv; - Tcl_Obj **funcObjv; - - /* TIP #280 : Track Lines within the expression */ - TclAdvanceLines(&envPtr->line, script, - script + TclParseAllWhiteSpace(script, numBytes)); - - TclListObjGetElements(NULL, litList, &objc, (Tcl_Obj ***)&litObjv); - TclListObjGetElements(NULL, funcList, &objc, &funcObjv); - CompileExprTree(interp, opTree, 0, &litObjv, funcObjv, - parsePtr->tokenPtr, envPtr, optimize); - } else { - TclCompileSyntaxError(interp, envPtr); - } + JumpFixup shortCircuitFixup; /* Used to fix up the short circuit jump + * after the first subexpression. */ + JumpFixup lhsTrueFixup, lhsEndFixup; + /* Used to fix up jumps used to convert the + * first operand to 0 or 1. */ + Tcl_Token *tokenPtr; + int dist, code; + int savedStackDepth = envPtr->currStackDepth; - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - Tcl_DecrRefCount(funcList); - Tcl_DecrRefCount(litList); - ckfree((char *) opTree); -} - -/* - *---------------------------------------------------------------------- - * - * ExecConstantExprTree -- - * Compiles and executes bytecode for the subexpression tree at index - * in the nodes array. This subexpression must be constant, made up - * of only constant operators (not functions) and literals. - * - * Results: - * A standard Tcl return code and result left in interp. - * - * Side effects: - * Consumes subtree of nodes rooted at index. Advances the pointer - * *litObjvPtr. - * - *---------------------------------------------------------------------- - */ + /* + * Emit code for the first operand. + */ -static int -ExecConstantExprTree( - Tcl_Interp *interp, - OpNode *nodes, - int index, - Tcl_Obj *const **litObjvPtr) -{ - CompileEnv *envPtr; - ByteCode *byteCodePtr; - int code; - Tcl_Obj *byteCodeObj = Tcl_NewObj(); + tokenPtr = exprTokenPtr+2; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); /* - * Note we are compiling an expression with literal arguments. This means - * there can be no [info frame] calls when we execute the resulting - * bytecode, so there's no need to tend to TIP 280 issues. + * Convert the first operand to the result that Tcl requires: + * "0" or "1". Eventually we'll use a new instruction for this. */ + + TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &lhsTrueFixup); + TclEmitPush(TclRegisterNewLiteral(envPtr, "0", 1), envPtr); + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &lhsEndFixup); + dist = (envPtr->codeNext - envPtr->codeStart) - lhsTrueFixup.codeOffset; + if (TclFixupForwardJump(envPtr, &lhsTrueFixup, dist, 127)) { + badDist: + panic("CompileLandOrLorExpr: bad jump distance %d\n", dist); + } + envPtr->currStackDepth = savedStackDepth; + TclEmitPush(TclRegisterNewLiteral(envPtr, "1", 1), envPtr); + dist = (envPtr->codeNext - envPtr->codeStart) - lhsEndFixup.codeOffset; + if (TclFixupForwardJump(envPtr, &lhsEndFixup, dist, 127)) { + goto badDist; + } - envPtr = (CompileEnv *) TclStackAlloc(interp, sizeof(CompileEnv)); - TclInitCompileEnv(interp, envPtr, NULL, 0, NULL, 0); - CompileExprTree(interp, nodes, index, litObjvPtr, NULL, NULL, envPtr, - 0 /* optimize */); - TclEmitOpcode(INST_DONE, envPtr); - Tcl_IncrRefCount(byteCodeObj); - TclInitByteCodeObj(byteCodeObj, envPtr); - TclFreeCompileEnv(envPtr); - TclStackFree(interp, envPtr); - byteCodePtr = (ByteCode *) byteCodeObj->internalRep.otherValuePtr; - code = TclExecuteByteCode(interp, byteCodePtr); - Tcl_DecrRefCount(byteCodeObj); - return code; -} - -/* - *---------------------------------------------------------------------- - * - * CompileExprTree -- - * Compiles and writes to envPtr instructions for the subexpression - * tree at index in the nodes array. (*litObjvPtr) must point to the - * proper location in a corresponding literals list. Likewise, when - * non-NULL, funcObjv and tokenPtr must point into matching arrays of - * function names and Tcl_Token's derived from earlier call to - * ParseExpr(). When optimize is true, any constant subexpressions - * will be precomputed. - * - * Results: - * None. - * - * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. - * Consumes subtree of nodes rooted at index. Advances the pointer - * *litObjvPtr. - * - *---------------------------------------------------------------------- - */ + /* + * Emit the "short circuit" jump around the rest of the expression. + * Duplicate the "0" or "1" on top of the stack first to keep the + * jump from consuming it. + */ -static void -CompileExprTree( - Tcl_Interp *interp, - OpNode *nodes, - int index, - Tcl_Obj *const **litObjvPtr, - Tcl_Obj *const *funcObjv, - Tcl_Token *tokenPtr, - CompileEnv *envPtr, - int optimize) -{ - OpNode *nodePtr = nodes + index; - OpNode *rootPtr = nodePtr; - int numWords = 0; - JumpList *jumpPtr = NULL; - int convert = 1; - - while (1) { - int next; - JumpList *freePtr, *newJump; - - if (nodePtr->mark == MARK_LEFT) { - next = nodePtr->left; - - switch (nodePtr->lexeme) { - case QUESTION: - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - convert = 1; - break; - case AND: - case OR: - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - break; - } - } else if (nodePtr->mark == MARK_RIGHT) { - next = nodePtr->right; - - switch (nodePtr->lexeme) { - case FUNCTION: { - Tcl_DString cmdName; - const char *p; - int length; - - Tcl_DStringInit(&cmdName); - Tcl_DStringAppend(&cmdName, "tcl::mathfunc::", -1); - p = TclGetStringFromObj(*funcObjv, &length); - funcObjv++; - Tcl_DStringAppend(&cmdName, p, length); - TclEmitPush(TclRegisterNewNSLiteral(envPtr, - Tcl_DStringValue(&cmdName), - Tcl_DStringLength(&cmdName)), envPtr); - Tcl_DStringFree(&cmdName); - - /* - * Start a count of the number of words in this function - * command invocation. In case there's already a count - * in progress (nested functions), save it in our unused - * "left" field for restoring later. - */ - - nodePtr->left = numWords; - numWords = 2; /* Command plus one argument */ - break; - } - case QUESTION: - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); - break; - case COLON: - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->jump)); - envPtr->currStackDepth = jumpPtr->depth; - jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart); - jumpPtr->convert = convert; - convert = 1; - break; - case AND: - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); - break; - case OR: - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &(jumpPtr->jump)); - break; - } - } else { - switch (nodePtr->lexeme) { - case START: - case QUESTION: - if (convert && (nodePtr == rootPtr)) { - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } - break; - case OPEN_PAREN: + TclEmitOpcode(INST_DUP, envPtr); + TclEmitForwardJump(envPtr, + ((opIndex==OP_LAND)? TCL_FALSE_JUMP : TCL_TRUE_JUMP), + &shortCircuitFixup); - /* do nothing */ - break; - case FUNCTION: + /* + * Emit code for the second operand. + */ - /* - * Use the numWords count we've kept to invoke the - * function command with the correct number of arguments. - */ + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); - if (numWords < 255) { - TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr); - } else { - TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr); - } + /* + * Emit a "logical and" or "logical or" instruction. This does not try + * to "short- circuit" the evaluation of both operands, but instead + * ensures that we either have a "1" or a "0" result. + */ - /* Restore any saved numWords value. */ - numWords = nodePtr->left; - convert = 1; - break; - case COMMA: + TclEmitOpcode(((opIndex==OP_LAND)? INST_LAND : INST_LOR), envPtr); - /* Each comma implies another function argument. */ - numWords++; - break; - case COLON: - if (TclFixupForwardJump(envPtr, &(jumpPtr->next->jump), - (envPtr->codeNext - envPtr->codeStart) - - jumpPtr->next->jump.codeOffset, 127)) { - jumpPtr->offset += 3; - } - TclFixupForwardJump(envPtr, &(jumpPtr->jump), - jumpPtr->offset - jumpPtr->jump.codeOffset, 127); - convert |= jumpPtr->convert; - envPtr->currStackDepth = jumpPtr->depth + 1; - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - break; - case AND: - case OR: - TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) - ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, - &(jumpPtr->next->jump)); - TclEmitPush(TclRegisterNewLiteral(envPtr, - (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->next->jump)); - TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->jump), 127); - if (TclFixupForwardJumpToHere(envPtr, &(jumpPtr->jump), 127)) { - jumpPtr->next->next->jump.codeOffset += 3; - } - TclEmitPush(TclRegisterNewLiteral(envPtr, - (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); - TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->next->jump), - 127); - convert = 0; - envPtr->currStackDepth = jumpPtr->depth + 1; - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - break; - default: - TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); - convert = 0; - break; - } - if (nodePtr == rootPtr) { + /* + * Now that we know the target of the forward jump, update it with the + * correct distance. + */ - /* We're done */ - return; - } - nodePtr = nodes + nodePtr->p.parent; - continue; - } + dist = (envPtr->codeNext - envPtr->codeStart) + - shortCircuitFixup.codeOffset; + TclFixupForwardJump(envPtr, &shortCircuitFixup, dist, 127); + *endPtrPtr = tokenPtr; - nodePtr->mark++; - switch (next) { - case OT_EMPTY: - numWords = 1; /* No arguments, so just the command */ - break; - case OT_LITERAL: { - Tcl_Obj *const *litObjv = *litObjvPtr; - Tcl_Obj *literal = *litObjv; - - if (optimize) { - int length, index; - const char *bytes = TclGetStringFromObj(literal, &length); - LiteralEntry *lePtr; - Tcl_Obj *objPtr; - - index = TclRegisterNewLiteral(envPtr, bytes, length); - lePtr = envPtr->literalArrayPtr + index; - objPtr = lePtr->objPtr; - if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { - /* - * Would like to do this: - * - * lePtr->objPtr = literal; - * Tcl_IncrRefCount(literal); - * Tcl_DecrRefCount(objPtr); - * - * However, the design of the "global" and "local" - * LiteralTable does not permit the value of lePtr->objPtr - * to change. So rather than replace lePtr->objPtr, we - * do surgery to transfer our desired intrep into it. - * - */ - objPtr->typePtr = literal->typePtr; - objPtr->internalRep = literal->internalRep; - literal->typePtr = NULL; - } - TclEmitPush(index, envPtr); - } else { - /* - * When optimize==0, we know the expression is a one-off - * and there's nothing to be gained from sharing literals - * when they won't live long, and the copies we have already - * have an appropriate intrep. In this case, skip literal - * registration that would enable sharing, and use the routine - * that preserves intreps. - */ - TclEmitPush(TclAddLiteralObj(envPtr, literal, NULL), envPtr); - } - (*litObjvPtr)++; - break; - } - case OT_TOKENS: - TclCompileTokens(interp, tokenPtr+1, tokenPtr->numComponents, - envPtr); - tokenPtr += tokenPtr->numComponents + 1; - break; - default: - if (optimize && nodes[next].constant) { - Tcl_InterpState save = Tcl_SaveInterpState(interp, TCL_OK); - if (ExecConstantExprTree(interp, nodes, next, litObjvPtr) - == TCL_OK) { - TclEmitPush(TclAddLiteralObj(envPtr, - Tcl_GetObjResult(interp), NULL), envPtr); - } else { - TclCompileSyntaxError(interp, envPtr); - } - Tcl_RestoreInterpState(interp, save); - convert = 0; - } else { - nodePtr = nodes + next; - } - } - } + done: + envPtr->currStackDepth = savedStackDepth + 1; + return code; } /* *---------------------------------------------------------------------- * - * TclSingleOpCmd -- - * Implements the commands: ~, !, <<, >>, %, !=, ne, in, ni - * in the ::tcl::mathop namespace. These commands have no - * extension to arbitrary arguments; they accept only exactly one - * or exactly two arguments as suitable for the operator. + * CompileCondExpr -- + * + * This procedure compiles a Tcl conditional expression: + * condExpr ::= lorExpr ['?' condExpr ':' condExpr] * * Results: - * A standard Tcl return code and result left in interp. + * The return value is TCL_OK on a successful compilation and TCL_ERROR + * on failure. If TCL_OK is returned, a pointer to the token just after + * the last one in the subexpression is stored at the address in + * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result + * contains an error message. * * Side effects: - * None. + * Adds instructions to envPtr to evaluate the expression at runtime. * *---------------------------------------------------------------------- */ -int -TclSingleOpCmd( - ClientData clientData, - Tcl_Interp *interp, - int objc, - Tcl_Obj *const objv[]) +static int +CompileCondExpr(exprTokenPtr, infoPtr, envPtr, endPtrPtr) + Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token + * containing the "?" operator. */ + ExprInfo *infoPtr; /* Describes the compilation state for the + * expression being compiled. */ + CompileEnv *envPtr; /* Holds resulting instructions. */ + Tcl_Token **endPtrPtr; /* If successful, a pointer to the token + * just after the last token in the + * subexpression is stored here. */ { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - unsigned char lexeme; - OpNode nodes[2]; - Tcl_Obj *const *litObjv = objv + 1; - - if (objc != 1+occdPtr->i.numArgs) { - Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); - return TCL_ERROR; - } + JumpFixup jumpAroundThenFixup, jumpAroundElseFixup; + /* Used to update or replace one-byte jumps + * around the then and else expressions when + * their target PCs are determined. */ + Tcl_Token *tokenPtr; + int elseCodeOffset, dist, code; + int savedStackDepth = envPtr->currStackDepth; - ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); - nodes[0].lexeme = START; - nodes[0].mark = MARK_RIGHT; - nodes[0].right = 1; - nodes[1].lexeme = lexeme; - if (objc == 2) { - nodes[1].mark = MARK_RIGHT; - } else { - nodes[1].mark = MARK_LEFT; - nodes[1].left = OT_LITERAL; + /* + * Emit code for the test. + */ + + tokenPtr = exprTokenPtr+2; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; } - nodes[1].right = OT_LITERAL; - nodes[1].p.parent = 0; + tokenPtr += (tokenPtr->numComponents + 1); + + /* + * Emit the jump to the "else" expression if the test was false. + */ + + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpAroundThenFixup); - return ExecConstantExprTree(interp, nodes, 0, &litObjv); -} - -/* - *---------------------------------------------------------------------- - * - * TclSortingOpCmd -- - * Implements the commands: <, <=, >, >=, ==, eq - * in the ::tcl::mathop namespace. These commands are defined for - * arbitrary number of arguments by computing the AND of the base - * operator applied to all neighbor argument pairs. - * - * Results: - * A standard Tcl return code and result left in interp. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ + /* + * Compile the "then" expression. Note that if a subexpression is only + * a primary, we need to try to convert it to numeric. We do this to + * support Tcl's policy of interpreting operands if at all possible as + * first integers, else floating-point numbers. + */ -int -TclSortingOpCmd( - ClientData clientData, - Tcl_Interp *interp, - int objc, - Tcl_Obj *const objv[]) -{ - int code = TCL_OK; + infoPtr->hasOperators = 0; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + if (!infoPtr->hasOperators) { + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + } - if (objc < 3) { - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); - } else { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - Tcl_Obj **litObjv = (Tcl_Obj **) TclStackAlloc(interp, - 2*(objc-2)*sizeof(Tcl_Obj *)); - OpNode *nodes = (OpNode *) TclStackAlloc(interp, - 2*(objc-2)*sizeof(OpNode)); - unsigned char lexeme; - int i, lastAnd = 1; - Tcl_Obj *const *litObjPtrPtr = litObjv; - - ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); - - litObjv[0] = objv[1]; - nodes[0].lexeme = START; - nodes[0].mark = MARK_RIGHT; - for (i=2; i<objc-1; i++) { - litObjv[2*(i-1)-1] = objv[i]; - nodes[2*(i-1)-1].lexeme = lexeme; - nodes[2*(i-1)-1].mark = MARK_LEFT; - nodes[2*(i-1)-1].left = OT_LITERAL; - nodes[2*(i-1)-1].right = OT_LITERAL; - - litObjv[2*(i-1)] = objv[i]; - nodes[2*(i-1)].lexeme = AND; - nodes[2*(i-1)].mark = MARK_LEFT; - nodes[2*(i-1)].left = lastAnd; - nodes[lastAnd].p.parent = 2*(i-1); - - nodes[2*(i-1)].right = 2*(i-1)+1; - nodes[2*(i-1)+1].p.parent= 2*(i-1); - - lastAnd = 2*(i-1); - } - litObjv[2*(objc-2)-1] = objv[objc-1]; + /* + * Emit an unconditional jump around the "else" condExpr. + */ + + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, + &jumpAroundElseFixup); - nodes[2*(objc-2)-1].lexeme = lexeme; - nodes[2*(objc-2)-1].mark = MARK_LEFT; - nodes[2*(objc-2)-1].left = OT_LITERAL; - nodes[2*(objc-2)-1].right = OT_LITERAL; + /* + * Compile the "else" expression. + */ - nodes[0].right = lastAnd; - nodes[lastAnd].p.parent = 0; + envPtr->currStackDepth = savedStackDepth; + elseCodeOffset = (envPtr->codeNext - envPtr->codeStart); + infoPtr->hasOperators = 0; + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; + } + tokenPtr += (tokenPtr->numComponents + 1); + if (!infoPtr->hasOperators) { + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + } - code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); + /* + * Fix up the second jump around the "else" expression. + */ - TclStackFree(interp, nodes); - TclStackFree(interp, litObjv); + dist = (envPtr->codeNext - envPtr->codeStart) + - jumpAroundElseFixup.codeOffset; + if (TclFixupForwardJump(envPtr, &jumpAroundElseFixup, dist, 127)) { + /* + * Update the else expression's starting code offset since it + * moved down 3 bytes too. + */ + + elseCodeOffset += 3; } + + /* + * Fix up the first jump to the "else" expression if the test was false. + */ + + dist = (elseCodeOffset - jumpAroundThenFixup.codeOffset); + TclFixupForwardJump(envPtr, &jumpAroundThenFixup, dist, 127); + *endPtrPtr = tokenPtr; + + done: + envPtr->currStackDepth = savedStackDepth + 1; return code; } /* *---------------------------------------------------------------------- * - * TclVariadicOpCmd -- - * Implements the commands: +, *, &, |, ^, ** - * in the ::tcl::mathop namespace. These commands are defined for - * arbitrary number of arguments by repeatedly applying the base - * operator with suitable associative rules. When fewer than two - * arguments are provided, suitable identity values are returned. + * CompileMathFuncCall -- + * + * This procedure compiles a call on a math function in an expression: + * mathFuncCall ::= funcName '(' [condExpr {',' condExpr}] ')' * * Results: - * A standard Tcl return code and result left in interp. + * The return value is TCL_OK on a successful compilation and TCL_ERROR + * on failure. If TCL_OK is returned, a pointer to the token just after + * the last one in the subexpression is stored at the address in + * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result + * contains an error message. * * Side effects: - * None. + * Adds instructions to envPtr to evaluate the math function at + * runtime. * *---------------------------------------------------------------------- */ -int -TclVariadicOpCmd( - ClientData clientData, - Tcl_Interp *interp, - int objc, - Tcl_Obj *const objv[]) +static int +CompileMathFuncCall(exprTokenPtr, funcName, infoPtr, envPtr, endPtrPtr) + Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token + * containing the math function call. */ + CONST char *funcName; /* Name of the math function. */ + ExprInfo *infoPtr; /* Describes the compilation state for the + * expression being compiled. */ + CompileEnv *envPtr; /* Holds resulting instructions. */ + Tcl_Token **endPtrPtr; /* If successful, a pointer to the token + * just after the last token in the + * subexpression is stored here. */ { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - unsigned char lexeme; - int code; + Tcl_Interp *interp = infoPtr->interp; + Interp *iPtr = (Interp *) interp; + MathFunc *mathFuncPtr; + Tcl_HashEntry *hPtr; + Tcl_Token *tokenPtr, *afterSubexprPtr; + int code, i; + + /* + * Look up the MathFunc record for the function. + */ - if (objc < 2) { - Tcl_SetObjResult(interp, Tcl_NewIntObj(occdPtr->i.identity)); - return TCL_OK; + code = TCL_OK; + hPtr = Tcl_FindHashEntry(&iPtr->mathFuncTable, funcName); + if (hPtr == NULL) { + Tcl_ResetResult(interp); + Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), + "unknown math function \"", funcName, "\"", (char *) NULL); + code = TCL_ERROR; + goto done; } + mathFuncPtr = (MathFunc *) Tcl_GetHashValue(hPtr); - ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); - lexeme |= BINARY; - - if (objc == 2) { - Tcl_Obj *litObjv[2]; - OpNode nodes[2]; - int decrMe = 0; - Tcl_Obj *const *litObjPtrPtr = litObjv; - - if (lexeme == EXPON) { - litObjv[1] = Tcl_NewIntObj(occdPtr->i.identity); - Tcl_IncrRefCount(litObjv[1]); - decrMe = 1; - litObjv[0] = objv[1]; - nodes[0].lexeme = START; - nodes[0].mark = MARK_RIGHT; - nodes[0].right = 1; - nodes[1].lexeme = lexeme; - nodes[1].mark = MARK_LEFT; - nodes[1].left = OT_LITERAL; - nodes[1].right = OT_LITERAL; - nodes[1].p.parent = 0; - } else { - if (lexeme == DIVIDE) { - litObjv[0] = Tcl_NewDoubleObj(1.0); - } else { - litObjv[0] = Tcl_NewIntObj(occdPtr->i.identity); - } - Tcl_IncrRefCount(litObjv[0]); - litObjv[1] = objv[1]; - nodes[0].lexeme = START; - nodes[0].mark = MARK_RIGHT; - nodes[0].right = 1; - nodes[1].lexeme = lexeme; - nodes[1].mark = MARK_LEFT; - nodes[1].left = OT_LITERAL; - nodes[1].right = OT_LITERAL; - nodes[1].p.parent = 0; - } + /* + * If not a builtin function, push an object with the function's name. + */ - code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); + if (mathFuncPtr->builtinFuncIndex < 0) { + TclEmitPush(TclRegisterNewLiteral(envPtr, funcName, -1), envPtr); + } - Tcl_DecrRefCount(litObjv[decrMe]); - return code; - } else { - Tcl_Obj *const *litObjv = objv + 1; - OpNode *nodes = (OpNode *) TclStackAlloc(interp, - (objc-1)*sizeof(OpNode)); - int i, lastOp = OT_LITERAL; - - nodes[0].lexeme = START; - nodes[0].mark = MARK_RIGHT; - if (lexeme == EXPON) { - for (i=objc-2; i>0; i-- ) { - nodes[i].lexeme = lexeme; - nodes[i].mark = MARK_LEFT; - nodes[i].left = OT_LITERAL; - nodes[i].right = lastOp; - if (lastOp >= 0) { - nodes[lastOp].p.parent = i; - } - lastOp = i; - } - } else { - for (i=1; i<objc-1; i++ ) { - nodes[i].lexeme = lexeme; - nodes[i].mark = MARK_LEFT; - nodes[i].left = lastOp; - if (lastOp >= 0) { - nodes[lastOp].p.parent = i; - } - nodes[i].right = OT_LITERAL; - lastOp = i; + /* + * Compile any arguments for the function. + */ + + tokenPtr = exprTokenPtr+2; + afterSubexprPtr = exprTokenPtr + (exprTokenPtr->numComponents + 1); + if (mathFuncPtr->numArgs > 0) { + for (i = 0; i < mathFuncPtr->numArgs; i++) { + if (tokenPtr == afterSubexprPtr) { + Tcl_ResetResult(interp); + Tcl_AppendToObj(Tcl_GetObjResult(interp), + "too few arguments for math function", -1); + code = TCL_ERROR; + goto done; + } + code = CompileSubExpr(tokenPtr, infoPtr, envPtr); + if (code != TCL_OK) { + goto done; } + tokenPtr += (tokenPtr->numComponents + 1); } - nodes[0].right = lastOp; - nodes[lastOp].p.parent = 0; - - code = ExecConstantExprTree(interp, nodes, 0, &litObjv); + if (tokenPtr != afterSubexprPtr) { + Tcl_ResetResult(interp); + Tcl_AppendToObj(Tcl_GetObjResult(interp), + "too many arguments for math function", -1); + code = TCL_ERROR; + goto done; + } + } else if (tokenPtr != afterSubexprPtr) { + Tcl_ResetResult(interp); + Tcl_AppendToObj(Tcl_GetObjResult(interp), + "too many arguments for math function", -1); + code = TCL_ERROR; + goto done; + } + + /* + * Compile the call on the math function. Note that the "objc" argument + * count for non-builtin functions is incremented by 1 to include the + * function name itself. + */ - TclStackFree(interp, nodes); + if (mathFuncPtr->builtinFuncIndex >= 0) { /* a builtin function */ + /* + * Adjust the current stack depth by the number of arguments + * of the builtin function. This cannot be handled by the + * TclEmitInstInt1 macro as the number of arguments is not + * passed as an operand. + */ - return code; + if (envPtr->maxStackDepth < envPtr->currStackDepth) { + envPtr->maxStackDepth = envPtr->currStackDepth; + } + TclEmitInstInt1(INST_CALL_BUILTIN_FUNC1, + mathFuncPtr->builtinFuncIndex, envPtr); + envPtr->currStackDepth -= mathFuncPtr->numArgs; + } else { + TclEmitInstInt1(INST_CALL_FUNC1, (mathFuncPtr->numArgs+1), envPtr); } + *endPtrPtr = afterSubexprPtr; + + done: + return code; } /* *---------------------------------------------------------------------- * - * TclNoIdentOpCmd -- - * Implements the commands: -, / - * in the ::tcl::mathop namespace. These commands are defined for - * arbitrary non-zero number of arguments by repeatedly applying - * the base operator with suitable associative rules. When no - * arguments are provided, an error is raised. + * LogSyntaxError -- + * + * This procedure is invoked after an error occurs when compiling an + * expression. It sets the interpreter result to an error message + * describing the error. * * Results: - * A standard Tcl return code and result left in interp. + * None. * * Side effects: - * None. + * Sets the interpreter result to an error message describing the + * expression that was being compiled when the error occurred. * *---------------------------------------------------------------------- */ -int -TclNoIdentOpCmd( - ClientData clientData, - Tcl_Interp *interp, - int objc, - Tcl_Obj *const objv[]) +static void +LogSyntaxError(infoPtr) + ExprInfo *infoPtr; /* Describes the compilation state for the + * expression being compiled. */ { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); - return TCL_ERROR; - } - return TclVariadicOpCmd(clientData, interp, objc, objv); + int numBytes = (infoPtr->lastChar - infoPtr->expr); + char buffer[100]; + + sprintf(buffer, "syntax error in expression \"%.*s\"", + ((numBytes > 60)? 60 : numBytes), infoPtr->expr); + Tcl_ResetResult(infoPtr->interp); + Tcl_AppendStringsToObj(Tcl_GetObjResult(infoPtr->interp), + buffer, (char *) NULL); } -/* - * Local Variables: - * mode: c - * c-basic-offset: 4 - * fill-column: 78 - * End: - */ |
