diff options
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r-- | generic/tclCompExpr.c | 3229 |
1 files changed, 2482 insertions, 747 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 6bac221..999fe0a 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1,578 +1,2020 @@ -/* +/* * tclCompExpr.c -- * - * This file contains the code to compile Tcl expressions. + * This file contains the code to parse and compile Tcl expressions + * and implementations of the Tcl commands corresponding to expression + * operators, such as the command ::tcl::mathop::+ . * - * Copyright (c) 1997 Sun Microsystems, Inc. - * Copyright (c) 1998-2000 by Scriptics Corporation. + * Contributions from Don Porter, NIST, 2006-2007. (not subject to US copyright) * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclCompile.h" +#include "tclCompile.h" /* CompileEnv */ /* - * The stuff below is a bit of a hack so that this file can be used in - * environments that include no UNIX, i.e. no errno: just arrange to use - * the errno from tclExecute.c here. + * Expression parsing takes place in the routine ParseExpr(). It takes a + * string as input, parses that string, and generates a representation of + * the expression in the form of a tree of operators, a list of literals, + * a list of function names, and an array of Tcl_Token's within a Tcl_Parse + * struct. The tree is composed of OpNodes. */ -#ifndef TCL_GENERIC_ONLY -#include "tclPort.h" -#else -#define NO_ERRNO_H -#endif +typedef struct OpNode { + int left; /* "Pointer" to the left operand. */ + int right; /* "Pointer" to the right operand. */ + union { + int parent; /* "Pointer" to the parent operand. */ + int prev; /* "Pointer" joining incomplete tree stack */ + } p; + unsigned char lexeme; /* Code that identifies the operator. */ + unsigned char precedence; /* Precedence of the operator */ + unsigned char mark; /* Mark used to control traversal. */ + unsigned char constant; /* Flag marking constant subexpressions. */ +} OpNode; + +/* + * The storage for the tree is dynamically allocated array of OpNodes. The + * array is grown as parsing needs dictate according to a scheme similar to + * Tcl's string growth algorithm, so that the resizing costs are O(N) and so + * that we use at least half the memory allocated as expressions get large. + * + * Each OpNode in the tree represents an operator in the expression, either + * unary or binary. When parsing is completed successfully, a binary operator + * OpNode will have its left and right fields filled with "pointers" to its + * left and right operands. A unary operator OpNode will have its right field + * filled with a pointer to its single operand. When an operand is a + * subexpression the "pointer" takes the form of the index -- a non-negative + * integer -- into the OpNode storage array where the root of that + * subexpression parse tree is found. + * + * Non-operator elements of the expression do not get stored in the OpNode + * tree. They are stored in the other structures according to their type. + * Literal values get appended to the literal list. Elements that denote + * forms of quoting or substitution known to the Tcl parser get stored as + * Tcl_Tokens. These non-operator elements of the expression are the + * leaves of the completed parse tree. When an operand of an OpNode is + * one of these leaf elements, the following negative integer codes are used + * to indicate which kind of elements it is. + */ -#ifdef NO_ERRNO_H -extern int errno; /* Use errno from tclExecute.c. */ -#define ERANGE 34 -#endif +enum OperandTypes { + OT_LITERAL = -3, /* Operand is a literal in the literal list */ + OT_TOKENS = -2, /* Operand is sequence of Tcl_Tokens */ + OT_EMPTY = -1 /* "Operand" is an empty string. This is a + * special case used only to represent the + * EMPTY lexeme. See below. */ +}; /* - * Boolean variable that controls whether expression compilation tracing - * is enabled. + * Readable macros to test whether a "pointer" value points to an operator. + * They operate on the "non-negative integer -> operator; negative integer -> + * a non-operator OperandType" distinction. */ -#ifdef TCL_COMPILE_DEBUG -static int traceExprComp = 0; -#endif /* TCL_COMPILE_DEBUG */ +#define IsOperator(l) ((l) >= 0) +#define NotOperator(l) ((l) < 0) /* - * The ExprInfo structure describes the state of compiling an expression. - * A pointer to an ExprInfo record is passed among the routines in - * this module. + * Note that it is sufficient to store in the tree just the type of leaf + * operand, without any explicit pointer to which leaf. This is true because + * the traversals of the completed tree we perform are known to visit + * the leaves in the same order as the original parse. + * + * In a completed parse tree, those OpNodes that are themselves (roots of + * subexpression trees that are) operands of some operator store in their + * p.parent field a "pointer" to the OpNode of that operator. The p.parent + * field permits a traversal of the tree within a * non-recursive routine + * (ConvertTreeToTokens() and CompileExprTree()). This means that even + * expression trees of great depth pose no risk of blowing the C stack. + * + * While the parse tree is being constructed, the same memory space is used + * to hold the p.prev field which chains together a stack of incomplete + * trees awaiting their right operands. + * + * The lexeme field is filled in with the lexeme of the operator that is + * returned by the ParseLexeme() routine. Only lexemes for unary and + * binary operators get stored in an OpNode. Other lexmes get different + * treatement. + * + * The precedence field provides a place to store the precedence of the + * operator, so it need not be looked up again and again. + * + * The mark field is use to control the traversal of the tree, so + * that it can be done non-recursively. The mark values are: */ -typedef struct ExprInfo { - Tcl_Interp *interp; /* Used for error reporting. */ - Tcl_Parse *parsePtr; /* Structure filled with information about - * the parsed expression. */ - CONST char *expr; /* The expression that was originally passed - * to TclCompileExpr. */ - CONST char *lastChar; /* Points just after last byte of expr. */ - int hasOperators; /* Set 1 if the expr has operators; 0 if - * expr is only a primary. If 1 after - * compiling an expr, a tryCvtToNumeric - * instruction is emitted to convert the - * primary to a number if possible. */ -} ExprInfo; +enum Marks { + MARK_LEFT, /* Next step of traversal is to visit left subtree */ + MARK_RIGHT, /* Next step of traversal is to visit right subtree */ + MARK_PARENT /* Next step of traversal is to return to parent */ +}; /* - * Definitions of numeric codes representing each expression operator. - * The order of these must match the entries in the operatorTable below. - * Also the codes for the relational operators (OP_LESS, OP_GREATER, - * OP_LE, OP_GE, OP_EQ, and OP_NE) must be consecutive and in that order. - * Note that OP_PLUS and OP_MINUS represent both unary and binary operators. + * The constant field is a boolean flag marking which subexpressions are + * completely known at compile time, and are eligible for computing then + * rather than waiting until run time. */ -#define OP_MULT 0 -#define OP_DIVIDE 1 -#define OP_MOD 2 -#define OP_PLUS 3 -#define OP_MINUS 4 -#define OP_LSHIFT 5 -#define OP_RSHIFT 6 -#define OP_LESS 7 -#define OP_GREATER 8 -#define OP_LE 9 -#define OP_GE 10 -#define OP_EQ 11 -#define OP_NEQ 12 -#define OP_BITAND 13 -#define OP_BITXOR 14 -#define OP_BITOR 15 -#define OP_LAND 16 -#define OP_LOR 17 -#define OP_QUESTY 18 -#define OP_LNOT 19 -#define OP_BITNOT 20 -#define OP_STREQ 21 -#define OP_STRNEQ 22 +/* + * Each lexeme belongs to one of four categories, which determine + * its place in the parse tree. We use the two high bits of the + * (unsigned char) value to store a NODE_TYPE code. + */ + +#define NODE_TYPE 0xC0 + +/* + * The four category values are LEAF, UNARY, and BINARY, explained below, + * and "uncategorized", which is used either temporarily, until context + * determines which of the other three categories is correct, or for + * lexemes like INVALID, which aren't really lexemes at all, but indicators + * of a parsing error. Note that the codes must be distinct to distinguish + * categories, but need not take the form of a bit array. + */ +#define BINARY 0x40 /* This lexeme is a binary operator. An + * OpNode representing it should go into the + * parse tree, and two operands should be + * parsed for it in the expression. */ +#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode + * representing it should go into the parse + * tree, and one operand should be parsed for + * it in the expression. */ +#define LEAF 0xC0 /* This lexeme is a leaf operand in the parse + * tree. No OpNode will be placed in the tree + * for it. Either a literal value will be + * appended to the list of literals in this + * expression, or appropriate Tcl_Tokens will + * be appended in a Tcl_Parse struct to + * represent those leaves that require some + * form of substitution. + */ + +/* Uncategorized lexemes */ + +#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or + * BINARY_PLUS according to context. */ +#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or + * BINARY_MINUS according to context. */ +#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to + * FUNCTION or a parse error according to + * context and value. */ +#define INCOMPLETE 4 /* A parse error. Used only when the single + * "=" is encountered. */ +#define INVALID 5 /* A parse error. Used when any punctuation + * appears that's not a supported operator. */ + +/* Leaf lexemes */ + +#define NUMBER ( LEAF | 1) /* For literal numbers */ +#define SCRIPT ( LEAF | 2) /* Script substitution; [foo] */ +#define BOOLEAN ( LEAF | BAREWORD) /* For literal booleans */ +#define BRACED ( LEAF | 4) /* Braced string; {foo bar} */ +#define VARIABLE ( LEAF | 5) /* Variable substitution; $x */ +#define QUOTED ( LEAF | 6) /* Quoted string; "foo $bar [soom]" */ +#define EMPTY ( LEAF | 7) /* Used only for an empty argument + * list to a function. Represents + * the empty string within parens in + * the expression: rand() */ + +/* Unary operator lexemes */ + +#define UNARY_PLUS ( UNARY | PLUS) +#define UNARY_MINUS ( UNARY | MINUS) +#define FUNCTION ( UNARY | BAREWORD) /* This is a bit of "creative + * interpretation" on the part of the + * parser. A function call is parsed + * into the parse tree according to + * the perspective that the function + * name is a unary operator and its + * argument list, enclosed in parens, + * is its operand. The additional + * requirements not implied generally + * by treatment as a unary operator -- + * for example, the requirement that + * the operand be enclosed in parens -- + * are hard coded in the relevant + * portions of ParseExpr(). We trade + * off the need to include such + * exceptional handling in the code + * against the need we would otherwise + * have for more lexeme categories. */ +#define START ( UNARY | 4) /* This lexeme isn't parsed from the + * expression text at all. It + * represents the start of the + * expression and sits at the root of + * the parse tree where it serves as + * the start/end point of traversals. */ +#define OPEN_PAREN ( UNARY | 5) /* Another bit of creative + * interpretation, where we treat "(" + * as a unary operator with the + * sub-expression between it and its + * matching ")" as its operand. See + * CLOSE_PAREN below. */ +#define NOT ( UNARY | 6) +#define BIT_NOT ( UNARY | 7) + +/* Binary operator lexemes */ + +#define BINARY_PLUS ( BINARY | PLUS) +#define BINARY_MINUS ( BINARY | MINUS) +#define COMMA ( BINARY | 3) /* The "," operator is a low precedence + * binary operator that separates the + * arguments in a function call. The + * additional constraint that this + * operator can only legally appear + * at the right places within a + * function call argument list are + * hard coded within ParseExpr(). */ +#define MULT ( BINARY | 4) +#define DIVIDE ( BINARY | 5) +#define MOD ( BINARY | 6) +#define LESS ( BINARY | 7) +#define GREATER ( BINARY | 8) +#define BIT_AND ( BINARY | 9) +#define BIT_XOR ( BINARY | 10) +#define BIT_OR ( BINARY | 11) +#define QUESTION ( BINARY | 12) /* These two lexemes make up the */ +#define COLON ( BINARY | 13) /* ternary conditional operator, + * $x ? $y : $z . We treat them as + * two binary operators to avoid + * another lexeme category, and + * code the additional constraints + * directly in ParseExpr(). For + * instance, the right operand of + * a "?" operator must be a ":" + * operator. */ +#define LEFT_SHIFT ( BINARY | 14) +#define RIGHT_SHIFT ( BINARY | 15) +#define LEQ ( BINARY | 16) +#define GEQ ( BINARY | 17) +#define EQUAL ( BINARY | 18) +#define NEQ ( BINARY | 19) +#define AND ( BINARY | 20) +#define OR ( BINARY | 21) +#define STREQ ( BINARY | 22) +#define STRNEQ ( BINARY | 23) +#define EXPON ( BINARY | 24) /* Unlike the other binary operators, + * EXPON is right associative and this + * distinction is coded directly in + * ParseExpr(). */ +#define IN_LIST ( BINARY | 25) +#define NOT_IN_LIST ( BINARY | 26) +#define CLOSE_PAREN ( BINARY | 27) /* By categorizing the CLOSE_PAREN + * lexeme as a BINARY operator, the + * normal parsing rules for binary + * operators assure that a close paren + * will not directly follow another + * operator, and the machinery already + * in place to connect operands to + * operators according to precedence + * performs most of the work of + * matching open and close parens for + * us. In the end though, a close + * paren is not really a binary + * operator, and some special coding + * in ParseExpr() make sure we never + * put an actual CLOSE_PAREN node + * in the parse tree. The + * sub-expression between parens + * becomes the single argument of + * the matching OPEN_PAREN unary + * operator. */ +#define END ( BINARY | 28) /* This lexeme represents the end of + * the string being parsed. Treating + * it as a binary operator follows the + * same logic as the CLOSE_PAREN lexeme + * and END pairs with START, in the + * same way that CLOSE_PAREN pairs with + * OPEN_PAREN. */ /* - * Table describing the expression operators. Entries in this table must - * correspond to the definitions of numeric codes for operators just above. + * When ParseExpr() builds the parse tree it must choose which operands to + * connect to which operators. This is done according to operator precedence. + * The greater an operator's precedence the greater claim it has to link to + * an available operand. The Precedence enumeration lists the precedence + * values used by Tcl expression operators, from lowest to highest claim. + * Each precedence level is commented with the operators that hold that + * precedence. */ -static int opTableInitialized = 0; /* 0 means not yet initialized. */ - -TCL_DECLARE_MUTEX(opMutex) - -typedef struct OperatorDesc { - CONST char *name; /* Name of the operator. */ - int numOperands; /* Number of operands. 0 if the operator - * requires special handling. */ - int instruction; /* Instruction opcode for the operator. - * Ignored if numOperands is 0. */ -} OperatorDesc; - -static CONST OperatorDesc operatorTable[] = { - {"*", 2, INST_MULT}, - {"/", 2, INST_DIV}, - {"%", 2, INST_MOD}, - {"+", 0, 0}, - {"-", 0, 0}, - {"<<", 2, INST_LSHIFT}, - {">>", 2, INST_RSHIFT}, - {"<", 2, INST_LT}, - {">", 2, INST_GT}, - {"<=", 2, INST_LE}, - {">=", 2, INST_GE}, - {"==", 2, INST_EQ}, - {"!=", 2, INST_NEQ}, - {"&", 2, INST_BITAND}, - {"^", 2, INST_BITXOR}, - {"|", 2, INST_BITOR}, - {"&&", 0, 0}, - {"||", 0, 0}, - {"?", 0, 0}, - {"!", 1, INST_LNOT}, - {"~", 1, INST_BITNOT}, - {"eq", 2, INST_STR_EQ}, - {"ne", 2, INST_STR_NEQ}, - {NULL, 0, 0} +enum Precedence { + PREC_END = 1, /* END */ + PREC_START, /* START */ + PREC_CLOSE_PAREN, /* ")" */ + PREC_OPEN_PAREN, /* "(" */ + PREC_COMMA, /* "," */ + PREC_CONDITIONAL, /* "?", ":" */ + PREC_OR, /* "||" */ + PREC_AND, /* "&&" */ + PREC_BIT_OR, /* "|" */ + PREC_BIT_XOR, /* "^" */ + PREC_BIT_AND, /* "&" */ + PREC_EQUAL, /* "==", "!=", "eq", "ne", "in", "ni" */ + PREC_COMPARE, /* "<", ">", "<=", ">=" */ + PREC_SHIFT, /* "<<", ">>" */ + PREC_ADD, /* "+", "-" */ + PREC_MULT, /* "*", "/", "%" */ + PREC_EXPON, /* "**" */ + PREC_UNARY /* "+", "-", FUNCTION, "!", "~" */ }; /* - * Hashtable used to map the names of expression operators to the index - * of their OperatorDesc description. + * Here the same information contained in the comments above is stored + * in inverted form, so that given a lexeme, one can quickly look up + * its precedence value. */ -static Tcl_HashTable opHashTable; +static const unsigned char prec[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + PREC_ADD, /* BINARY_PLUS */ + PREC_ADD, /* BINARY_MINUS */ + PREC_COMMA, /* COMMA */ + PREC_MULT, /* MULT */ + PREC_MULT, /* DIVIDE */ + PREC_MULT, /* MOD */ + PREC_COMPARE, /* LESS */ + PREC_COMPARE, /* GREATER */ + PREC_BIT_AND, /* BIT_AND */ + PREC_BIT_XOR, /* BIT_XOR */ + PREC_BIT_OR, /* BIT_OR */ + PREC_CONDITIONAL, /* QUESTION */ + PREC_CONDITIONAL, /* COLON */ + PREC_SHIFT, /* LEFT_SHIFT */ + PREC_SHIFT, /* RIGHT_SHIFT */ + PREC_COMPARE, /* LEQ */ + PREC_COMPARE, /* GEQ */ + PREC_EQUAL, /* EQUAL */ + PREC_EQUAL, /* NEQ */ + PREC_AND, /* AND */ + PREC_OR, /* OR */ + PREC_EQUAL, /* STREQ */ + PREC_EQUAL, /* STRNEQ */ + PREC_EXPON, /* EXPON */ + PREC_EQUAL, /* IN_LIST */ + PREC_EQUAL, /* NOT_IN_LIST */ + PREC_CLOSE_PAREN, /* CLOSE_PAREN */ + PREC_END, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + PREC_UNARY, /* UNARY_PLUS */ + PREC_UNARY, /* UNARY_MINUS */ + PREC_UNARY, /* FUNCTION */ + PREC_START, /* START */ + PREC_OPEN_PAREN, /* OPEN_PAREN */ + PREC_UNARY, /* NOT*/ + PREC_UNARY, /* BIT_NOT*/ +}; /* - * Declarations for local procedures to this file: + * A table mapping lexemes to bytecode instructions, used by CompileExprTree(). */ -static int CompileCondExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, - CompileEnv *envPtr, Tcl_Token **endPtrPtr)); -static int CompileLandOrLorExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, int opIndex, - ExprInfo *infoPtr, CompileEnv *envPtr, - Tcl_Token **endPtrPtr)); -static int CompileMathFuncCall _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, CONST char *funcName, - ExprInfo *infoPtr, CompileEnv *envPtr, - Tcl_Token **endPtrPtr)); -static int CompileSubExpr _ANSI_ARGS_(( - Tcl_Token *exprTokenPtr, ExprInfo *infoPtr, - CompileEnv *envPtr)); -static void LogSyntaxError _ANSI_ARGS_((ExprInfo *infoPtr)); +static const unsigned char instruction[] = { + /* Non-operator lexemes */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Binary operator lexemes */ + INST_ADD, /* BINARY_PLUS */ + INST_SUB, /* BINARY_MINUS */ + 0, /* COMMA */ + INST_MULT, /* MULT */ + INST_DIV, /* DIVIDE */ + INST_MOD, /* MOD */ + INST_LT, /* LESS */ + INST_GT, /* GREATER */ + INST_BITAND, /* BIT_AND */ + INST_BITXOR, /* BIT_XOR */ + INST_BITOR, /* BIT_OR */ + 0, /* QUESTION */ + 0, /* COLON */ + INST_LSHIFT, /* LEFT_SHIFT */ + INST_RSHIFT, /* RIGHT_SHIFT */ + INST_LE, /* LEQ */ + INST_GE, /* GEQ */ + INST_EQ, /* EQUAL */ + INST_NEQ, /* NEQ */ + 0, /* AND */ + 0, /* OR */ + INST_STR_EQ, /* STREQ */ + INST_STR_NEQ, /* STRNEQ */ + INST_EXPON, /* EXPON */ + INST_LIST_IN, /* IN_LIST */ + INST_LIST_NOT_IN, /* NOT_IN_LIST */ + 0, /* CLOSE_PAREN */ + 0, /* END */ + /* Expansion room for more binary operators */ + 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + /* Unary operator lexemes */ + INST_UPLUS, /* UNARY_PLUS */ + INST_UMINUS, /* UNARY_MINUS */ + 0, /* FUNCTION */ + 0, /* START */ + 0, /* OPEN_PAREN */ + INST_LNOT, /* NOT*/ + INST_BITNOT, /* BIT_NOT*/ +}; /* - * Macro used to debug the execution of the expression compiler. + * A table mapping a byte value to the corresponding lexeme for use by + * ParseLexeme(). */ -#ifdef TCL_COMPILE_DEBUG -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) \ - if (traceExprComp) { \ - fprintf(stderr, "CompileSubExpr: \"%.*s\", token \"%.*s\"\n", \ - (exprLength), (exprBytes), (tokenLength), (tokenBytes)); \ - } -#else -#define TRACE(exprBytes, exprLength, tokenBytes, tokenLength) -#endif /* TCL_COMPILE_DEBUG */ +static const unsigned char Lexeme[] = { + INVALID /* NUL */, INVALID /* SOH */, + INVALID /* STX */, INVALID /* ETX */, + INVALID /* EOT */, INVALID /* ENQ */, + INVALID /* ACK */, INVALID /* BEL */, + INVALID /* BS */, INVALID /* HT */, + INVALID /* LF */, INVALID /* VT */, + INVALID /* FF */, INVALID /* CR */, + INVALID /* SO */, INVALID /* SI */, + INVALID /* DLE */, INVALID /* DC1 */, + INVALID /* DC2 */, INVALID /* DC3 */, + INVALID /* DC4 */, INVALID /* NAK */, + INVALID /* SYN */, INVALID /* ETB */, + INVALID /* CAN */, INVALID /* EM */, + INVALID /* SUB */, INVALID /* ESC */, + INVALID /* FS */, INVALID /* GS */, + INVALID /* RS */, INVALID /* US */, + INVALID /* SPACE */, 0 /* ! or != */, + QUOTED /* " */, INVALID /* # */, + VARIABLE /* $ */, MOD /* % */, + 0 /* & or && */, INVALID /* ' */, + OPEN_PAREN /* ( */, CLOSE_PAREN /* ) */, + 0 /* * or ** */, PLUS /* + */, + COMMA /* , */, MINUS /* - */, + 0 /* . */, DIVIDE /* / */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-9 */ + COLON /* : */, INVALID /* ; */, + 0 /* < or << or <= */, + 0 /* == or INVALID */, + 0 /* > or >> or >= */, + QUESTION /* ? */, INVALID /* @ */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A-M */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* N-Z */ + SCRIPT /* [ */, INVALID /* \ */, + INVALID /* ] */, BIT_XOR /* ^ */, + INVALID /* _ */, INVALID /* ` */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a-m */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* n-z */ + BRACED /* { */, 0 /* | or || */, + INVALID /* } */, BIT_NOT /* ~ */, + INVALID /* DEL */ +}; + +/* + * The JumpList struct is used to create a stack of data needed for the + * TclEmitForwardJump() and TclFixupForwardJump() calls that are performed + * when compiling the short-circuiting operators QUESTION/COLON, AND, and OR. + * Keeping a stack permits the CompileExprTree() routine to be non-recursive. + */ + +typedef struct JumpList { + JumpFixup jump; /* Pass this argument to matching calls of + * TclEmitForwardJump() and + * TclFixupForwardJump(). */ + int depth; /* Remember the currStackDepth of the + * CompileEnv here. */ + int offset; /* Data used to compute jump lengths to pass + * to TclFixupForwardJump() */ + int convert; /* Temporary storage used to compute whether + * numeric conversion will be needed following + * the operator we're compiling. */ + struct JumpList *next; /* Point to next item on the stack */ +} JumpList; + +/* + * Declarations for local functions to this file: + */ + +static void CompileExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, Tcl_Token *tokenPtr, + CompileEnv *envPtr, int optimize); +static void ConvertTreeToTokens(const char *start, int numBytes, + OpNode *nodes, Tcl_Token *tokenPtr, + Tcl_Parse *parsePtr); +static int ExecConstantExprTree(Tcl_Interp *interp, OpNode *nodes, + int index, Tcl_Obj * const **litObjvPtr); +static int ParseExpr(Tcl_Interp *interp, const char *start, + int numBytes, OpNode **opTreePtr, + Tcl_Obj *litList, Tcl_Obj *funcList, + Tcl_Parse *parsePtr, int parseOnly); +static int ParseLexeme(const char *start, int numBytes, + unsigned char *lexemePtr, Tcl_Obj **literalPtr); + /* *---------------------------------------------------------------------- * - * TclCompileExpr -- + * ParseExpr -- * - * This procedure compiles a string containing a Tcl expression into - * Tcl bytecodes. This procedure is the top-level interface to the - * the expression compilation module, and is used by such public - * procedures as Tcl_ExprString, Tcl_ExprStringObj, Tcl_ExprLong, - * Tcl_ExprDouble, Tcl_ExprBoolean, and Tcl_ExprBooleanObj. + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and constructs a tree representing + * the structure of the expression. The caller must pass in empty + * lists as the funcList and litList arguments. The elements of the + * parsed expression are returned to the caller as that tree, a list of + * literal values, a list of function names, and in Tcl_Tokens + * added to a Tcl_Parse struct passed in by the caller. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * If the string is successfully parsed as a valid Tcl expression, TCL_OK + * is returned, and data about the expression structure is written to + * the last four arguments. If the string cannot be parsed as a valid + * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL, an + * error message is written to interp. * * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. + * Memory will be allocated. If TCL_OK is returned, the caller must + * clean up the returned data structures. The (OpNode *) value written + * to opTreePtr should be passed to ckfree() and the parsePtr argument + * should be passed to Tcl_FreeParse(). The elements appended to the + * litList and funcList will automatically be freed whenever the + * refcount on those lists indicates they can be freed. * *---------------------------------------------------------------------- */ -int -TclCompileExpr(interp, script, numBytes, envPtr) - Tcl_Interp *interp; /* Used for error reporting. */ - CONST char *script; /* The source script to compile. */ - int numBytes; /* Number of bytes in script. If < 0, the - * string consists of all bytes up to the - * first null character. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ +static int +ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. */ + OpNode **opTreePtr, /* Points to space where a pointer to the + * allocated OpNode tree should go. */ + Tcl_Obj *litList, /* List to append literals to. */ + Tcl_Obj *funcList, /* List to append function names to. */ + Tcl_Parse *parsePtr, /* Structure to fill with tokens representing + * those operands that require run time + * substitutions. */ + int parseOnly) /* A boolean indicating whether the caller's + * aim is just a parse, or whether it will go + * on to compile the expression. Different + * optimizations are appropriate for the + * two scenarios. */ { - ExprInfo info; - Tcl_Parse parse; - Tcl_HashEntry *hPtr; - int new, i, code; + OpNode *nodes = NULL; /* Pointer to the OpNode storage array where + * we build the parse tree. */ + int nodesAvailable = 64; /* Initial size of the storage array. This + * value establishes a minimum tree memory cost + * of only about 1 kibyte, and is large enough + * for most expressions to parse with no need + * for array growth and reallocation. */ + int nodesUsed = 0; /* Number of OpNodes filled. */ + int scanned = 0; /* Capture number of byte scanned by + * parsing routines. */ + int lastParsed; /* Stores info about what the lexeme parsed + * the previous pass through the parsing loop + * was. If it was an operator, lastParsed is + * the index of the OpNode for that operator. + * If it was not an operator, lastParsed holds + * an OperandTypes value encoding what we + * need to know about it. */ + int incomplete; /* Index of the most recent incomplete tree + * in the OpNode array. Heads a stack of + * incomplete trees linked by p.prev. */ + int complete = OT_EMPTY; /* "Index" of the complete tree (that is, a + * complete subexpression) determined at the + * moment. OT_EMPTY is a nonsense value + * used only to silence compiler warnings. + * During a parse, complete will always hold + * an index or an OperandTypes value pointing + * to an actual leaf at the time the complete + * tree is needed. */ + + /* These variables control generation of the error message. */ + Tcl_Obj *msg = NULL; /* The error message. */ + Tcl_Obj *post = NULL; /* In a few cases, an additional postscript + * for the error message, supplying more + * information after the error msg and + * location have been reported. */ + const char *mark = "_@_"; /* In the portion of the complete error message + * where the error location is reported, this + * "mark" substring is inserted into the + * string being parsed to aid in pinpointing + * the location of the syntax error in the + * expression. */ + int insertMark = 0; /* A boolean controlling whether the "mark" + * should be inserted. */ + const int limit = 25; /* Portions of the error message are + * constructed out of substrings of the + * original expression. In order to keep the + * error message readable, we impose this limit + * on the substring size we extract. */ + + TclParseInit(interp, start, numBytes, parsePtr); + + nodes = (OpNode *) attemptckalloc(nodesAvailable * sizeof(OpNode)); + if (nodes == NULL) { + TclNewLiteralStringObj(msg, "not enough memory to parse expression"); + goto error; + } + + /* Initialize the parse tree with the special "START" node. */ + nodes->lexeme = START; + nodes->precedence = prec[START]; + nodes->mark = MARK_RIGHT; + nodes->constant = 1; + incomplete = lastParsed = nodesUsed; + nodesUsed++; /* - * If this is the first time we've been called, initialize the table - * of expression operators. + * Main parsing loop parses one lexeme per iteration. We exit the + * loop only when there's a syntax error with a "goto error" which + * takes us to the error handling code following the loop, or when + * we've successfully completed the parse and we return to the caller. */ - if (numBytes < 0) { - numBytes = (script? strlen(script) : 0); - } - if (!opTableInitialized) { - Tcl_MutexLock(&opMutex); - if (!opTableInitialized) { - Tcl_InitHashTable(&opHashTable, TCL_STRING_KEYS); - for (i = 0; operatorTable[i].name != NULL; i++) { - hPtr = Tcl_CreateHashEntry(&opHashTable, - operatorTable[i].name, &new); - if (new) { - Tcl_SetHashValue(hPtr, (ClientData) i); - } + while (1) { + OpNode *nodePtr; /* Points to the OpNode we may fill this + * pass through the loop. */ + unsigned char lexeme; /* The lexeme we parse this iteration. */ + Tcl_Obj *literal; /* Filled by the ParseLexeme() call when + * a literal is parsed that has a Tcl_Obj + * rep worth preserving. */ + + /* + * Each pass through this loop adds up to one more OpNode. Allocate + * space for one if required. + */ + + if (nodesUsed >= nodesAvailable) { + int size = nodesUsed * 2; + OpNode *newPtr; + + do { + newPtr = (OpNode *) attemptckrealloc((char *) nodes, + (unsigned int) size * sizeof(OpNode)); + } while ((newPtr == NULL) + && ((size -= (size - nodesUsed) / 2) > nodesUsed)); + if (newPtr == NULL) { + TclNewLiteralStringObj(msg, + "not enough memory to parse expression"); + goto error; } - opTableInitialized = 1; + nodesAvailable = size; + nodes = newPtr; } - Tcl_MutexUnlock(&opMutex); - } + nodePtr = nodes + nodesUsed; + + /* Skip white space between lexemes. */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start += scanned; + numBytes -= scanned; + + scanned = ParseLexeme(start, numBytes, &lexeme, &literal); + + /* Use context to categorize the lexemes that are ambiguous. */ + if ((NODE_TYPE & lexeme) == 0) { + switch (lexeme) { + case INVALID: + msg = Tcl_ObjPrintf( + "invalid character \"%.*s\"", scanned, start); + goto error; + case INCOMPLETE: + msg = Tcl_ObjPrintf( + "incomplete operator \"%.*s\"", scanned, start); + goto error; + case BAREWORD: + + /* + * Most barewords in an expression are a syntax error. + * The exceptions are that when a bareword is followed by + * an open paren, it might be a function call, and when the + * bareword is a legal literal boolean value, we accept that + * as well. + */ + + if (start[scanned+TclParseAllWhiteSpace( + start+scanned, numBytes-scanned)] == '(') { + lexeme = FUNCTION; - /* - * Initialize the structure containing information abvout this - * expression compilation. - */ + /* + * When we compile the expression we'll need the function + * name, and there's no place in the parse tree to store + * it, so we keep a separate list of all the function + * names we've parsed in the order we found them. + */ - info.interp = interp; - info.parsePtr = &parse; - info.expr = script; - info.lastChar = (script + numBytes); - info.hasOperators = 0; + Tcl_ListObjAppendElement(NULL, funcList, literal); + } else { + int b; + if (Tcl_GetBooleanFromObj(NULL, literal, &b) == TCL_OK) { + lexeme = BOOLEAN; + } else { + Tcl_DecrRefCount(literal); + msg = Tcl_ObjPrintf( + "invalid bareword \"%.*s%s\"", + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "..."); + post = Tcl_ObjPrintf( + "should be \"$%.*s%s\" or \"{%.*s%s}\"", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + Tcl_AppendPrintfToObj(post, + " or \"%.*s%s(...)\" or ...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + if (start[0] == '0') { + const char *stop; + TclParseNumber(NULL, NULL, NULL, start, scanned, + &stop, TCL_PARSE_NO_WHITESPACE); + + if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + + switch (start[1]) { + case 'b': + Tcl_AppendToObj(post, + " (invalid binary number?)", -1); + break; + case 'o': + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + break; + default: + if (isdigit(UCHAR(start[1]))) { + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + } + break; + } + } + } + goto error; + } + } + break; + case PLUS: + case MINUS: + if (IsOperator(lastParsed)) { + + /* + * A "+" or "-" coming just after another operator + * must be interpreted as a unary operator. + */ + + lexeme |= UNARY; + } else { + lexeme |= BINARY; + } + } + } /* Uncategorized lexemes */ + + /* Handle lexeme based on its category. */ + switch (NODE_TYPE & lexeme) { + + /* + * Each LEAF results in either a literal getting appended to the + * litList, or a sequence of Tcl_Tokens representing a Tcl word + * getting appended to the parsePtr->tokens. No OpNode is filled + * for this lexeme. + */ + + case LEAF: { + Tcl_Token *tokenPtr; + const char *end = start; + int wordIndex; + int code = TCL_OK; + + /* + * A leaf operand appearing just after something that's not an + * operator is a syntax error. + */ + + if (NotOperator(lastParsed)) { + msg = Tcl_ObjPrintf("missing operator at %s", mark); + scanned = 0; + insertMark = 1; + + /* Free any literal to avoid a memleak. */ + if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) { + Tcl_DecrRefCount(literal); + } + goto error; + } + + switch (lexeme) { + case NUMBER: + case BOOLEAN: + /* + * TODO: Consider using a dict or hash to collapse all + * duplicate literals into a single representative value. + * (Like what is done with [split $s {}]). + * Pro: ~75% memory saving on expressions like + * {1+1+1+1+1+.....+1} (Convert "pointer + Tcl_Obj" cost + * to "pointer" cost only) + * Con: Cost of the dict store/retrieve on every literal + * in every expression when expressions like the above + * tend to be uncommon. + * The memory savings is temporary; Compiling to bytecode + * will collapse things as literals are registered + * anyway, so the savings applies only to the time + * between parsing and compiling. Possibly important + * due to high-water mark nature of memory allocation. + */ + Tcl_ListObjAppendElement(NULL, litList, literal); + complete = lastParsed = OT_LITERAL; + start += scanned; + numBytes -= scanned; + continue; + + default: + break; + } + + /* + * Remaining LEAF cases may involve filling Tcl_Tokens, so + * make room for at least 2 more tokens. + */ + + TclGrowParseTokenArray(parsePtr, 2); + wordIndex = parsePtr->numTokens; + tokenPtr = parsePtr->tokenPtr + wordIndex; + tokenPtr->type = TCL_TOKEN_WORD; + tokenPtr->start = start; + parsePtr->numTokens++; + + switch (lexeme) { + case QUOTED: + code = Tcl_ParseQuotedString(NULL, start, numBytes, + parsePtr, 1, &end); + scanned = end - start; + break; + + case BRACED: + code = Tcl_ParseBraces(NULL, start, numBytes, + parsePtr, 1, &end); + scanned = end - start; + break; + + case VARIABLE: + code = Tcl_ParseVarName(NULL, start, numBytes, parsePtr, 1); + + /* + * Handle the quirk that Tcl_ParseVarName reports a successful + * parse even when it gets only a "$" with no variable name. + */ + + tokenPtr = parsePtr->tokenPtr + wordIndex + 1; + if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) { + TclNewLiteralStringObj(msg, "invalid character \"$\""); + goto error; + } + scanned = tokenPtr->size; + break; + + case SCRIPT: { + Tcl_Parse *nestedPtr = + (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); + + tokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + tokenPtr->type = TCL_TOKEN_COMMAND; + tokenPtr->start = start; + tokenPtr->numComponents = 0; + + end = start + numBytes; + start++; + while (1) { + code = Tcl_ParseCommand(interp, start, (end - start), 1, + nestedPtr); + if (code != TCL_OK) { + parsePtr->term = nestedPtr->term; + parsePtr->errorType = nestedPtr->errorType; + parsePtr->incomplete = nestedPtr->incomplete; + break; + } + start = (nestedPtr->commandStart + nestedPtr->commandSize); + Tcl_FreeParse(nestedPtr); + if ((nestedPtr->term < end) && (*(nestedPtr->term) == ']') + && !(nestedPtr->incomplete)) { + break; + } + + if (start == end) { + TclNewLiteralStringObj(msg, "missing close-bracket"); + parsePtr->term = tokenPtr->start; + parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; + parsePtr->incomplete = 1; + code = TCL_ERROR; + break; + } + } + TclStackFree(interp, nestedPtr); + end = start; + start = tokenPtr->start; + scanned = end - start; + tokenPtr->size = scanned; + parsePtr->numTokens++; + break; + } + } + if (code != TCL_OK) { + + /* + * Here we handle all the syntax errors generated by + * the Tcl_Token generating parsing routines called in the + * switch just above. If the value of parsePtr->incomplete + * is 1, then the error was an unbalanced '[', '(', '{', + * or '"' and parsePtr->term is pointing to that unbalanced + * character. If the value of parsePtr->incomplete is 0, + * then the error is one of lacking whitespace following a + * quoted word, for example: expr {[an error {foo}bar]}, + * and parsePtr->term points to where the whitespace is + * missing. We reset our values of start and scanned so that + * when our error message is constructed, the location of + * the syntax error is sure to appear in it, even if the + * quoted expression is truncated. + */ + + start = parsePtr->term; + scanned = parsePtr->incomplete; + goto error; + } + + tokenPtr = parsePtr->tokenPtr + wordIndex; + tokenPtr->size = scanned; + tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1; + if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) { + + /* + * When this expression is destined to be compiled, and a + * braced or quoted word within an expression is known at + * compile time (no runtime substitutions in it), we can + * store it as a literal rather than in its tokenized form. + * This is an advantage since the compiled bytecode is going + * to need the argument in Tcl_Obj form eventually, so it's + * just as well to get there now. Another advantage is that + * with this conversion, larger constant expressions might + * be grown and optimized. + * + * On the contrary, if the end goal of this parse is to + * fill a Tcl_Parse for a caller of Tcl_ParseExpr(), then it's + * wasteful to convert to a literal only to convert back again + * later. + */ + + literal = Tcl_NewObj(); + if (TclWordKnownAtCompileTime(tokenPtr, literal)) { + Tcl_ListObjAppendElement(NULL, litList, literal); + complete = lastParsed = OT_LITERAL; + parsePtr->numTokens = wordIndex; + break; + } + Tcl_DecrRefCount(literal); + } + complete = lastParsed = OT_TOKENS; + break; + } /* case LEAF */ + + case UNARY: + + /* + * A unary operator appearing just after something that's not an + * operator is a syntax error -- something trying to be the left + * operand of an operator that doesn't take one. + */ + + if (NotOperator(lastParsed)) { + msg = Tcl_ObjPrintf("missing operator at %s", mark); + scanned = 0; + insertMark = 1; + goto error; + } + + /* Create an OpNode for the unary operator */ + nodePtr->lexeme = lexeme; + nodePtr->precedence = prec[lexeme]; + nodePtr->mark = MARK_RIGHT; + + /* + * A FUNCTION cannot be a constant expression, because Tcl allows + * functions to return variable results with the same arguments; + * for example, rand(). Other unary operators can root a constant + * expression, so long as the argument is a constant expression. + */ + + nodePtr->constant = (lexeme != FUNCTION); + + /* + * This unary operator is a new incomplete tree, so push it + * onto our stack of incomplete trees. Also remember it as + * the last lexeme we parsed. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + break; + + case BINARY: { + OpNode *incompletePtr; + unsigned char precedence = prec[lexeme]; + + /* + * A binary operator appearing just after another operator is a + * syntax error -- one of the two operators is missing an operand. + */ + + if (IsOperator(lastParsed)) { + if ((lexeme == CLOSE_PAREN) + && (nodePtr[-1].lexeme == OPEN_PAREN)) { + if (nodePtr[-2].lexeme == FUNCTION) { + + /* + * Normally, "()" is a syntax error, but as a special + * case accept it as an argument list for a function. + * Treat this as a special LEAF lexeme, and restart + * the parsing loop with zero characters scanned. + * We'll parse the ")" again the next time through, + * but with the OT_EMPTY leaf as the subexpression + * between the parens. + */ + + scanned = 0; + complete = lastParsed = OT_EMPTY; + break; + } + msg = Tcl_ObjPrintf("empty subexpression at %s", mark); + scanned = 0; + insertMark = 1; + goto error; + } + + if (nodePtr[-1].precedence > precedence) { + if (nodePtr[-1].lexeme == OPEN_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + } else if (nodePtr[-1].lexeme == COMMA) { + msg = Tcl_ObjPrintf( + "missing function argument at %s", mark); + scanned = 0; + insertMark = 1; + } else if (nodePtr[-1].lexeme == START) { + TclNewLiteralStringObj(msg, "empty expression"); + } + } else { + if (lexeme == CLOSE_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + } else if ((lexeme == COMMA) + && (nodePtr[-1].lexeme == OPEN_PAREN) + && (nodePtr[-2].lexeme == FUNCTION)) { + msg = Tcl_ObjPrintf( + "missing function argument at %s", mark); + scanned = 0; + insertMark = 1; + } + } + if (msg == NULL) { + msg = Tcl_ObjPrintf("missing operand at %s", mark); + scanned = 0; + insertMark = 1; + } + goto error; + } + + /* + * Here is where the tree comes together. At this point, we + * have a stack of incomplete trees corresponding to + * substrings that are incomplete expressions, followed by + * a complete tree corresponding to a substring that is itself + * a complete expression, followed by the binary operator we have + * just parsed. The incomplete trees can each be completed by + * adding a right operand. + * + * To illustrate with an example, when we parse the expression + * "1+2*3-4" and we reach this point having just parsed the "-" + * operator, we have these incomplete trees: START, "1+", and + * "2*". Next we have the complete subexpression "3". Last is + * the "-" we've just parsed. + * + * The next step is to join our complete tree to an operator. + * The choice is governed by the precedence and associativity + * of the competing operators. If we connect it as the right + * operand of our most recent incomplete tree, we get a new + * complete tree, and we can repeat the process. The while + * loop following repeats this until precedence indicates it + * is time to join the complete tree as the left operand of + * the just parsed binary operator. + * + * Continuing the example, the first pass through the loop + * will join "3" to "2*"; the next pass will join "2*3" to + * "1+". Then we'll exit the loop and join "1+2*3" to "-". + * When we return to parse another lexeme, our stack of + * incomplete trees is START and "1+2*3-". + */ + + while (1) { + incompletePtr = nodes + incomplete; + + if (incompletePtr->precedence < precedence) { + break; + } + + if (incompletePtr->precedence == precedence) { + + /* Right association rules for exponentiation. */ + if (lexeme == EXPON) { + break; + } + + /* + * Special association rules for the conditional operators. + * The "?" and ":" operators have equal precedence, but + * must be linked up in sensible pairs. + */ + + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { + break; + } + if ((incompletePtr->lexeme == COLON) + && (lexeme == QUESTION)) { + break; + } + } + + /* Some special syntax checks... */ + + /* Parens must balance */ + if ((incompletePtr->lexeme == OPEN_PAREN) + && (lexeme != CLOSE_PAREN)) { + TclNewLiteralStringObj(msg, "unbalanced open paren"); + parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + goto error; + } + + /* Right operand of "?" must be ":" */ + if ((incompletePtr->lexeme == QUESTION) + && (NotOperator(complete) + || (nodes[complete].lexeme != COLON))) { + msg = Tcl_ObjPrintf( + "missing operator \":\" at %s", mark); + scanned = 0; + insertMark = 1; + goto error; + } + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) + && (nodes[complete].lexeme == COLON) + && (incompletePtr->lexeme != QUESTION)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" " + "without preceding \"?\""); + goto error; + } + + /* + * Attach complete tree as right operand of most recent + * incomplete tree. + */ + + incompletePtr->right = complete; + if (IsOperator(complete)) { + nodes[complete].p.parent = incomplete; + incompletePtr->constant = incompletePtr->constant + && nodes[complete].constant; + } else { + incompletePtr->constant = incompletePtr->constant + && (complete == OT_LITERAL); + } + + /* + * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations each + * make up a single operator. Force them to agree whether they + * have a constant expression. + */ + + if ((incompletePtr->lexeme == QUESTION) + || (incompletePtr->lexeme == FUNCTION)) { + nodes[complete].constant = incompletePtr->constant; + } + + if (incompletePtr->lexeme == START) { + + /* + * Completing the START tree indicates we're done. + * Transfer the parse tree to the caller and return. + */ + + *opTreePtr = nodes; + return TCL_OK; + } + + /* + * With a right operand attached, last incomplete tree has + * become the complete tree. Pop it from the incomplete + * tree stack. + */ + + complete = incomplete; + incomplete = incompletePtr->p.prev; + + /* CLOSE_PAREN can only close one OPEN_PAREN. */ + if (incompletePtr->lexeme == OPEN_PAREN) { + break; + } + } + + /* More syntax checks... */ + + /* Parens must balance. */ + if (lexeme == CLOSE_PAREN) { + if (incompletePtr->lexeme != OPEN_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + goto error; + } + } + + /* Commas must appear only in function argument lists. */ + if (lexeme == COMMA) { + if ((incompletePtr->lexeme != OPEN_PAREN) + || (incompletePtr[-1].lexeme != FUNCTION)) { + TclNewLiteralStringObj(msg, + "unexpected \",\" outside function argument list"); + goto error; + } + } + + /* Operator ":" may only be right operand of "?" */ + if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) { + TclNewLiteralStringObj(msg, + "unexpected operator \":\" without preceding \"?\""); + goto error; + } + + /* Create no node for a CLOSE_PAREN lexeme. */ + if (lexeme == CLOSE_PAREN) { + break; + } + + /* Link complete tree as left operand of new node. */ + nodePtr->lexeme = lexeme; + nodePtr->precedence = precedence; + nodePtr->mark = MARK_LEFT; + nodePtr->left = complete; + + /* + * The COMMA operator cannot be optimized, since the function + * needs all of its arguments, and optimization would reduce + * the number. Other binary operators root constant expressions + * when both arguments are constant expressions. + */ + + nodePtr->constant = (lexeme != COMMA); + + if (IsOperator(complete)) { + nodes[complete].p.parent = nodesUsed; + nodePtr->constant = nodePtr->constant + && nodes[complete].constant; + } else { + nodePtr->constant = nodePtr->constant + && (complete == OT_LITERAL); + } + + /* + * With a left operand attached and a right operand missing, + * the just-parsed binary operator is root of a new incomplete + * tree. Push it onto the stack of incomplete trees. + */ + + nodePtr->p.prev = incomplete; + incomplete = lastParsed = nodesUsed; + nodesUsed++; + break; + } /* case BINARY */ + } /* lexeme handler */ + + /* Advance past the just-parsed lexeme */ + start += scanned; + numBytes -= scanned; + } /* main parsing loop */ + + error: /* - * Parse the expression then compile it. + * We only get here if there's been an error. + * Any errors that didn't get a suitable parsePtr->errorType, + * get recorded as syntax errors. */ - code = Tcl_ParseExpr(interp, script, numBytes, &parse); - if (code != TCL_OK) { - goto done; + if (parsePtr->errorType == TCL_PARSE_SUCCESS) { + parsePtr->errorType = TCL_PARSE_SYNTAX; } -#ifdef TCL_TIP280 - /* TIP #280 : Track Lines within the expression */ - TclAdvanceLines (&envPtr->line, script, parse.tokenPtr->start); -#endif - - code = CompileSubExpr(parse.tokenPtr, &info, envPtr); - if (code != TCL_OK) { - Tcl_FreeParse(&parse); - goto done; + /* Free any partial parse tree we've built. */ + if (nodes != NULL) { + ckfree((char*) nodes); } - - if (!info.hasOperators) { + + if (interp == NULL) { + + /* Nowhere to report an error message, so just free it */ + if (msg) { + Tcl_DecrRefCount(msg); + } + } else { + /* - * Attempt to convert the primary's object to an int or double. - * This is done in order to support Tcl's policy of interpreting - * operands if at all possible as first integers, else - * floating-point numbers. + * Construct the complete error message. Start with the simple + * error message, pulled from the interp result if necessary... */ - - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + + if (msg == NULL) { + msg = Tcl_GetObjResult(interp); + } + + /* + * Add a detailed quote from the bad expression, displaying and + * sometimes marking the precise location of the syntax error. + */ + + Tcl_AppendPrintfToObj(msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", + ((start - limit) < parsePtr->string) ? "" : "...", + ((start - limit) < parsePtr->string) + ? (int) (start - parsePtr->string) : limit - 3, + ((start - limit) < parsePtr->string) + ? parsePtr->string : start - limit + 3, + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "...", insertMark ? mark : "", + (start + scanned + limit > parsePtr->end) + ? (int) (parsePtr->end - start) - scanned : limit-3, + start + scanned, + (start + scanned + limit > parsePtr->end) ? "" : "..."); + + /* Next, append any postscript message. */ + if (post != NULL) { + Tcl_AppendToObj(msg, ";\n", -1); + Tcl_AppendObjToObj(msg, post); + Tcl_DecrRefCount(post); + } + Tcl_SetObjResult(interp, msg); + + /* Finally, place context information in the errorInfo. */ + numBytes = parsePtr->end - parsePtr->string; + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (parsing expression \"%.*s%s\")", + (numBytes < limit) ? numBytes : limit - 3, + parsePtr->string, (numBytes < limit) ? "" : "...")); } - Tcl_FreeParse(&parse); - done: - return code; + return TCL_ERROR; } /* *---------------------------------------------------------------------- * - * TclFinalizeCompilation -- + * ConvertTreeToTokens -- * - * Clean up the compilation environment so it can later be - * properly reinitialized. This procedure is called by Tcl_Finalize(). + * Given a string, the numBytes bytes starting at start, and an OpNode + * tree and Tcl_Token array created by passing that same string to + * ParseExpr(), this function writes into *parsePtr the sequence of + * Tcl_Tokens needed so to satisfy the historical interface provided + * by Tcl_ParseExpr(). Note that this routine exists only for the sake + * of the public Tcl_ParseExpr() routine. It is not used by Tcl itself + * at all. * * Results: * None. * * Side effects: - * Cleans up the compilation environment. At the moment, just the - * table of expression operators is freed. + * The Tcl_Parse *parsePtr is filled with Tcl_Tokens representing the + * parsed expression. * *---------------------------------------------------------------------- */ -void -TclFinalizeCompilation() +static void +ConvertTreeToTokens( + const char *start, + int numBytes, + OpNode *nodes, + Tcl_Token *tokenPtr, + Tcl_Parse *parsePtr) { - Tcl_MutexLock(&opMutex); - if (opTableInitialized) { - Tcl_DeleteHashTable(&opHashTable); - opTableInitialized = 0; + int subExprTokenIdx = 0; + OpNode *nodePtr = nodes; + int next = nodePtr->right; + + while (1) { + Tcl_Token *subExprTokenPtr; + int scanned, parentIdx; + unsigned char lexeme; + + /* + * Advance the mark so the next exit from this node won't retrace + * steps over ground already covered. + */ + + nodePtr->mark++; + + /* Handle next child node or leaf */ + switch (next) { + case OT_EMPTY: + + /* No tokens and no characters for the OT_EMPTY leaf. */ + break; + + case OT_LITERAL: + + /* Skip any white space that comes before the literal */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* Reparse the literal to get pointers into source string */ + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->start = start; + subExprTokenPtr->size = scanned; + subExprTokenPtr->numComponents = 1; + subExprTokenPtr[1].type = TCL_TOKEN_TEXT; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + subExprTokenPtr[1].numComponents = 0; + + parsePtr->numTokens += 2; + start +=scanned; + numBytes -= scanned; + break; + + case OT_TOKENS: { + + /* + * tokenPtr points to a token sequence that came from parsing + * a Tcl word. A Tcl word is made up of a sequence of one or + * more elements. When the word is only a single element, it's + * been the historical practice to replace the TCL_TOKEN_WORD + * token directly with a TCL_TOKEN_SUB_EXPR token. However, + * when the word has multiple elements, a TCL_TOKEN_WORD token + * is kept as a grouping device so that TCL_TOKEN_SUB_EXPR + * always has only one element. Wise or not, these are the + * rules the Tcl expr parser has followed, and for the sake + * of those few callers of Tcl_ParseExpr() we do not change + * them now. Internally, we can do better. + */ + + int toCopy = tokenPtr->numComponents + 1; + + if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { + + /* + * Single element word. Copy tokens and convert the leading + * token to TCL_TOKEN_SUB_EXPR. + */ + + TclGrowParseTokenArray(parsePtr, toCopy); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + parsePtr->numTokens += toCopy; + } else { + + /* + * Multiple element word. Create a TCL_TOKEN_SUB_EXPR + * token to lead, with fields initialized from the leading + * token, then copy entire set of word tokens. + */ + + TclGrowParseTokenArray(parsePtr, toCopy+1); + subExprTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + *subExprTokenPtr = *tokenPtr; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr->numComponents++; + subExprTokenPtr++; + memcpy(subExprTokenPtr, tokenPtr, + (size_t) toCopy * sizeof(Tcl_Token)); + parsePtr->numTokens += toCopy + 1; + } + + scanned = tokenPtr->start + tokenPtr->size - start; + start +=scanned; + numBytes -= scanned; + tokenPtr += toCopy; + break; + } + + default: + + /* Advance to the child node, which is an operator. */ + nodePtr = nodes + next; + + /* Skip any white space that comes before the subexpression */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* Generate tokens for the operator / subexpression... */ + switch (nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + + /* + * Historical practice has been to have no Tcl_Tokens for + * these operators. + */ + + break; + + default: { + + /* + * Remember the index of the last subexpression we were + * working on -- that of our parent. We'll stack it later. + */ + + parentIdx = subExprTokenIdx; + + /* + * Verify space for the two leading Tcl_Tokens representing + * the subexpression rooted by this operator. The first + * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second + * of type TCL_TOKEN_OPERATOR. + */ + + TclGrowParseTokenArray(parsePtr, 2); + subExprTokenIdx = parsePtr->numTokens; + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + parsePtr->numTokens += 2; + subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; + subExprTokenPtr[1].type = TCL_TOKEN_OPERATOR; + + /* + * Our current position scanning the string is the starting + * point for this subexpression. + */ + + subExprTokenPtr->start = start; + + /* + * Eventually, we know that the numComponents field of the + * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means + * we can make other use of this field for now to track the + * stack of subexpressions we have pending. + */ + + subExprTokenPtr[1].numComponents = parentIdx; + break; + } + } + break; + } + + /* Determine which way to exit the node on this pass. */ + router: + switch (nodePtr->mark) { + case MARK_LEFT: + next = nodePtr->left; + break; + + case MARK_RIGHT: + next = nodePtr->right; + + /* Skip any white space that comes before the operator */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + + /* + * Here we scan from the string the operator corresponding to + * nodePtr->lexeme. + */ + + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + + switch(nodePtr->lexeme) { + case OPEN_PAREN: + case COMMA: + case COLON: + + /* No tokens for these lexemes -> nothing to do. */ + break; + + default: + + /* + * Record in the TCL_TOKEN_OPERATOR token the pointers into + * the string marking where the operator is. + */ + + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr[1].start = start; + subExprTokenPtr[1].size = scanned; + break; + } + + start +=scanned; + numBytes -= scanned; + break; + + case MARK_PARENT: + switch (nodePtr->lexeme) { + case START: + + /* When we get back to the START node, we're done. */ + return; + + case COMMA: + case COLON: + + /* No tokens for these lexemes -> nothing to do. */ + break; + + case OPEN_PAREN: + + /* Skip past matching close paren. */ + scanned = TclParseAllWhiteSpace(start, numBytes); + start +=scanned; + numBytes -= scanned; + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); + start +=scanned; + numBytes -= scanned; + break; + + default: { + + /* + * Before we leave this node/operator/subexpression for the + * last time, finish up its tokens.... + * + * Our current position scanning the string is where the + * substring for the subexpression ends. + */ + + subExprTokenPtr = parsePtr->tokenPtr + subExprTokenIdx; + subExprTokenPtr->size = start - subExprTokenPtr->start; + + /* + * All the Tcl_Tokens allocated and filled belong to + * this subexpresion. The first token is the leading + * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) + * are its components. + */ + + subExprTokenPtr->numComponents = + (parsePtr->numTokens - subExprTokenIdx) - 1; + + /* + * Finally, as we return up the tree to our parent, pop the + * parent subexpression off our subexpression stack, and + * fill in the zero numComponents for the operator Tcl_Token. + */ + + parentIdx = subExprTokenPtr[1].numComponents; + subExprTokenPtr[1].numComponents = 0; + subExprTokenIdx = parentIdx; + break; + } + } + + /* Since we're returning to parent, skip child handling code. */ + nodePtr = nodes + nodePtr->p.parent; + goto router; + } } - Tcl_MutexUnlock(&opMutex); } /* *---------------------------------------------------------------------- * - * CompileSubExpr -- + * Tcl_ParseExpr -- * - * Given a pointer to a TCL_TOKEN_SUB_EXPR token describing a - * subexpression, this procedure emits instructions to evaluate the - * subexpression at runtime. + * Given a string, the numBytes bytes starting at start, this function + * parses it as a Tcl expression and stores information about the + * structure of the expression in the Tcl_Parse struct indicated by the + * caller. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * If the string is successfully parsed as a valid Tcl expression, TCL_OK + * is returned, and data about the expression structure is written to + * *parsePtr. If the string cannot be parsed as a valid Tcl expression, + * TCL_ERROR is returned, and if interp is non-NULL, an error message is + * written to interp. * * Side effects: - * Adds instructions to envPtr to evaluate the subexpression. + * If there is insufficient space in parsePtr to hold all the information + * about the expression, then additional space is malloc-ed. If the + * function returns TCL_OK then the caller must eventually invoke + * Tcl_FreeParse to release any additional space that was allocated. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_ParseExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *start, /* Start of source string to parse. */ + int numBytes, /* Number of bytes in string. If < 0, the + * string consists of all bytes up to the + * first null character. */ + Tcl_Parse *parsePtr) /* Structure to fill with information about + * the parsed expression; any previous + * information in the structure is ignored. */ +{ + int code; + OpNode *opTree = NULL; /* Will point to the tree of operators */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ + Tcl_Parse *exprParsePtr = + (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions */ + + if (numBytes < 0) { + numBytes = (start ? strlen(start) : 0); + } + + code = ParseExpr(interp, start, numBytes, &opTree, litList, + funcList, exprParsePtr, 1 /* parseOnly */); + Tcl_DecrRefCount(funcList); + Tcl_DecrRefCount(litList); + + TclParseInit(interp, start, numBytes, parsePtr); + if (code == TCL_OK) { + ConvertTreeToTokens(start, numBytes, + opTree, exprParsePtr->tokenPtr, parsePtr); + } else { + parsePtr->term = exprParsePtr->term; + parsePtr->errorType = exprParsePtr->errorType; + } + + Tcl_FreeParse(exprParsePtr); + TclStackFree(interp, exprParsePtr); + ckfree((char *) opTree); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * ParseLexeme -- + * + * Parse a single lexeme from the start of a string, scanning no more + * than numBytes bytes. + * + * Results: + * Returns the number of bytes scanned to produce the lexeme. + * + * Side effects: + * Code identifying lexeme parsed is writen to *lexemePtr. * *---------------------------------------------------------------------- */ static int -CompileSubExpr(exprTokenPtr, infoPtr, envPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * to compile. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ +ParseLexeme( + const char *start, /* Start of lexeme to parse. */ + int numBytes, /* Number of bytes in string. */ + unsigned char *lexemePtr, /* Write code of parsed lexeme to this + * storage. */ + Tcl_Obj **literalPtr) /* Write corresponding literal value to this + storage, if non-NULL. */ { - Tcl_Interp *interp = infoPtr->interp; - Tcl_Token *tokenPtr, *endPtr = NULL; /* silence gcc 4 warning */ - Tcl_Token *afterSubexprPtr; - CONST OperatorDesc *opDescPtr; - Tcl_HashEntry *hPtr; - CONST char *operator; - Tcl_DString opBuf; - int objIndex, opIndex, length, code; - char buffer[TCL_UTF_MAX]; - - if (exprTokenPtr->type != TCL_TOKEN_SUB_EXPR) { - panic("CompileSubExpr: token type %d not TCL_TOKEN_SUB_EXPR\n", - exprTokenPtr->type); + const char *end; + int scanned; + Tcl_UniChar ch; + Tcl_Obj *literal = NULL; + unsigned char byte; + + if (numBytes == 0) { + *lexemePtr = END; + return 0; } - code = TCL_OK; + byte = (unsigned char)(*start); + if (byte < sizeof(Lexeme) && Lexeme[byte] != 0) { + *lexemePtr = Lexeme[byte]; + return 1; + } + switch (byte) { + case '*': + if ((numBytes > 1) && (start[1] == '*')) { + *lexemePtr = EXPON; + return 2; + } + *lexemePtr = MULT; + return 1; - /* - * Switch on the type of the first token after the subexpression token. - * After processing it, advance tokenPtr to point just after the - * subexpression's last token. - */ - - tokenPtr = exprTokenPtr+1; - TRACE(exprTokenPtr->start, exprTokenPtr->size, - tokenPtr->start, tokenPtr->size); - switch (tokenPtr->type) { - case TCL_TOKEN_WORD: - code = TclCompileTokens(interp, tokenPtr+1, - tokenPtr->numComponents, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - break; - - case TCL_TOKEN_TEXT: - if (tokenPtr->size > 0) { - objIndex = TclRegisterNewLiteral(envPtr, tokenPtr->start, - tokenPtr->size); - } else { - objIndex = TclRegisterNewLiteral(envPtr, "", 0); - } - TclEmitPush(objIndex, envPtr); - tokenPtr += 1; - break; - - case TCL_TOKEN_BS: - length = TclParseBackslash(tokenPtr->start, tokenPtr->size, - (int *) NULL, buffer); - if (length > 0) { - objIndex = TclRegisterNewLiteral(envPtr, buffer, length); - } else { - objIndex = TclRegisterNewLiteral(envPtr, "", 0); - } - TclEmitPush(objIndex, envPtr); - tokenPtr += 1; - break; - - case TCL_TOKEN_COMMAND: - code = TclCompileScript(interp, tokenPtr->start+1, - tokenPtr->size-2, /*nested*/ 0, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += 1; - break; - - case TCL_TOKEN_VARIABLE: - code = TclCompileTokens(interp, tokenPtr, 1, envPtr); - if (code != TCL_OK) { - goto done; + case '=': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = EQUAL; + return 2; + } + *lexemePtr = INCOMPLETE; + return 1; + + case '!': + if ((numBytes > 1) && (start[1] == '=')) { + *lexemePtr = NEQ; + return 2; + } + *lexemePtr = NOT; + return 1; + + case '&': + if ((numBytes > 1) && (start[1] == '&')) { + *lexemePtr = AND; + return 2; + } + *lexemePtr = BIT_AND; + return 1; + + case '|': + if ((numBytes > 1) && (start[1] == '|')) { + *lexemePtr = OR; + return 2; + } + *lexemePtr = BIT_OR; + return 1; + + case '<': + if (numBytes > 1) { + switch (start[1]) { + case '<': + *lexemePtr = LEFT_SHIFT; + return 2; + case '=': + *lexemePtr = LEQ; + return 2; } - tokenPtr += (tokenPtr->numComponents + 1); - break; - - case TCL_TOKEN_SUB_EXPR: - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + } + *lexemePtr = LESS; + return 1; + + case '>': + if (numBytes > 1) { + switch (start[1]) { + case '>': + *lexemePtr = RIGHT_SHIFT; + return 2; + case '=': + *lexemePtr = GEQ; + return 2; } - tokenPtr += (tokenPtr->numComponents + 1); - break; - - case TCL_TOKEN_OPERATOR: + } + *lexemePtr = GREATER; + return 1; + + case 'i': + if ((numBytes > 1) && (start[1] == 'n') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + /* - * Look up the operator. If the operator isn't found, treat it - * as a math function. + * Must make this check so we can tell the difference between + * the "in" operator and the "int" function name and the + * "infinity" numeric value. */ - Tcl_DStringInit(&opBuf); - operator = Tcl_DStringAppend(&opBuf, - tokenPtr->start, tokenPtr->size); - hPtr = Tcl_FindHashEntry(&opHashTable, operator); - if (hPtr == NULL) { - code = CompileMathFuncCall(exprTokenPtr, operator, infoPtr, - envPtr, &endPtr); - Tcl_DStringFree(&opBuf); - if (code != TCL_OK) { - goto done; - } - tokenPtr = endPtr; - break; + + *lexemePtr = IN_LIST; + return 2; + } + break; + + case 'e': + if ((numBytes > 1) && (start[1] == 'q') + && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + *lexemePtr = STREQ; + return 2; + } + break; + + case 'n': + if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + switch (start[1]) { + case 'e': + *lexemePtr = STRNEQ; + return 2; + case 'i': + *lexemePtr = NOT_IN_LIST; + return 2; + } + } + } + + literal = Tcl_NewObj(); + if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end, + TCL_PARSE_NO_WHITESPACE) == TCL_OK) { + if (end < start + numBytes && !isalnum(UCHAR(*end)) + && UCHAR(*end) != '_') { + + number: + TclInitStringRep(literal, start, end-start); + *lexemePtr = NUMBER; + if (literalPtr) { + *literalPtr = literal; + } else { + Tcl_DecrRefCount(literal); } - Tcl_DStringFree(&opBuf); - opIndex = (int) Tcl_GetHashValue(hPtr); - opDescPtr = &(operatorTable[opIndex]); + return (end-start); + } else { + unsigned char lexeme; /* - * If the operator is "normal", compile it using information - * from the operator table. + * We have a number followed directly by bareword characters + * (alpha, digit, underscore). Is this a number followed by + * bareword syntax error? Or should we join into one bareword? + * Example: Inf + luence + () becomes a valid function call. + * [Bug 3401704] */ - - if (opDescPtr->numOperands > 0) { - tokenPtr++; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - - if (opDescPtr->numOperands == 2) { - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + if (literal->typePtr == &tclDoubleType) { + const char *p = start; + while (p < end) { + if (!isalnum(UCHAR(*p++))) { + /* + * The number has non-bareword characters, so we + * must treat it as a number. + */ + goto number; } - tokenPtr += (tokenPtr->numComponents + 1); } - TclEmitOpcode(opDescPtr->instruction, envPtr); - infoPtr->hasOperators = 1; - break; } - + ParseLexeme(end, numBytes-(end-start), &lexeme, NULL); + if ((NODE_TYPE & lexeme) == BINARY) { + /* + * The bareword characters following the number take the + * form of an operator (eq, ne, in, ni, ...) so we treat + * as number + operator. + */ + goto number; + } /* - * The operator requires special treatment, and is either - * "+" or "-", or one of "&&", "||" or "?". + * Otherwise, fall through and parse the whole as a bareword. */ - - switch (opIndex) { - case OP_PLUS: - case OP_MINUS: - tokenPtr++; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Check whether the "+" or "-" is unary. - */ - - afterSubexprPtr = exprTokenPtr - + exprTokenPtr->numComponents+1; - if (tokenPtr == afterSubexprPtr) { - TclEmitOpcode(((opIndex==OP_PLUS)? - INST_UPLUS : INST_UMINUS), - envPtr); - break; - } - - /* - * The "+" or "-" is binary. - */ - - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - TclEmitOpcode(((opIndex==OP_PLUS)? INST_ADD : INST_SUB), - envPtr); - break; - - case OP_LAND: - case OP_LOR: - code = CompileLandOrLorExpr(exprTokenPtr, opIndex, - infoPtr, envPtr, &endPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr = endPtr; - break; - - case OP_QUESTY: - code = CompileCondExpr(exprTokenPtr, infoPtr, - envPtr, &endPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr = endPtr; - break; - - default: - panic("CompileSubExpr: unexpected operator %d requiring special treatment\n", - opIndex); - } /* end switch on operator requiring special treatment */ - infoPtr->hasOperators = 1; - break; - - default: - panic("CompileSubExpr: unexpected token type %d\n", - tokenPtr->type); + } } - /* - * Verify that the subexpression token had the required number of - * subtokens: that we've advanced tokenPtr just beyond the - * subexpression's last token. For example, a "*" subexpression must - * contain the tokens for exactly two operands. - */ - - if (tokenPtr != (exprTokenPtr + exprTokenPtr->numComponents+1)) { - LogSyntaxError(infoPtr); - code = TCL_ERROR; + if (Tcl_UtfCharComplete(start, numBytes)) { + scanned = Tcl_UtfToUniChar(start, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, start, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); } - - done: - return code; + if (!isalnum(UCHAR(ch))) { + *lexemePtr = INVALID; + Tcl_DecrRefCount(literal); + return scanned; + } + end = start; + while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { + end += scanned; + numBytes -= scanned; + if (Tcl_UtfCharComplete(end, numBytes)) { + scanned = Tcl_UtfToUniChar(end, &ch); + } else { + char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, end, (size_t) numBytes); + utfBytes[numBytes] = '\0'; + scanned = Tcl_UtfToUniChar(utfBytes, &ch); + } + } + *lexemePtr = BAREWORD; + if (literalPtr) { + Tcl_SetStringObj(literal, start, (int) (end-start)); + *literalPtr = literal; + } else { + Tcl_DecrRefCount(literal); + } + return (end-start); } /* *---------------------------------------------------------------------- * - * CompileLandOrLorExpr -- + * TclCompileExpr -- * - * This procedure compiles a Tcl logical and ("&&") or logical or - * ("||") subexpression. + * This procedure compiles a string containing a Tcl expression into Tcl + * bytecodes. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. @@ -580,387 +2022,680 @@ CompileSubExpr(exprTokenPtr, infoPtr, envPtr) *---------------------------------------------------------------------- */ -static int -CompileLandOrLorExpr(exprTokenPtr, opIndex, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "&&" or "||" operator. */ - int opIndex; /* A code describing the expression - * operator: either OP_LAND or OP_LOR. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +void +TclCompileExpr( + Tcl_Interp *interp, /* Used for error reporting. */ + const char *script, /* The source script to compile. */ + int numBytes, /* Number of bytes in script. */ + CompileEnv *envPtr, /* Holds resulting instructions. */ + int optimize) /* 0 for one-off expressions */ { - JumpFixup shortCircuitFixup; /* Used to fix up the short circuit jump - * after the first subexpression. */ - JumpFixup lhsTrueFixup, lhsEndFixup; - /* Used to fix up jumps used to convert the - * first operand to 0 or 1. */ - Tcl_Token *tokenPtr; - int dist, code; - int savedStackDepth = envPtr->currStackDepth; + OpNode *opTree = NULL; /* Will point to the tree of operators */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ + Tcl_Parse *parsePtr = + (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions */ + + int code = ParseExpr(interp, script, numBytes, &opTree, litList, + funcList, parsePtr, 0 /* parseOnly */); + + if (code == TCL_OK) { + + /* Valid parse; compile the tree. */ + int objc; + Tcl_Obj *const *litObjv; + Tcl_Obj **funcObjv; + + /* TIP #280 : Track Lines within the expression */ + TclAdvanceLines(&envPtr->line, script, + script + TclParseAllWhiteSpace(script, numBytes)); + + TclListObjGetElements(NULL, litList, &objc, (Tcl_Obj ***)&litObjv); + TclListObjGetElements(NULL, funcList, &objc, &funcObjv); + CompileExprTree(interp, opTree, 0, &litObjv, funcObjv, + parsePtr->tokenPtr, envPtr, optimize); + } else { + TclCompileSyntaxError(interp, envPtr); + } - /* - * Emit code for the first operand. - */ + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); + Tcl_DecrRefCount(funcList); + Tcl_DecrRefCount(litList); + ckfree((char *) opTree); +} + +/* + *---------------------------------------------------------------------- + * + * ExecConstantExprTree -- + * Compiles and executes bytecode for the subexpression tree at index + * in the nodes array. This subexpression must be constant, made up + * of only constant operators (not functions) and literals. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. + * + *---------------------------------------------------------------------- + */ - tokenPtr = exprTokenPtr+2; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); +static int +ExecConstantExprTree( + Tcl_Interp *interp, + OpNode *nodes, + int index, + Tcl_Obj *const **litObjvPtr) +{ + CompileEnv *envPtr; + ByteCode *byteCodePtr; + int code; + Tcl_Obj *byteCodeObj = Tcl_NewObj(); /* - * Convert the first operand to the result that Tcl requires: - * "0" or "1". Eventually we'll use a new instruction for this. + * Note we are compiling an expression with literal arguments. This means + * there can be no [info frame] calls when we execute the resulting + * bytecode, so there's no need to tend to TIP 280 issues. */ - - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &lhsTrueFixup); - TclEmitPush(TclRegisterNewLiteral(envPtr, "0", 1), envPtr); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &lhsEndFixup); - dist = (envPtr->codeNext - envPtr->codeStart) - lhsTrueFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &lhsTrueFixup, dist, 127)) { - badDist: - panic("CompileLandOrLorExpr: bad jump distance %d\n", dist); - } - envPtr->currStackDepth = savedStackDepth; - TclEmitPush(TclRegisterNewLiteral(envPtr, "1", 1), envPtr); - dist = (envPtr->codeNext - envPtr->codeStart) - lhsEndFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &lhsEndFixup, dist, 127)) { - goto badDist; - } - /* - * Emit the "short circuit" jump around the rest of the expression. - * Duplicate the "0" or "1" on top of the stack first to keep the - * jump from consuming it. - */ + envPtr = (CompileEnv *) TclStackAlloc(interp, sizeof(CompileEnv)); + TclInitCompileEnv(interp, envPtr, NULL, 0, NULL, 0); + CompileExprTree(interp, nodes, index, litObjvPtr, NULL, NULL, envPtr, + 0 /* optimize */); + TclEmitOpcode(INST_DONE, envPtr); + Tcl_IncrRefCount(byteCodeObj); + TclInitByteCodeObj(byteCodeObj, envPtr); + TclFreeCompileEnv(envPtr); + TclStackFree(interp, envPtr); + byteCodePtr = (ByteCode *) byteCodeObj->internalRep.otherValuePtr; + code = TclExecuteByteCode(interp, byteCodePtr); + Tcl_DecrRefCount(byteCodeObj); + return code; +} + +/* + *---------------------------------------------------------------------- + * + * CompileExprTree -- + * Compiles and writes to envPtr instructions for the subexpression + * tree at index in the nodes array. (*litObjvPtr) must point to the + * proper location in a corresponding literals list. Likewise, when + * non-NULL, funcObjv and tokenPtr must point into matching arrays of + * function names and Tcl_Token's derived from earlier call to + * ParseExpr(). When optimize is true, any constant subexpressions + * will be precomputed. + * + * Results: + * None. + * + * Side effects: + * Adds instructions to envPtr to evaluate the expression at runtime. + * Consumes subtree of nodes rooted at index. Advances the pointer + * *litObjvPtr. + * + *---------------------------------------------------------------------- + */ - TclEmitOpcode(INST_DUP, envPtr); - TclEmitForwardJump(envPtr, - ((opIndex==OP_LAND)? TCL_FALSE_JUMP : TCL_TRUE_JUMP), - &shortCircuitFixup); +static void +CompileExprTree( + Tcl_Interp *interp, + OpNode *nodes, + int index, + Tcl_Obj *const **litObjvPtr, + Tcl_Obj *const *funcObjv, + Tcl_Token *tokenPtr, + CompileEnv *envPtr, + int optimize) +{ + OpNode *nodePtr = nodes + index; + OpNode *rootPtr = nodePtr; + int numWords = 0; + JumpList *jumpPtr = NULL; + int convert = 1; + + while (1) { + int next; + JumpList *freePtr, *newJump; + + if (nodePtr->mark == MARK_LEFT) { + next = nodePtr->left; + + switch (nodePtr->lexeme) { + case QUESTION: + newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + jumpPtr->depth = envPtr->currStackDepth; + convert = 1; + break; + case AND: + case OR: + newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + jumpPtr->depth = envPtr->currStackDepth; + break; + } + } else if (nodePtr->mark == MARK_RIGHT) { + next = nodePtr->right; + + switch (nodePtr->lexeme) { + case FUNCTION: { + Tcl_DString cmdName; + const char *p; + int length; + + Tcl_DStringInit(&cmdName); + Tcl_DStringAppend(&cmdName, "tcl::mathfunc::", -1); + p = TclGetStringFromObj(*funcObjv, &length); + funcObjv++; + Tcl_DStringAppend(&cmdName, p, length); + TclEmitPush(TclRegisterNewNSLiteral(envPtr, + Tcl_DStringValue(&cmdName), + Tcl_DStringLength(&cmdName)), envPtr); + Tcl_DStringFree(&cmdName); + + /* + * Start a count of the number of words in this function + * command invocation. In case there's already a count + * in progress (nested functions), save it in our unused + * "left" field for restoring later. + */ + + nodePtr->left = numWords; + numWords = 2; /* Command plus one argument */ + break; + } + case QUESTION: + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); + break; + case COLON: + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, + &(jumpPtr->next->jump)); + envPtr->currStackDepth = jumpPtr->depth; + jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart); + jumpPtr->convert = convert; + convert = 1; + break; + case AND: + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); + break; + case OR: + TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &(jumpPtr->jump)); + break; + } + } else { + switch (nodePtr->lexeme) { + case START: + case QUESTION: + if (convert && (nodePtr == rootPtr)) { + TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); + } + break; + case OPEN_PAREN: - /* - * Emit code for the second operand. - */ + /* do nothing */ + break; + case FUNCTION: - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); + /* + * Use the numWords count we've kept to invoke the + * function command with the correct number of arguments. + */ - /* - * Emit a "logical and" or "logical or" instruction. This does not try - * to "short- circuit" the evaluation of both operands, but instead - * ensures that we either have a "1" or a "0" result. - */ + if (numWords < 255) { + TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr); + } else { + TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr); + } - TclEmitOpcode(((opIndex==OP_LAND)? INST_LAND : INST_LOR), envPtr); + /* Restore any saved numWords value. */ + numWords = nodePtr->left; + convert = 1; + break; + case COMMA: - /* - * Now that we know the target of the forward jump, update it with the - * correct distance. - */ + /* Each comma implies another function argument. */ + numWords++; + break; + case COLON: + if (TclFixupForwardJump(envPtr, &(jumpPtr->next->jump), + (envPtr->codeNext - envPtr->codeStart) + - jumpPtr->next->jump.codeOffset, 127)) { + jumpPtr->offset += 3; + } + TclFixupForwardJump(envPtr, &(jumpPtr->jump), + jumpPtr->offset - jumpPtr->jump.codeOffset, 127); + convert |= jumpPtr->convert; + envPtr->currStackDepth = jumpPtr->depth + 1; + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + case AND: + case OR: + TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) + ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, + &(jumpPtr->next->jump)); + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, + &(jumpPtr->next->next->jump)); + TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->jump), 127); + if (TclFixupForwardJumpToHere(envPtr, &(jumpPtr->jump), 127)) { + jumpPtr->next->next->jump.codeOffset += 3; + } + TclEmitPush(TclRegisterNewLiteral(envPtr, + (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); + TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->next->jump), + 127); + convert = 0; + envPtr->currStackDepth = jumpPtr->depth + 1; + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + freePtr = jumpPtr; + jumpPtr = jumpPtr->next; + TclStackFree(interp, freePtr); + break; + default: + TclEmitOpcode(instruction[nodePtr->lexeme], envPtr); + convert = 0; + break; + } + if (nodePtr == rootPtr) { - dist = (envPtr->codeNext - envPtr->codeStart) - - shortCircuitFixup.codeOffset; - TclFixupForwardJump(envPtr, &shortCircuitFixup, dist, 127); - *endPtrPtr = tokenPtr; + /* We're done */ + return; + } + nodePtr = nodes + nodePtr->p.parent; + continue; + } - done: - envPtr->currStackDepth = savedStackDepth + 1; - return code; + nodePtr->mark++; + switch (next) { + case OT_EMPTY: + numWords = 1; /* No arguments, so just the command */ + break; + case OT_LITERAL: { + Tcl_Obj *const *litObjv = *litObjvPtr; + Tcl_Obj *literal = *litObjv; + + if (optimize) { + int length, index; + const char *bytes = TclGetStringFromObj(literal, &length); + LiteralEntry *lePtr; + Tcl_Obj *objPtr; + + index = TclRegisterNewLiteral(envPtr, bytes, length); + lePtr = envPtr->literalArrayPtr + index; + objPtr = lePtr->objPtr; + if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { + /* + * Would like to do this: + * + * lePtr->objPtr = literal; + * Tcl_IncrRefCount(literal); + * Tcl_DecrRefCount(objPtr); + * + * However, the design of the "global" and "local" + * LiteralTable does not permit the value of lePtr->objPtr + * to change. So rather than replace lePtr->objPtr, we + * do surgery to transfer our desired intrep into it. + * + */ + objPtr->typePtr = literal->typePtr; + objPtr->internalRep = literal->internalRep; + literal->typePtr = NULL; + } + TclEmitPush(index, envPtr); + } else { + /* + * When optimize==0, we know the expression is a one-off + * and there's nothing to be gained from sharing literals + * when they won't live long, and the copies we have already + * have an appropriate intrep. In this case, skip literal + * registration that would enable sharing, and use the routine + * that preserves intreps. + */ + TclEmitPush(TclAddLiteralObj(envPtr, literal, NULL), envPtr); + } + (*litObjvPtr)++; + break; + } + case OT_TOKENS: + TclCompileTokens(interp, tokenPtr+1, tokenPtr->numComponents, + envPtr); + tokenPtr += tokenPtr->numComponents + 1; + break; + default: + if (optimize && nodes[next].constant) { + Tcl_InterpState save = Tcl_SaveInterpState(interp, TCL_OK); + if (ExecConstantExprTree(interp, nodes, next, litObjvPtr) + == TCL_OK) { + TclEmitPush(TclAddLiteralObj(envPtr, + Tcl_GetObjResult(interp), NULL), envPtr); + } else { + TclCompileSyntaxError(interp, envPtr); + } + Tcl_RestoreInterpState(interp, save); + convert = 0; + } else { + nodePtr = nodes + next; + } + } + } } /* *---------------------------------------------------------------------- * - * CompileCondExpr -- - * - * This procedure compiles a Tcl conditional expression: - * condExpr ::= lorExpr ['?' condExpr ':' condExpr] + * TclSingleOpCmd -- + * Implements the commands: ~, !, <<, >>, %, !=, ne, in, ni + * in the ::tcl::mathop namespace. These commands have no + * extension to arbitrary arguments; they accept only exactly one + * or exactly two arguments as suitable for the operator. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * A standard Tcl return code and result left in interp. * * Side effects: - * Adds instructions to envPtr to evaluate the expression at runtime. + * None. * *---------------------------------------------------------------------- */ -static int -CompileCondExpr(exprTokenPtr, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the "?" operator. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +int +TclSingleOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - JumpFixup jumpAroundThenFixup, jumpAroundElseFixup; - /* Used to update or replace one-byte jumps - * around the then and else expressions when - * their target PCs are determined. */ - Tcl_Token *tokenPtr; - int elseCodeOffset, dist, code; - int savedStackDepth = envPtr->currStackDepth; - - /* - * Emit code for the test. - */ + TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + unsigned char lexeme; + OpNode nodes[2]; + Tcl_Obj *const *litObjv = objv + 1; + + if (objc != 1+occdPtr->i.numArgs) { + Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); + return TCL_ERROR; + } - tokenPtr = exprTokenPtr+2; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + if (objc == 2) { + nodes[1].mark = MARK_RIGHT; + } else { + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; } - tokenPtr += (tokenPtr->numComponents + 1); - - /* - * Emit the jump to the "else" expression if the test was false. - */ - - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpAroundThenFixup); + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; - /* - * Compile the "then" expression. Note that if a subexpression is only - * a primary, we need to try to convert it to numeric. We do this to - * support Tcl's policy of interpreting operands if at all possible as - * first integers, else floating-point numbers. - */ + return ExecConstantExprTree(interp, nodes, 0, &litObjv); +} + +/* + *---------------------------------------------------------------------- + * + * TclSortingOpCmd -- + * Implements the commands: <, <=, >, >=, ==, eq + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by computing the AND of the base + * operator applied to all neighbor argument pairs. + * + * Results: + * A standard Tcl return code and result left in interp. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ - infoPtr->hasOperators = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - if (!infoPtr->hasOperators) { - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } +int +TclSortingOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + int code = TCL_OK; - /* - * Emit an unconditional jump around the "else" condExpr. - */ - - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &jumpAroundElseFixup); + if (objc < 3) { + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); + } else { + TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + Tcl_Obj **litObjv = (Tcl_Obj **) TclStackAlloc(interp, + 2*(objc-2)*sizeof(Tcl_Obj *)); + OpNode *nodes = (OpNode *) TclStackAlloc(interp, + 2*(objc-2)*sizeof(OpNode)); + unsigned char lexeme; + int i, lastAnd = 1; + Tcl_Obj *const *litObjPtrPtr = litObjv; + + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + + litObjv[0] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + for (i=2; i<objc-1; i++) { + litObjv[2*(i-1)-1] = objv[i]; + nodes[2*(i-1)-1].lexeme = lexeme; + nodes[2*(i-1)-1].mark = MARK_LEFT; + nodes[2*(i-1)-1].left = OT_LITERAL; + nodes[2*(i-1)-1].right = OT_LITERAL; + + litObjv[2*(i-1)] = objv[i]; + nodes[2*(i-1)].lexeme = AND; + nodes[2*(i-1)].mark = MARK_LEFT; + nodes[2*(i-1)].left = lastAnd; + nodes[lastAnd].p.parent = 2*(i-1); + + nodes[2*(i-1)].right = 2*(i-1)+1; + nodes[2*(i-1)+1].p.parent= 2*(i-1); + + lastAnd = 2*(i-1); + } + litObjv[2*(objc-2)-1] = objv[objc-1]; - /* - * Compile the "else" expression. - */ + nodes[2*(objc-2)-1].lexeme = lexeme; + nodes[2*(objc-2)-1].mark = MARK_LEFT; + nodes[2*(objc-2)-1].left = OT_LITERAL; + nodes[2*(objc-2)-1].right = OT_LITERAL; - envPtr->currStackDepth = savedStackDepth; - elseCodeOffset = (envPtr->codeNext - envPtr->codeStart); - infoPtr->hasOperators = 0; - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; - } - tokenPtr += (tokenPtr->numComponents + 1); - if (!infoPtr->hasOperators) { - TclEmitOpcode(INST_TRY_CVT_TO_NUMERIC, envPtr); - } + nodes[0].right = lastAnd; + nodes[lastAnd].p.parent = 0; - /* - * Fix up the second jump around the "else" expression. - */ + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); - dist = (envPtr->codeNext - envPtr->codeStart) - - jumpAroundElseFixup.codeOffset; - if (TclFixupForwardJump(envPtr, &jumpAroundElseFixup, dist, 127)) { - /* - * Update the else expression's starting code offset since it - * moved down 3 bytes too. - */ - - elseCodeOffset += 3; + TclStackFree(interp, nodes); + TclStackFree(interp, litObjv); } - - /* - * Fix up the first jump to the "else" expression if the test was false. - */ - - dist = (elseCodeOffset - jumpAroundThenFixup.codeOffset); - TclFixupForwardJump(envPtr, &jumpAroundThenFixup, dist, 127); - *endPtrPtr = tokenPtr; - - done: - envPtr->currStackDepth = savedStackDepth + 1; return code; } /* *---------------------------------------------------------------------- * - * CompileMathFuncCall -- - * - * This procedure compiles a call on a math function in an expression: - * mathFuncCall ::= funcName '(' [condExpr {',' condExpr}] ')' + * TclVariadicOpCmd -- + * Implements the commands: +, *, &, |, ^, ** + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary number of arguments by repeatedly applying the base + * operator with suitable associative rules. When fewer than two + * arguments are provided, suitable identity values are returned. * * Results: - * The return value is TCL_OK on a successful compilation and TCL_ERROR - * on failure. If TCL_OK is returned, a pointer to the token just after - * the last one in the subexpression is stored at the address in - * endPtrPtr. If TCL_ERROR is returned, then the interpreter's result - * contains an error message. + * A standard Tcl return code and result left in interp. * * Side effects: - * Adds instructions to envPtr to evaluate the math function at - * runtime. + * None. * *---------------------------------------------------------------------- */ -static int -CompileMathFuncCall(exprTokenPtr, funcName, infoPtr, envPtr, endPtrPtr) - Tcl_Token *exprTokenPtr; /* Points to TCL_TOKEN_SUB_EXPR token - * containing the math function call. */ - CONST char *funcName; /* Name of the math function. */ - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ - CompileEnv *envPtr; /* Holds resulting instructions. */ - Tcl_Token **endPtrPtr; /* If successful, a pointer to the token - * just after the last token in the - * subexpression is stored here. */ +int +TclVariadicOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - Tcl_Interp *interp = infoPtr->interp; - Interp *iPtr = (Interp *) interp; - MathFunc *mathFuncPtr; - Tcl_HashEntry *hPtr; - Tcl_Token *tokenPtr, *afterSubexprPtr; - int code, i; - - /* - * Look up the MathFunc record for the function. - */ + TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + unsigned char lexeme; + int code; - code = TCL_OK; - hPtr = Tcl_FindHashEntry(&iPtr->mathFuncTable, funcName); - if (hPtr == NULL) { - Tcl_ResetResult(interp); - Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), - "unknown math function \"", funcName, "\"", (char *) NULL); - code = TCL_ERROR; - goto done; + if (objc < 2) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(occdPtr->i.identity)); + return TCL_OK; } - mathFuncPtr = (MathFunc *) Tcl_GetHashValue(hPtr); - /* - * If not a builtin function, push an object with the function's name. - */ - - if (mathFuncPtr->builtinFuncIndex < 0) { - TclEmitPush(TclRegisterNewLiteral(envPtr, funcName, -1), envPtr); - } + ParseLexeme(occdPtr->op, strlen(occdPtr->op), &lexeme, NULL); + lexeme |= BINARY; + + if (objc == 2) { + Tcl_Obj *litObjv[2]; + OpNode nodes[2]; + int decrMe = 0; + Tcl_Obj *const *litObjPtrPtr = litObjv; + + if (lexeme == EXPON) { + litObjv[1] = Tcl_NewIntObj(occdPtr->i.identity); + Tcl_IncrRefCount(litObjv[1]); + decrMe = 1; + litObjv[0] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; + } else { + if (lexeme == DIVIDE) { + litObjv[0] = Tcl_NewDoubleObj(1.0); + } else { + litObjv[0] = Tcl_NewIntObj(occdPtr->i.identity); + } + Tcl_IncrRefCount(litObjv[0]); + litObjv[1] = objv[1]; + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + nodes[0].right = 1; + nodes[1].lexeme = lexeme; + nodes[1].mark = MARK_LEFT; + nodes[1].left = OT_LITERAL; + nodes[1].right = OT_LITERAL; + nodes[1].p.parent = 0; + } - /* - * Compile any arguments for the function. - */ + code = ExecConstantExprTree(interp, nodes, 0, &litObjPtrPtr); - tokenPtr = exprTokenPtr+2; - afterSubexprPtr = exprTokenPtr + (exprTokenPtr->numComponents + 1); - if (mathFuncPtr->numArgs > 0) { - for (i = 0; i < mathFuncPtr->numArgs; i++) { - if (tokenPtr == afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too few arguments for math function", -1); - code = TCL_ERROR; - goto done; - } - code = CompileSubExpr(tokenPtr, infoPtr, envPtr); - if (code != TCL_OK) { - goto done; + Tcl_DecrRefCount(litObjv[decrMe]); + return code; + } else { + Tcl_Obj *const *litObjv = objv + 1; + OpNode *nodes = (OpNode *) TclStackAlloc(interp, + (objc-1)*sizeof(OpNode)); + int i, lastOp = OT_LITERAL; + + nodes[0].lexeme = START; + nodes[0].mark = MARK_RIGHT; + if (lexeme == EXPON) { + for (i=objc-2; i>0; i-- ) { + nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; + nodes[i].left = OT_LITERAL; + nodes[i].right = lastOp; + if (lastOp >= 0) { + nodes[lastOp].p.parent = i; + } + lastOp = i; + } + } else { + for (i=1; i<objc-1; i++ ) { + nodes[i].lexeme = lexeme; + nodes[i].mark = MARK_LEFT; + nodes[i].left = lastOp; + if (lastOp >= 0) { + nodes[lastOp].p.parent = i; + } + nodes[i].right = OT_LITERAL; + lastOp = i; } - tokenPtr += (tokenPtr->numComponents + 1); } - if (tokenPtr != afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too many arguments for math function", -1); - code = TCL_ERROR; - goto done; - } - } else if (tokenPtr != afterSubexprPtr) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "too many arguments for math function", -1); - code = TCL_ERROR; - goto done; - } - - /* - * Compile the call on the math function. Note that the "objc" argument - * count for non-builtin functions is incremented by 1 to include the - * function name itself. - */ + nodes[0].right = lastOp; + nodes[lastOp].p.parent = 0; - if (mathFuncPtr->builtinFuncIndex >= 0) { /* a builtin function */ - /* - * Adjust the current stack depth by the number of arguments - * of the builtin function. This cannot be handled by the - * TclEmitInstInt1 macro as the number of arguments is not - * passed as an operand. - */ + code = ExecConstantExprTree(interp, nodes, 0, &litObjv); - if (envPtr->maxStackDepth < envPtr->currStackDepth) { - envPtr->maxStackDepth = envPtr->currStackDepth; - } - TclEmitInstInt1(INST_CALL_BUILTIN_FUNC1, - mathFuncPtr->builtinFuncIndex, envPtr); - envPtr->currStackDepth -= mathFuncPtr->numArgs; - } else { - TclEmitInstInt1(INST_CALL_FUNC1, (mathFuncPtr->numArgs+1), envPtr); - } - *endPtrPtr = afterSubexprPtr; + TclStackFree(interp, nodes); - done: - return code; + return code; + } } /* *---------------------------------------------------------------------- * - * LogSyntaxError -- - * - * This procedure is invoked after an error occurs when compiling an - * expression. It sets the interpreter result to an error message - * describing the error. + * TclNoIdentOpCmd -- + * Implements the commands: -, / + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary non-zero number of arguments by repeatedly applying + * the base operator with suitable associative rules. When no + * arguments are provided, an error is raised. * * Results: - * None. + * A standard Tcl return code and result left in interp. * * Side effects: - * Sets the interpreter result to an error message describing the - * expression that was being compiled when the error occurred. + * None. * *---------------------------------------------------------------------- */ -static void -LogSyntaxError(infoPtr) - ExprInfo *infoPtr; /* Describes the compilation state for the - * expression being compiled. */ +int +TclNoIdentOpCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) { - int numBytes = (infoPtr->lastChar - infoPtr->expr); - char buffer[100]; - - sprintf(buffer, "syntax error in expression \"%.*s\"", - ((numBytes > 60)? 60 : numBytes), infoPtr->expr); - Tcl_ResetResult(infoPtr->interp); - Tcl_AppendStringsToObj(Tcl_GetObjResult(infoPtr->interp), - buffer, (char *) NULL); + TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); + return TCL_ERROR; + } + return TclVariadicOpCmd(clientData, interp, objc, objv); } +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |