diff options
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r-- | generic/tclCompExpr.c | 1370 |
1 files changed, 738 insertions, 632 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 9142e2b..d8e4d9f 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1,8 +1,8 @@ /* * tclCompExpr.c -- * - * This file contains the code to parse and compile Tcl expressions - * and implementations of the Tcl commands corresponding to expression + * This file contains the code to parse and compile Tcl expressions and + * implementations of the Tcl commands corresponding to expression * operators, such as the command ::tcl::mathop::+ . * * Contributions from Don Porter, NIST, 2006-2007. (not subject to US copyright) @@ -15,11 +15,11 @@ #include "tclCompile.h" /* CompileEnv */ /* - * Expression parsing takes place in the routine ParseExpr(). It takes a - * string as input, parses that string, and generates a representation of - * the expression in the form of a tree of operators, a list of literals, - * a list of function names, and an array of Tcl_Token's within a Tcl_Parse - * struct. The tree is composed of OpNodes. + * Expression parsing takes place in the routine ParseExpr(). It takes a + * string as input, parses that string, and generates a representation of the + * expression in the form of a tree of operators, a list of literals, a list + * of function names, and an array of Tcl_Token's within a Tcl_Parse struct. + * The tree is composed of OpNodes. */ typedef struct OpNode { @@ -36,36 +36,36 @@ typedef struct OpNode { } OpNode; /* - * The storage for the tree is dynamically allocated array of OpNodes. The + * The storage for the tree is dynamically allocated array of OpNodes. The * array is grown as parsing needs dictate according to a scheme similar to * Tcl's string growth algorithm, so that the resizing costs are O(N) and so * that we use at least half the memory allocated as expressions get large. * * Each OpNode in the tree represents an operator in the expression, either - * unary or binary. When parsing is completed successfully, a binary operator + * unary or binary. When parsing is completed successfully, a binary operator * OpNode will have its left and right fields filled with "pointers" to its - * left and right operands. A unary operator OpNode will have its right field - * filled with a pointer to its single operand. When an operand is a + * left and right operands. A unary operator OpNode will have its right field + * filled with a pointer to its single operand. When an operand is a * subexpression the "pointer" takes the form of the index -- a non-negative * integer -- into the OpNode storage array where the root of that * subexpression parse tree is found. * * Non-operator elements of the expression do not get stored in the OpNode - * tree. They are stored in the other structures according to their type. - * Literal values get appended to the literal list. Elements that denote - * forms of quoting or substitution known to the Tcl parser get stored as - * Tcl_Tokens. These non-operator elements of the expression are the - * leaves of the completed parse tree. When an operand of an OpNode is - * one of these leaf elements, the following negative integer codes are used - * to indicate which kind of elements it is. + * tree. They are stored in the other structures according to their type. + * Literal values get appended to the literal list. Elements that denote forms + * of quoting or substitution known to the Tcl parser get stored as + * Tcl_Tokens. These non-operator elements of the expression are the leaves of + * the completed parse tree. When an operand of an OpNode is one of these leaf + * elements, the following negative integer codes are used to indicate which + * kind of elements it is. */ enum OperandTypes { OT_LITERAL = -3, /* Operand is a literal in the literal list */ OT_TOKENS = -2, /* Operand is sequence of Tcl_Tokens */ - OT_EMPTY = -1 /* "Operand" is an empty string. This is a - * special case used only to represent the - * EMPTY lexeme. See below. */ + OT_EMPTY = -1 /* "Operand" is an empty string. This is a special + * case used only to represent the EMPTY lexeme. See + * below. */ }; /* @@ -79,31 +79,30 @@ enum OperandTypes { /* * Note that it is sufficient to store in the tree just the type of leaf - * operand, without any explicit pointer to which leaf. This is true because - * the traversals of the completed tree we perform are known to visit - * the leaves in the same order as the original parse. + * operand, without any explicit pointer to which leaf. This is true because + * the traversals of the completed tree we perform are known to visit the + * leaves in the same order as the original parse. * * In a completed parse tree, those OpNodes that are themselves (roots of * subexpression trees that are) operands of some operator store in their - * p.parent field a "pointer" to the OpNode of that operator. The p.parent - * field permits a traversal of the tree within a * non-recursive routine - * (ConvertTreeToTokens() and CompileExprTree()). This means that even + * p.parent field a "pointer" to the OpNode of that operator. The p.parent + * field permits a traversal of the tree within a non-recursive routine + * (ConvertTreeToTokens() and CompileExprTree()). This means that even * expression trees of great depth pose no risk of blowing the C stack. * - * While the parse tree is being constructed, the same memory space is used - * to hold the p.prev field which chains together a stack of incomplete - * trees awaiting their right operands. + * While the parse tree is being constructed, the same memory space is used to + * hold the p.prev field which chains together a stack of incomplete trees + * awaiting their right operands. * * The lexeme field is filled in with the lexeme of the operator that is - * returned by the ParseLexeme() routine. Only lexemes for unary and - * binary operators get stored in an OpNode. Other lexmes get different - * treatement. + * returned by the ParseLexeme() routine. Only lexemes for unary and binary + * operators get stored in an OpNode. Other lexmes get different treatement. * * The precedence field provides a place to store the precedence of the * operator, so it need not be looked up again and again. * - * The mark field is use to control the traversal of the tree, so - * that it can be done non-recursively. The mark values are: + * The mark field is use to control the traversal of the tree, so that it can + * be done non-recursively. The mark values are: */ enum Marks { @@ -119,185 +118,184 @@ enum Marks { */ /* - * Each lexeme belongs to one of four categories, which determine - * its place in the parse tree. We use the two high bits of the - * (unsigned char) value to store a NODE_TYPE code. + * Each lexeme belongs to one of four categories, which determine its place in + * the parse tree. We use the two high bits of the (unsigned char) value to + * store a NODE_TYPE code. */ #define NODE_TYPE 0xC0 /* - * The four category values are LEAF, UNARY, and BINARY, explained below, - * and "uncategorized", which is used either temporarily, until context - * determines which of the other three categories is correct, or for - * lexemes like INVALID, which aren't really lexemes at all, but indicators - * of a parsing error. Note that the codes must be distinct to distinguish - * categories, but need not take the form of a bit array. + * The four category values are LEAF, UNARY, and BINARY, explained below, and + * "uncategorized", which is used either temporarily, until context determines + * which of the other three categories is correct, or for lexemes like + * INVALID, which aren't really lexemes at all, but indicators of a parsing + * error. Note that the codes must be distinct to distinguish categories, but + * need not take the form of a bit array. */ -#define BINARY 0x40 /* This lexeme is a binary operator. An - * OpNode representing it should go into the - * parse tree, and two operands should be - * parsed for it in the expression. */ -#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode +#define BINARY 0x40 /* This lexeme is a binary operator. An OpNode + * representing it should go into the parse + * tree, and two operands should be parsed for + * it in the expression. */ +#define UNARY 0x80 /* This lexeme is a unary operator. An OpNode * representing it should go into the parse * tree, and one operand should be parsed for * it in the expression. */ #define LEAF 0xC0 /* This lexeme is a leaf operand in the parse - * tree. No OpNode will be placed in the tree - * for it. Either a literal value will be + * tree. No OpNode will be placed in the tree + * for it. Either a literal value will be * appended to the list of literals in this * expression, or appropriate Tcl_Tokens will * be appended in a Tcl_Parse struct to * represent those leaves that require some - * form of substitution. - */ + * form of substitution. */ /* Uncategorized lexemes */ -#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or +#define PLUS 1 /* Ambiguous. Resolves to UNARY_PLUS or * BINARY_PLUS according to context. */ -#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or +#define MINUS 2 /* Ambiguous. Resolves to UNARY_MINUS or * BINARY_MINUS according to context. */ -#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to +#define BAREWORD 3 /* Ambigous. Resolves to BOOLEAN or to * FUNCTION or a parse error according to * context and value. */ -#define INCOMPLETE 4 /* A parse error. Used only when the single +#define INCOMPLETE 4 /* A parse error. Used only when the single * "=" is encountered. */ -#define INVALID 5 /* A parse error. Used when any punctuation +#define INVALID 5 /* A parse error. Used when any punctuation * appears that's not a supported operator. */ /* Leaf lexemes */ -#define NUMBER ( LEAF | 1) /* For literal numbers */ -#define SCRIPT ( LEAF | 2) /* Script substitution; [foo] */ -#define BOOLEAN ( LEAF | BAREWORD) /* For literal booleans */ -#define BRACED ( LEAF | 4) /* Braced string; {foo bar} */ -#define VARIABLE ( LEAF | 5) /* Variable substitution; $x */ -#define QUOTED ( LEAF | 6) /* Quoted string; "foo $bar [soom]" */ -#define EMPTY ( LEAF | 7) /* Used only for an empty argument - * list to a function. Represents - * the empty string within parens in - * the expression: rand() */ +#define NUMBER (LEAF | 1) + /* For literal numbers */ +#define SCRIPT (LEAF | 2) + /* Script substitution; [foo] */ +#define BOOLEAN (LEAF | BAREWORD) + /* For literal booleans */ +#define BRACED (LEAF | 4) + /* Braced string; {foo bar} */ +#define VARIABLE (LEAF | 5) + /* Variable substitution; $x */ +#define QUOTED (LEAF | 6) + /* Quoted string; "foo $bar [soom]" */ +#define EMPTY (LEAF | 7) + /* Used only for an empty argument list to a + * function. Represents the empty string + * within parens in the expression: rand() */ /* Unary operator lexemes */ -#define UNARY_PLUS ( UNARY | PLUS) -#define UNARY_MINUS ( UNARY | MINUS) -#define FUNCTION ( UNARY | BAREWORD) /* This is a bit of "creative - * interpretation" on the part of the - * parser. A function call is parsed - * into the parse tree according to - * the perspective that the function - * name is a unary operator and its - * argument list, enclosed in parens, - * is its operand. The additional - * requirements not implied generally - * by treatment as a unary operator -- - * for example, the requirement that - * the operand be enclosed in parens -- - * are hard coded in the relevant - * portions of ParseExpr(). We trade - * off the need to include such - * exceptional handling in the code - * against the need we would otherwise - * have for more lexeme categories. */ -#define START ( UNARY | 4) /* This lexeme isn't parsed from the - * expression text at all. It - * represents the start of the - * expression and sits at the root of - * the parse tree where it serves as - * the start/end point of traversals. */ -#define OPEN_PAREN ( UNARY | 5) /* Another bit of creative - * interpretation, where we treat "(" - * as a unary operator with the - * sub-expression between it and its - * matching ")" as its operand. See - * CLOSE_PAREN below. */ -#define NOT ( UNARY | 6) -#define BIT_NOT ( UNARY | 7) +#define UNARY_PLUS (UNARY | PLUS) +#define UNARY_MINUS (UNARY | MINUS) +#define FUNCTION (UNARY | BAREWORD) + /* This is a bit of "creative interpretation" + * on the part of the parser. A function call + * is parsed into the parse tree according to + * the perspective that the function name is a + * unary operator and its argument list, + * enclosed in parens, is its operand. The + * additional requirements not implied + * generally by treatment as a unary operator + * -- for example, the requirement that the + * operand be enclosed in parens -- are hard + * coded in the relevant portions of + * ParseExpr(). We trade off the need to + * include such exceptional handling in the + * code against the need we would otherwise + * have for more lexeme categories. */ +#define START (UNARY | 4) + /* This lexeme isn't parsed from the + * expression text at all. It represents the + * start of the expression and sits at the + * root of the parse tree where it serves as + * the start/end point of traversals. */ +#define OPEN_PAREN (UNARY | 5) + /* Another bit of creative interpretation, + * where we treat "(" as a unary operator with + * the sub-expression between it and its + * matching ")" as its operand. See + * CLOSE_PAREN below. */ +#define NOT (UNARY | 6) +#define BIT_NOT (UNARY | 7) /* Binary operator lexemes */ -#define BINARY_PLUS ( BINARY | PLUS) -#define BINARY_MINUS ( BINARY | MINUS) -#define COMMA ( BINARY | 3) /* The "," operator is a low precedence - * binary operator that separates the - * arguments in a function call. The - * additional constraint that this - * operator can only legally appear - * at the right places within a - * function call argument list are - * hard coded within ParseExpr(). */ -#define MULT ( BINARY | 4) -#define DIVIDE ( BINARY | 5) -#define MOD ( BINARY | 6) -#define LESS ( BINARY | 7) -#define GREATER ( BINARY | 8) -#define BIT_AND ( BINARY | 9) -#define BIT_XOR ( BINARY | 10) -#define BIT_OR ( BINARY | 11) -#define QUESTION ( BINARY | 12) /* These two lexemes make up the */ -#define COLON ( BINARY | 13) /* ternary conditional operator, - * $x ? $y : $z . We treat them as - * two binary operators to avoid - * another lexeme category, and - * code the additional constraints - * directly in ParseExpr(). For - * instance, the right operand of - * a "?" operator must be a ":" - * operator. */ -#define LEFT_SHIFT ( BINARY | 14) -#define RIGHT_SHIFT ( BINARY | 15) -#define LEQ ( BINARY | 16) -#define GEQ ( BINARY | 17) -#define EQUAL ( BINARY | 18) -#define NEQ ( BINARY | 19) -#define AND ( BINARY | 20) -#define OR ( BINARY | 21) -#define STREQ ( BINARY | 22) -#define STRNEQ ( BINARY | 23) -#define EXPON ( BINARY | 24) /* Unlike the other binary operators, - * EXPON is right associative and this - * distinction is coded directly in - * ParseExpr(). */ -#define IN_LIST ( BINARY | 25) -#define NOT_IN_LIST ( BINARY | 26) -#define CLOSE_PAREN ( BINARY | 27) /* By categorizing the CLOSE_PAREN - * lexeme as a BINARY operator, the - * normal parsing rules for binary - * operators assure that a close paren - * will not directly follow another - * operator, and the machinery already - * in place to connect operands to - * operators according to precedence - * performs most of the work of - * matching open and close parens for - * us. In the end though, a close - * paren is not really a binary - * operator, and some special coding - * in ParseExpr() make sure we never - * put an actual CLOSE_PAREN node - * in the parse tree. The - * sub-expression between parens - * becomes the single argument of - * the matching OPEN_PAREN unary - * operator. */ -#define END ( BINARY | 28) /* This lexeme represents the end of - * the string being parsed. Treating - * it as a binary operator follows the - * same logic as the CLOSE_PAREN lexeme - * and END pairs with START, in the - * same way that CLOSE_PAREN pairs with - * OPEN_PAREN. */ +#define BINARY_PLUS (BINARY | PLUS) +#define BINARY_MINUS (BINARY | MINUS) +#define COMMA (BINARY | 3) + /* The "," operator is a low precedence binary + * operator that separates the arguments in a + * function call. The additional constraint + * that this operator can only legally appear + * at the right places within a function call + * argument list are hard coded within + * ParseExpr(). */ +#define MULT (BINARY | 4) +#define DIVIDE (BINARY | 5) +#define MOD (BINARY | 6) +#define LESS (BINARY | 7) +#define GREATER (BINARY | 8) +#define BIT_AND (BINARY | 9) +#define BIT_XOR (BINARY | 10) +#define BIT_OR (BINARY | 11) +#define QUESTION (BINARY | 12) + /* These two lexemes make up the */ +#define COLON (BINARY | 13) + /* ternary conditional operator, $x ? $y : $z. + * We treat them as two binary operators to + * avoid another lexeme category, and code the + * additional constraints directly in + * ParseExpr(). For instance, the right + * operand of a "?" operator must be a ":" + * operator. */ +#define LEFT_SHIFT (BINARY | 14) +#define RIGHT_SHIFT (BINARY | 15) +#define LEQ (BINARY | 16) +#define GEQ (BINARY | 17) +#define EQUAL (BINARY | 18) +#define NEQ (BINARY | 19) +#define AND (BINARY | 20) +#define OR (BINARY | 21) +#define STREQ (BINARY | 22) +#define STRNEQ (BINARY | 23) +#define EXPON (BINARY | 24) + /* Unlike the other binary operators, EXPON is + * right associative and this distinction is + * coded directly in ParseExpr(). */ +#define IN_LIST (BINARY | 25) +#define NOT_IN_LIST (BINARY | 26) +#define CLOSE_PAREN (BINARY | 27) + /* By categorizing the CLOSE_PAREN lexeme as a + * BINARY operator, the normal parsing rules + * for binary operators assure that a close + * paren will not directly follow another + * operator, and the machinery already in + * place to connect operands to operators + * according to precedence performs most of + * the work of matching open and close parens + * for us. In the end though, a close paren is + * not really a binary operator, and some + * special coding in ParseExpr() make sure we + * never put an actual CLOSE_PAREN node in the + * parse tree. The sub-expression between + * parens becomes the single argument of the + * matching OPEN_PAREN unary operator. */ +#define END (BINARY | 28) + /* This lexeme represents the end of the + * string being parsed. Treating it as a + * binary operator follows the same logic as + * the CLOSE_PAREN lexeme and END pairs with + * START, in the same way that CLOSE_PAREN + * pairs with OPEN_PAREN. */ + /* * When ParseExpr() builds the parse tree it must choose which operands to * connect to which operators. This is done according to operator precedence. - * The greater an operator's precedence the greater claim it has to link to - * an available operand. The Precedence enumeration lists the precedence - * values used by Tcl expression operators, from lowest to highest claim. - * Each precedence level is commented with the operators that hold that - * precedence. + * The greater an operator's precedence the greater claim it has to link to an + * available operand. The Precedence enumeration lists the precedence values + * used by Tcl expression operators, from lowest to highest claim. Each + * precedence level is commented with the operators that hold that precedence. */ enum Precedence { @@ -322,9 +320,9 @@ enum Precedence { }; /* - * Here the same information contained in the comments above is stored - * in inverted form, so that given a lexeme, one can quickly look up - * its precedence value. + * Here the same information contained in the comments above is stored in + * inverted form, so that given a lexeme, one can quickly look up its + * precedence value. */ static const unsigned char prec[] = { @@ -367,7 +365,7 @@ static const unsigned char prec[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, + 0, /* Unary operator lexemes */ PREC_UNARY, /* UNARY_PLUS */ PREC_UNARY, /* UNARY_MINUS */ @@ -422,7 +420,7 @@ static const unsigned char instruction[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, + 0, /* Unary operator lexemes */ INST_UPLUS, /* UNARY_PLUS */ INST_UMINUS, /* UNARY_MINUS */ @@ -455,7 +453,7 @@ static const unsigned char Lexeme[] = { INVALID /* SUB */, INVALID /* ESC */, INVALID /* FS */, INVALID /* GS */, INVALID /* RS */, INVALID /* US */, - INVALID /* SPACE */, 0 /* ! or != */, + INVALID /* SPACE */, 0 /* ! or != */, QUOTED /* " */, INVALID /* # */, VARIABLE /* $ */, MOD /* % */, 0 /* & or && */, INVALID /* ' */, @@ -490,15 +488,8 @@ static const unsigned char Lexeme[] = { typedef struct JumpList { JumpFixup jump; /* Pass this argument to matching calls of - * TclEmitForwardJump() and + * TclEmitForwardJump() and * TclFixupForwardJump(). */ - int depth; /* Remember the currStackDepth of the - * CompileEnv here. */ - int offset; /* Data used to compute jump lengths to pass - * to TclFixupForwardJump() */ - int convert; /* Temporary storage used to compute whether - * numeric conversion will be needed following - * the operator we're compiling. */ struct JumpList *next; /* Point to next item on the stack */ } JumpList; @@ -521,7 +512,6 @@ static int ParseExpr(Tcl_Interp *interp, const char *start, Tcl_Parse *parsePtr, int parseOnly); static int ParseLexeme(const char *start, int numBytes, unsigned char *lexemePtr, Tcl_Obj **literalPtr); - /* *---------------------------------------------------------------------- @@ -529,27 +519,27 @@ static int ParseLexeme(const char *start, int numBytes, * ParseExpr -- * * Given a string, the numBytes bytes starting at start, this function - * parses it as a Tcl expression and constructs a tree representing - * the structure of the expression. The caller must pass in empty - * lists as the funcList and litList arguments. The elements of the - * parsed expression are returned to the caller as that tree, a list of - * literal values, a list of function names, and in Tcl_Tokens - * added to a Tcl_Parse struct passed in by the caller. + * parses it as a Tcl expression and constructs a tree representing the + * structure of the expression. The caller must pass in empty lists as + * the funcList and litList arguments. The elements of the parsed + * expression are returned to the caller as that tree, a list of literal + * values, a list of function names, and in Tcl_Tokens added to a + * Tcl_Parse struct passed in by the caller. * * Results: * If the string is successfully parsed as a valid Tcl expression, TCL_OK - * is returned, and data about the expression structure is written to - * the last four arguments. If the string cannot be parsed as a valid - * Tcl expression, TCL_ERROR is returned, and if interp is non-NULL, an - * error message is written to interp. + * is returned, and data about the expression structure is written to the + * last four arguments. If the string cannot be parsed as a valid Tcl + * expression, TCL_ERROR is returned, and if interp is non-NULL, an error + * message is written to interp. * * Side effects: - * Memory will be allocated. If TCL_OK is returned, the caller must - * clean up the returned data structures. The (OpNode *) value written - * to opTreePtr should be passed to ckfree() and the parsePtr argument - * should be passed to Tcl_FreeParse(). The elements appended to the - * litList and funcList will automatically be freed whenever the - * refcount on those lists indicates they can be freed. + * Memory will be allocated. If TCL_OK is returned, the caller must clean + * up the returned data structures. The (OpNode *) value written to + * opTreePtr should be passed to ckfree() and the parsePtr argument + * should be passed to Tcl_FreeParse(). The elements appended to the + * litList and funcList will automatically be freed whenever the refcount + * on those lists indicates they can be freed. * *---------------------------------------------------------------------- */ @@ -568,68 +558,82 @@ ParseExpr( * substitutions. */ int parseOnly) /* A boolean indicating whether the caller's * aim is just a parse, or whether it will go - * on to compile the expression. Different - * optimizations are appropriate for the - * two scenarios. */ + * on to compile the expression. Different + * optimizations are appropriate for the two + * scenarios. */ { OpNode *nodes = NULL; /* Pointer to the OpNode storage array where * we build the parse tree. */ - int nodesAvailable = 64; /* Initial size of the storage array. This - * value establishes a minimum tree memory cost - * of only about 1 kibyte, and is large enough - * for most expressions to parse with no need - * for array growth and reallocation. */ + int nodesAvailable = 64; /* Initial size of the storage array. This + * value establishes a minimum tree memory + * cost of only about 1 kibyte, and is large + * enough for most expressions to parse with + * no need for array growth and + * reallocation. */ int nodesUsed = 0; /* Number of OpNodes filled. */ - int scanned = 0; /* Capture number of byte scanned by - * parsing routines. */ + int scanned = 0; /* Capture number of byte scanned by parsing + * routines. */ int lastParsed; /* Stores info about what the lexeme parsed * the previous pass through the parsing loop - * was. If it was an operator, lastParsed is + * was. If it was an operator, lastParsed is * the index of the OpNode for that operator. * If it was not an operator, lastParsed holds - * an OperandTypes value encoding what we - * need to know about it. */ - int incomplete; /* Index of the most recent incomplete tree - * in the OpNode array. Heads a stack of + * an OperandTypes value encoding what we need + * to know about it. */ + int incomplete; /* Index of the most recent incomplete tree in + * the OpNode array. Heads a stack of * incomplete trees linked by p.prev. */ int complete = OT_EMPTY; /* "Index" of the complete tree (that is, a * complete subexpression) determined at the - * moment. OT_EMPTY is a nonsense value - * used only to silence compiler warnings. - * During a parse, complete will always hold - * an index or an OperandTypes value pointing - * to an actual leaf at the time the complete - * tree is needed. */ - - /* These variables control generation of the error message. */ + * moment. OT_EMPTY is a nonsense value used + * only to silence compiler warnings. During a + * parse, complete will always hold an index + * or an OperandTypes value pointing to an + * actual leaf at the time the complete tree + * is needed. */ + + /* + * These variables control generation of the error message. + */ + Tcl_Obj *msg = NULL; /* The error message. */ Tcl_Obj *post = NULL; /* In a few cases, an additional postscript * for the error message, supplying more * information after the error msg and * location have been reported. */ - const char *mark = "_@_"; /* In the portion of the complete error message - * where the error location is reported, this - * "mark" substring is inserted into the - * string being parsed to aid in pinpointing - * the location of the syntax error in the - * expression. */ + const char *errCode = NULL; /* The detail word of the errorCode list, or + * NULL to indicate that no changes to the + * errorCode are to be done. */ + const char *subErrCode = NULL; + /* Extra information for use in generating the + * errorCode. */ + const char *mark = "_@_"; /* In the portion of the complete error + * message where the error location is + * reported, this "mark" substring is inserted + * into the string being parsed to aid in + * pinpointing the location of the syntax + * error in the expression. */ int insertMark = 0; /* A boolean controlling whether the "mark" * should be inserted. */ const int limit = 25; /* Portions of the error message are * constructed out of substrings of the - * original expression. In order to keep the - * error message readable, we impose this limit - * on the substring size we extract. */ + * original expression. In order to keep the + * error message readable, we impose this + * limit on the substring size we extract. */ TclParseInit(interp, start, numBytes, parsePtr); - nodes = (OpNode *) attemptckalloc(nodesAvailable * sizeof(OpNode)); + nodes = attemptckalloc(nodesAvailable * sizeof(OpNode)); if (nodes == NULL) { TclNewLiteralStringObj(msg, "not enough memory to parse expression"); + errCode = "NOMEM"; goto error; } - /* Initialize the parse tree with the special "START" node. */ + /* + * Initialize the parse tree with the special "START" node. + */ + nodes->lexeme = START; nodes->precedence = prec[START]; nodes->mark = MARK_RIGHT; @@ -638,19 +642,19 @@ ParseExpr( nodesUsed++; /* - * Main parsing loop parses one lexeme per iteration. We exit the - * loop only when there's a syntax error with a "goto error" which - * takes us to the error handling code following the loop, or when - * we've successfully completed the parse and we return to the caller. + * Main parsing loop parses one lexeme per iteration. We exit the loop + * only when there's a syntax error with a "goto error" which takes us to + * the error handling code following the loop, or when we've successfully + * completed the parse and we return to the caller. */ while (1) { - OpNode *nodePtr; /* Points to the OpNode we may fill this - * pass through the loop. */ + OpNode *nodePtr; /* Points to the OpNode we may fill this pass + * through the loop. */ unsigned char lexeme; /* The lexeme we parse this iteration. */ - Tcl_Obj *literal; /* Filled by the ParseLexeme() call when - * a literal is parsed that has a Tcl_Obj - * rep worth preserving. */ + Tcl_Obj *literal; /* Filled by the ParseLexeme() call when a + * literal is parsed that has a Tcl_Obj rep + * worth preserving. */ /* * Each pass through this loop adds up to one more OpNode. Allocate @@ -662,13 +666,13 @@ ParseExpr( OpNode *newPtr; do { - newPtr = (OpNode *) attemptckrealloc((char *) nodes, - (unsigned int) size * sizeof(OpNode)); + newPtr = attemptckrealloc(nodes, size * sizeof(OpNode)); } while ((newPtr == NULL) && ((size -= (size - nodesUsed) / 2) > nodesUsed)); if (newPtr == NULL) { TclNewLiteralStringObj(msg, "not enough memory to parse expression"); + errCode = "NOMEM"; goto error; } nodesAvailable = size; @@ -676,32 +680,41 @@ ParseExpr( } nodePtr = nodes + nodesUsed; - /* Skip white space between lexemes. */ + /* + * Skip white space between lexemes. + */ + scanned = TclParseAllWhiteSpace(start, numBytes); start += scanned; numBytes -= scanned; scanned = ParseLexeme(start, numBytes, &lexeme, &literal); - /* Use context to categorize the lexemes that are ambiguous. */ + /* + * Use context to categorize the lexemes that are ambiguous. + */ + if ((NODE_TYPE & lexeme) == 0) { + int b; + switch (lexeme) { case INVALID: - msg = Tcl_ObjPrintf( - "invalid character \"%.*s\"", scanned, start); + msg = Tcl_ObjPrintf("invalid character \"%.*s\"", + scanned, start); + errCode = "BADCHAR"; goto error; case INCOMPLETE: - msg = Tcl_ObjPrintf( - "incomplete operator \"%.*s\"", scanned, start); + msg = Tcl_ObjPrintf("incomplete operator \"%.*s\"", + scanned, start); + errCode = "PARTOP"; goto error; case BAREWORD: /* - * Most barewords in an expression are a syntax error. - * The exceptions are that when a bareword is followed by - * an open paren, it might be a function call, and when the - * bareword is a legal literal boolean value, we accept that - * as well. + * Most barewords in an expression are a syntax error. The + * exceptions are that when a bareword is followed by an open + * paren, it might be a function call, and when the bareword + * is a legal literal boolean value, we accept that as well. */ if (start[scanned+TclParseAllWhiteSpace( @@ -716,63 +729,65 @@ ParseExpr( */ Tcl_ListObjAppendElement(NULL, funcList, literal); + } else if (Tcl_GetBooleanFromObj(NULL,literal,&b) == TCL_OK) { + lexeme = BOOLEAN; } else { - int b; - if (Tcl_GetBooleanFromObj(NULL, literal, &b) == TCL_OK) { - lexeme = BOOLEAN; - } else { - Tcl_DecrRefCount(literal); - msg = Tcl_ObjPrintf( - "invalid bareword \"%.*s%s\"", - (scanned < limit) ? scanned : limit - 3, start, - (scanned < limit) ? "" : "..."); - post = Tcl_ObjPrintf( - "should be \"$%.*s%s\" or \"{%.*s%s}\"", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - Tcl_AppendPrintfToObj(post, - " or \"%.*s%s(...)\" or ...", - (scanned < limit) ? scanned : limit - 3, - start, (scanned < limit) ? "" : "..."); - if (start[0] == '0') { - const char *stop; - TclParseNumber(NULL, NULL, NULL, start, scanned, - &stop, TCL_PARSE_NO_WHITESPACE); - - if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { + Tcl_DecrRefCount(literal); + msg = Tcl_ObjPrintf("invalid bareword \"%.*s%s\"", + (scanned < limit) ? scanned : limit - 3, start, + (scanned < limit) ? "" : "..."); + post = Tcl_ObjPrintf( + "should be \"$%.*s%s\" or \"{%.*s%s}\"", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + Tcl_AppendPrintfToObj(post, " or \"%.*s%s(...)\" or ...", + (scanned < limit) ? scanned : limit - 3, + start, (scanned < limit) ? "" : "..."); + errCode = "BAREWORD"; + if (start[0] == '0') { + const char *stop; + TclParseNumber(NULL, NULL, NULL, start, scanned, + &stop, TCL_PARSE_NO_WHITESPACE); + + if (isdigit(UCHAR(*stop)) || (stop == start + 1)) { + switch (start[1]) { + case 'b': + Tcl_AppendToObj(post, + " (invalid binary number?)", -1); parsePtr->errorType = TCL_PARSE_BAD_NUMBER; - - switch (start[1]) { - case 'b': - Tcl_AppendToObj(post, - " (invalid binary number?)", -1); - break; - case 'o': + errCode = "BADNUMBER"; + subErrCode = "BINARY"; + break; + case 'o': + Tcl_AppendToObj(post, + " (invalid octal number?)", -1); + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; + break; + default: + if (isdigit(UCHAR(start[1]))) { Tcl_AppendToObj(post, " (invalid octal number?)", -1); - break; - default: - if (isdigit(UCHAR(start[1]))) { - Tcl_AppendToObj(post, - " (invalid octal number?)", -1); - } - break; + parsePtr->errorType = TCL_PARSE_BAD_NUMBER; + errCode = "BADNUMBER"; + subErrCode = "OCTAL"; } + break; } } - goto error; } + goto error; } break; case PLUS: case MINUS: if (IsOperator(lastParsed)) { - /* - * A "+" or "-" coming just after another operator - * must be interpreted as a unary operator. + * A "+" or "-" coming just after another operator must be + * interpreted as a unary operator. */ lexeme |= UNARY; @@ -782,17 +797,19 @@ ParseExpr( } } /* Uncategorized lexemes */ - /* Handle lexeme based on its category. */ - switch (NODE_TYPE & lexeme) { - /* - * Each LEAF results in either a literal getting appended to the - * litList, or a sequence of Tcl_Tokens representing a Tcl word - * getting appended to the parsePtr->tokens. No OpNode is filled - * for this lexeme. + * Handle lexeme based on its category. */ + switch (NODE_TYPE & lexeme) { case LEAF: { + /* + * Each LEAF results in either a literal getting appended to the + * litList, or a sequence of Tcl_Tokens representing a Tcl word + * getting appended to the parsePtr->tokens. No OpNode is filled + * for this lexeme. + */ + Tcl_Token *tokenPtr; const char *end = start; int wordIndex; @@ -805,10 +822,14 @@ ParseExpr( if (NotOperator(lastParsed)) { msg = Tcl_ObjPrintf("missing operator at %s", mark); + errCode = "MISSING"; scanned = 0; insertMark = 1; - /* Free any literal to avoid a memleak. */ + /* + * Free any literal to avoid a memleak. + */ + if ((lexeme == NUMBER) || (lexeme == BOOLEAN)) { Tcl_DecrRefCount(literal); } @@ -817,7 +838,7 @@ ParseExpr( switch (lexeme) { case NUMBER: - case BOOLEAN: + case BOOLEAN: /* * TODO: Consider using a dict or hash to collapse all * duplicate literals into a single representative value. @@ -825,28 +846,29 @@ ParseExpr( * Pro: ~75% memory saving on expressions like * {1+1+1+1+1+.....+1} (Convert "pointer + Tcl_Obj" cost * to "pointer" cost only) - * Con: Cost of the dict store/retrieve on every literal - * in every expression when expressions like the above - * tend to be uncommon. + * Con: Cost of the dict store/retrieve on every literal in + * every expression when expressions like the above tend + * to be uncommon. * The memory savings is temporary; Compiling to bytecode * will collapse things as literals are registered - * anyway, so the savings applies only to the time - * between parsing and compiling. Possibly important - * due to high-water mark nature of memory allocation. + * anyway, so the savings applies only to the time + * between parsing and compiling. Possibly important due + * to high-water mark nature of memory allocation. */ + Tcl_ListObjAppendElement(NULL, litList, literal); complete = lastParsed = OT_LITERAL; start += scanned; numBytes -= scanned; continue; - + default: break; } /* - * Remaining LEAF cases may involve filling Tcl_Tokens, so - * make room for at least 2 more tokens. + * Remaining LEAF cases may involve filling Tcl_Tokens, so make + * room for at least 2 more tokens. */ TclGrowParseTokenArray(parsePtr, 2); @@ -865,7 +887,7 @@ ParseExpr( case BRACED: code = Tcl_ParseBraces(NULL, start, numBytes, - parsePtr, 1, &end); + parsePtr, 1, &end); scanned = end - start; break; @@ -880,6 +902,7 @@ ParseExpr( tokenPtr = parsePtr->tokenPtr + wordIndex + 1; if (code == TCL_OK && tokenPtr->type != TCL_TOKEN_VARIABLE) { TclNewLiteralStringObj(msg, "invalid character \"$\""); + errCode = "BADCHAR"; goto error; } scanned = tokenPtr->size; @@ -887,7 +910,7 @@ ParseExpr( case SCRIPT: { Tcl_Parse *nestedPtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); + TclStackAlloc(interp, sizeof(Tcl_Parse)); tokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; tokenPtr->type = TCL_TOKEN_COMMAND; @@ -897,7 +920,7 @@ ParseExpr( end = start + numBytes; start++; while (1) { - code = Tcl_ParseCommand(interp, start, (end - start), 1, + code = Tcl_ParseCommand(interp, start, end - start, 1, nestedPtr); if (code != TCL_OK) { parsePtr->term = nestedPtr->term; @@ -905,10 +928,10 @@ ParseExpr( parsePtr->incomplete = nestedPtr->incomplete; break; } - start = (nestedPtr->commandStart + nestedPtr->commandSize); + start = nestedPtr->commandStart + nestedPtr->commandSize; Tcl_FreeParse(nestedPtr); - if ((nestedPtr->term < end) && (*(nestedPtr->term) == ']') - && !(nestedPtr->incomplete)) { + if ((nestedPtr->term < end) && (nestedPtr->term[0] == ']') + && !nestedPtr->incomplete) { break; } @@ -918,6 +941,7 @@ ParseExpr( parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->incomplete = 1; code = TCL_ERROR; + errCode = "UNBALANCED"; break; } } @@ -928,28 +952,29 @@ ParseExpr( tokenPtr->size = scanned; parsePtr->numTokens++; break; - } + } /* SCRIPT case */ } if (code != TCL_OK) { - /* - * Here we handle all the syntax errors generated by - * the Tcl_Token generating parsing routines called in the - * switch just above. If the value of parsePtr->incomplete - * is 1, then the error was an unbalanced '[', '(', '{', - * or '"' and parsePtr->term is pointing to that unbalanced - * character. If the value of parsePtr->incomplete is 0, - * then the error is one of lacking whitespace following a - * quoted word, for example: expr {[an error {foo}bar]}, - * and parsePtr->term points to where the whitespace is - * missing. We reset our values of start and scanned so that - * when our error message is constructed, the location of - * the syntax error is sure to appear in it, even if the - * quoted expression is truncated. + * Here we handle all the syntax errors generated by the + * Tcl_Token generating parsing routines called in the switch + * just above. If the value of parsePtr->incomplete is 1, then + * the error was an unbalanced '[', '(', '{', or '"' and + * parsePtr->term is pointing to that unbalanced character. If + * the value of parsePtr->incomplete is 0, then the error is + * one of lacking whitespace following a quoted word, for + * example: expr {[an error {foo}bar]}, and parsePtr->term + * points to where the whitespace is missing. We reset our + * values of start and scanned so that when our error message + * is constructed, the location of the syntax error is sure to + * appear in it, even if the quoted expression is truncated. */ start = parsePtr->term; scanned = parsePtr->incomplete; + if (parsePtr->incomplete) { + errCode = "UNBALANCED"; + } goto error; } @@ -957,20 +982,19 @@ ParseExpr( tokenPtr->size = scanned; tokenPtr->numComponents = parsePtr->numTokens - wordIndex - 1; if (!parseOnly && ((lexeme == QUOTED) || (lexeme == BRACED))) { - /* * When this expression is destined to be compiled, and a * braced or quoted word within an expression is known at - * compile time (no runtime substitutions in it), we can - * store it as a literal rather than in its tokenized form. - * This is an advantage since the compiled bytecode is going - * to need the argument in Tcl_Obj form eventually, so it's - * just as well to get there now. Another advantage is that - * with this conversion, larger constant expressions might - * be grown and optimized. + * compile time (no runtime substitutions in it), we can store + * it as a literal rather than in its tokenized form. This is + * an advantage since the compiled bytecode is going to need + * the argument in Tcl_Obj form eventually, so it's just as + * well to get there now. Another advantage is that with this + * conversion, larger constant expressions might be grown and + * optimized. * - * On the contrary, if the end goal of this parse is to - * fill a Tcl_Parse for a caller of Tcl_ParseExpr(), then it's + * On the contrary, if the end goal of this parse is to fill a + * Tcl_Parse for a caller of Tcl_ParseExpr(), then it's * wasteful to convert to a literal only to convert back again * later. */ @@ -1000,10 +1024,14 @@ ParseExpr( msg = Tcl_ObjPrintf("missing operator at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; goto error; } - /* Create an OpNode for the unary operator */ + /* + * Create an OpNode for the unary operator. + */ + nodePtr->lexeme = lexeme; nodePtr->precedence = prec[lexeme]; nodePtr->mark = MARK_RIGHT; @@ -1011,16 +1039,16 @@ ParseExpr( /* * A FUNCTION cannot be a constant expression, because Tcl allows * functions to return variable results with the same arguments; - * for example, rand(). Other unary operators can root a constant + * for example, rand(). Other unary operators can root a constant * expression, so long as the argument is a constant expression. */ nodePtr->constant = (lexeme != FUNCTION); /* - * This unary operator is a new incomplete tree, so push it - * onto our stack of incomplete trees. Also remember it as - * the last lexeme we parsed. + * This unary operator is a new incomplete tree, so push it onto + * our stack of incomplete trees. Also remember it as the last + * lexeme we parsed. */ nodePtr->p.prev = incomplete; @@ -1041,15 +1069,14 @@ ParseExpr( if ((lexeme == CLOSE_PAREN) && (nodePtr[-1].lexeme == OPEN_PAREN)) { if (nodePtr[-2].lexeme == FUNCTION) { - /* * Normally, "()" is a syntax error, but as a special * case accept it as an argument list for a function. * Treat this as a special LEAF lexeme, and restart - * the parsing loop with zero characters scanned. - * We'll parse the ")" again the next time through, - * but with the OT_EMPTY leaf as the subexpression - * between the parens. + * the parsing loop with zero characters scanned. We + * will parse the ")" again the next time through, but + * with the OT_EMPTY leaf as the subexpression between + * the parens. */ scanned = 0; @@ -1059,6 +1086,7 @@ ParseExpr( msg = Tcl_ObjPrintf("empty subexpression at %s", mark); scanned = 0; insertMark = 1; + errCode = "EMPTY"; goto error; } @@ -1066,63 +1094,66 @@ ParseExpr( if (nodePtr[-1].lexeme == OPEN_PAREN) { TclNewLiteralStringObj(msg, "unbalanced open paren"); parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; } else if (nodePtr[-1].lexeme == COMMA) { msg = Tcl_ObjPrintf( "missing function argument at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; } else if (nodePtr[-1].lexeme == START) { TclNewLiteralStringObj(msg, "empty expression"); + errCode = "EMPTY"; } - } else { - if (lexeme == CLOSE_PAREN) { - TclNewLiteralStringObj(msg, "unbalanced close paren"); - } else if ((lexeme == COMMA) - && (nodePtr[-1].lexeme == OPEN_PAREN) - && (nodePtr[-2].lexeme == FUNCTION)) { - msg = Tcl_ObjPrintf( - "missing function argument at %s", mark); - scanned = 0; - insertMark = 1; - } + } else if (lexeme == CLOSE_PAREN) { + TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; + } else if ((lexeme == COMMA) + && (nodePtr[-1].lexeme == OPEN_PAREN) + && (nodePtr[-2].lexeme == FUNCTION)) { + msg = Tcl_ObjPrintf("missing function argument at %s", + mark); + scanned = 0; + insertMark = 1; + errCode = "UNBALANCED"; } if (msg == NULL) { msg = Tcl_ObjPrintf("missing operand at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; } goto error; } /* - * Here is where the tree comes together. At this point, we - * have a stack of incomplete trees corresponding to - * substrings that are incomplete expressions, followed by - * a complete tree corresponding to a substring that is itself - * a complete expression, followed by the binary operator we have - * just parsed. The incomplete trees can each be completed by - * adding a right operand. + * Here is where the tree comes together. At this point, we have a + * stack of incomplete trees corresponding to substrings that are + * incomplete expressions, followed by a complete tree + * corresponding to a substring that is itself a complete + * expression, followed by the binary operator we have just + * parsed. The incomplete trees can each be completed by adding a + * right operand. * * To illustrate with an example, when we parse the expression * "1+2*3-4" and we reach this point having just parsed the "-" * operator, we have these incomplete trees: START, "1+", and - * "2*". Next we have the complete subexpression "3". Last is - * the "-" we've just parsed. + * "2*". Next we have the complete subexpression "3". Last is the + * "-" we've just parsed. * - * The next step is to join our complete tree to an operator. - * The choice is governed by the precedence and associativity - * of the competing operators. If we connect it as the right - * operand of our most recent incomplete tree, we get a new - * complete tree, and we can repeat the process. The while - * loop following repeats this until precedence indicates it - * is time to join the complete tree as the left operand of - * the just parsed binary operator. + * The next step is to join our complete tree to an operator. The + * choice is governed by the precedence and associativity of the + * competing operators. If we connect it as the right operand of + * our most recent incomplete tree, we get a new complete tree, + * and we can repeat the process. The while loop following repeats + * this until precedence indicates it is time to join the complete + * tree as the left operand of the just parsed binary operator. * - * Continuing the example, the first pass through the loop - * will join "3" to "2*"; the next pass will join "2*3" to - * "1+". Then we'll exit the loop and join "1+2*3" to "-". - * When we return to parse another lexeme, our stack of - * incomplete trees is START and "1+2*3-". + * Continuing the example, the first pass through the loop will + * join "3" to "2*"; the next pass will join "2*3" to "1+". Then + * we'll exit the loop and join "1+2*3" to "-". When we return to + * parse another lexeme, our stack of incomplete trees is START + * and "1+2*3-". */ while (1) { @@ -1133,16 +1164,18 @@ ParseExpr( } if (incompletePtr->precedence == precedence) { + /* + * Right association rules for exponentiation. + */ - /* Right association rules for exponentiation. */ if (lexeme == EXPON) { break; } /* - * Special association rules for the conditional operators. - * The "?" and ":" operators have equal precedence, but - * must be linked up in sensible pairs. + * Special association rules for the conditional + * operators. The "?" and ":" operators have equal + * precedence, but must be linked up in sensible pairs. */ if ((incompletePtr->lexeme == QUESTION) @@ -1156,13 +1189,16 @@ ParseExpr( } } - /* Some special syntax checks... */ + /* + * Some special syntax checks... + */ /* Parens must balance */ if ((incompletePtr->lexeme == OPEN_PAREN) && (lexeme != CLOSE_PAREN)) { TclNewLiteralStringObj(msg, "unbalanced open paren"); parsePtr->errorType = TCL_PARSE_MISSING_PAREN; + errCode = "UNBALANCED"; goto error; } @@ -1170,10 +1206,10 @@ ParseExpr( if ((incompletePtr->lexeme == QUESTION) && (NotOperator(complete) || (nodes[complete].lexeme != COLON))) { - msg = Tcl_ObjPrintf( - "missing operator \":\" at %s", mark); + msg = Tcl_ObjPrintf("missing operator \":\" at %s", mark); scanned = 0; insertMark = 1; + errCode = "MISSING"; goto error; } @@ -1184,6 +1220,7 @@ ParseExpr( TclNewLiteralStringObj(msg, "unexpected operator \":\" " "without preceding \"?\""); + errCode = "SURPRISE"; goto error; } @@ -1203,9 +1240,9 @@ ParseExpr( } /* - * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations each - * make up a single operator. Force them to agree whether they - * have a constant expression. + * The QUESTION/COLON and FUNCTION/OPEN_PAREN combinations + * each make up a single operator. Force them to agree whether + * they have a constant expression. */ if ((incompletePtr->lexeme == QUESTION) @@ -1214,7 +1251,6 @@ ParseExpr( } if (incompletePtr->lexeme == START) { - /* * Completing the START tree indicates we're done. * Transfer the parse tree to the caller and return. @@ -1226,8 +1262,8 @@ ParseExpr( /* * With a right operand attached, last incomplete tree has - * become the complete tree. Pop it from the incomplete - * tree stack. + * become the complete tree. Pop it from the incomplete tree + * stack. */ complete = incomplete; @@ -1239,12 +1275,15 @@ ParseExpr( } } - /* More syntax checks... */ + /* + * More syntax checks... + */ /* Parens must balance. */ if (lexeme == CLOSE_PAREN) { if (incompletePtr->lexeme != OPEN_PAREN) { TclNewLiteralStringObj(msg, "unbalanced close paren"); + errCode = "UNBALANCED"; goto error; } } @@ -1255,6 +1294,7 @@ ParseExpr( || (incompletePtr[-1].lexeme != FUNCTION)) { TclNewLiteralStringObj(msg, "unexpected \",\" outside function argument list"); + errCode = "SURPRISE"; goto error; } } @@ -1263,25 +1303,32 @@ ParseExpr( if (IsOperator(complete) && (nodes[complete].lexeme == COLON)) { TclNewLiteralStringObj(msg, "unexpected operator \":\" without preceding \"?\""); + errCode = "SURPRISE"; goto error; } - /* Create no node for a CLOSE_PAREN lexeme. */ + /* + * Create no node for a CLOSE_PAREN lexeme. + */ + if (lexeme == CLOSE_PAREN) { break; } - /* Link complete tree as left operand of new node. */ + /* + * Link complete tree as left operand of new node. + */ + nodePtr->lexeme = lexeme; nodePtr->precedence = precedence; nodePtr->mark = MARK_LEFT; nodePtr->left = complete; - /* + /* * The COMMA operator cannot be optimized, since the function - * needs all of its arguments, and optimization would reduce - * the number. Other binary operators root constant expressions - * when both arguments are constant expressions. + * needs all of its arguments, and optimization would reduce the + * number. Other binary operators root constant expressions when + * both arguments are constant expressions. */ nodePtr->constant = (lexeme != COMMA); @@ -1296,9 +1343,9 @@ ParseExpr( } /* - * With a left operand attached and a right operand missing, - * the just-parsed binary operator is root of a new incomplete - * tree. Push it onto the stack of incomplete trees. + * With a left operand attached and a right operand missing, the + * just-parsed binary operator is root of a new incomplete tree. + * Push it onto the stack of incomplete trees. */ nodePtr->p.prev = incomplete; @@ -1313,34 +1360,36 @@ ParseExpr( numBytes -= scanned; } /* main parsing loop */ - error: - /* - * We only get here if there's been an error. - * Any errors that didn't get a suitable parsePtr->errorType, - * get recorded as syntax errors. + * We only get here if there's been an error. Any errors that didn't get a + * suitable parsePtr->errorType, get recorded as syntax errors. */ + error: if (parsePtr->errorType == TCL_PARSE_SUCCESS) { parsePtr->errorType = TCL_PARSE_SYNTAX; } - /* Free any partial parse tree we've built. */ + /* + * Free any partial parse tree we've built. + */ + if (nodes != NULL) { - ckfree((char*) nodes); + ckfree(nodes); } if (interp == NULL) { + /* + * Nowhere to report an error message, so just free it. + */ - /* Nowhere to report an error message, so just free it */ if (msg) { Tcl_DecrRefCount(msg); } } else { - /* - * Construct the complete error message. Start with the simple - * error message, pulled from the interp result if necessary... + * Construct the complete error message. Start with the simple error + * message, pulled from the interp result if necessary... */ if (msg == NULL) { @@ -1365,7 +1414,10 @@ ParseExpr( start + scanned, (start + scanned + limit > parsePtr->end) ? "" : "..."); - /* Next, append any postscript message. */ + /* + * Next, append any postscript message. + */ + if (post != NULL) { Tcl_AppendToObj(msg, ";\n", -1); Tcl_AppendObjToObj(msg, post); @@ -1373,12 +1425,19 @@ ParseExpr( } Tcl_SetObjResult(interp, msg); - /* Finally, place context information in the errorInfo. */ + /* + * Finally, place context information in the errorInfo. + */ + numBytes = parsePtr->end - parsePtr->string; Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( "\n (parsing expression \"%.*s%s\")", (numBytes < limit) ? numBytes : limit - 3, parsePtr->string, (numBytes < limit) ? "" : "...")); + if (errCode) { + Tcl_SetErrorCode(interp, "TCL", "PARSE", "EXPR", errCode, + subErrCode, NULL); + } } return TCL_ERROR; @@ -1392,10 +1451,10 @@ ParseExpr( * Given a string, the numBytes bytes starting at start, and an OpNode * tree and Tcl_Token array created by passing that same string to * ParseExpr(), this function writes into *parsePtr the sequence of - * Tcl_Tokens needed so to satisfy the historical interface provided - * by Tcl_ParseExpr(). Note that this routine exists only for the sake - * of the public Tcl_ParseExpr() routine. It is not used by Tcl itself - * at all. + * Tcl_Tokens needed so to satisfy the historical interface provided by + * Tcl_ParseExpr(). Note that this routine exists only for the sake of + * the public Tcl_ParseExpr() routine. It is not used by Tcl itself at + * all. * * Results: * None. @@ -1431,7 +1490,10 @@ ConvertTreeToTokens( nodePtr->mark++; - /* Handle next child node or leaf */ + /* + * Handle next child node or leaf. + */ + switch (next) { case OT_EMPTY: @@ -1440,12 +1502,18 @@ ConvertTreeToTokens( case OT_LITERAL: - /* Skip any white space that comes before the literal */ + /* + * Skip any white space that comes before the literal. + */ + scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; + start += scanned; numBytes -= scanned; - /* Reparse the literal to get pointers into source string */ + /* + * Reparse the literal to get pointers into source string. + */ + scanned = ParseLexeme(start, numBytes, &lexeme, NULL); TclGrowParseTokenArray(parsePtr, 2); @@ -1460,32 +1528,30 @@ ConvertTreeToTokens( subExprTokenPtr[1].numComponents = 0; parsePtr->numTokens += 2; - start +=scanned; + start += scanned; numBytes -= scanned; break; case OT_TOKENS: { - /* - * tokenPtr points to a token sequence that came from parsing - * a Tcl word. A Tcl word is made up of a sequence of one or - * more elements. When the word is only a single element, it's - * been the historical practice to replace the TCL_TOKEN_WORD - * token directly with a TCL_TOKEN_SUB_EXPR token. However, - * when the word has multiple elements, a TCL_TOKEN_WORD token - * is kept as a grouping device so that TCL_TOKEN_SUB_EXPR - * always has only one element. Wise or not, these are the - * rules the Tcl expr parser has followed, and for the sake - * of those few callers of Tcl_ParseExpr() we do not change - * them now. Internally, we can do better. + * tokenPtr points to a token sequence that came from parsing a + * Tcl word. A Tcl word is made up of a sequence of one or more + * elements. When the word is only a single element, it's been the + * historical practice to replace the TCL_TOKEN_WORD token + * directly with a TCL_TOKEN_SUB_EXPR token. However, when the + * word has multiple elements, a TCL_TOKEN_WORD token is kept as a + * grouping device so that TCL_TOKEN_SUB_EXPR always has only one + * element. Wise or not, these are the rules the Tcl expr parser + * has followed, and for the sake of those few callers of + * Tcl_ParseExpr() we do not change them now. Internally, we can + * do better. */ - + int toCopy = tokenPtr->numComponents + 1; if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { - /* - * Single element word. Copy tokens and convert the leading + * Single element word. Copy tokens and convert the leading * token to TCL_TOKEN_SUB_EXPR. */ @@ -1496,11 +1562,10 @@ ConvertTreeToTokens( subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; parsePtr->numTokens += toCopy; } else { - - /* - * Multiple element word. Create a TCL_TOKEN_SUB_EXPR - * token to lead, with fields initialized from the leading - * token, then copy entire set of word tokens. + /* + * Multiple element word. Create a TCL_TOKEN_SUB_EXPR token to + * lead, with fields initialized from the leading token, then + * copy entire set of word tokens. */ TclGrowParseTokenArray(parsePtr, toCopy+1); @@ -1515,7 +1580,7 @@ ConvertTreeToTokens( } scanned = tokenPtr->start + tokenPtr->size - start; - start +=scanned; + start += scanned; numBytes -= scanned; tokenPtr += toCopy; break; @@ -1523,21 +1588,30 @@ ConvertTreeToTokens( default: - /* Advance to the child node, which is an operator. */ + /* + * Advance to the child node, which is an operator. + */ + nodePtr = nodes + next; - /* Skip any white space that comes before the subexpression */ + /* + * Skip any white space that comes before the subexpression. + */ + scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; + start += scanned; numBytes -= scanned; - /* Generate tokens for the operator / subexpression... */ + /* + * Generate tokens for the operator / subexpression... + */ + switch (nodePtr->lexeme) { case OPEN_PAREN: case COMMA: case COLON: - /* + /* * Historical practice has been to have no Tcl_Tokens for * these operators. */ @@ -1548,16 +1622,16 @@ ConvertTreeToTokens( /* * Remember the index of the last subexpression we were - * working on -- that of our parent. We'll stack it later. + * working on -- that of our parent. We'll stack it later. */ parentIdx = subExprTokenIdx; /* * Verify space for the two leading Tcl_Tokens representing - * the subexpression rooted by this operator. The first - * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second - * of type TCL_TOKEN_OPERATOR. + * the subexpression rooted by this operator. The first + * Tcl_Token will be of type TCL_TOKEN_SUB_EXPR; the second of + * type TCL_TOKEN_OPERATOR. */ TclGrowParseTokenArray(parsePtr, 2); @@ -1576,7 +1650,7 @@ ConvertTreeToTokens( /* * Eventually, we know that the numComponents field of the - * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means + * Tcl_Token of type TCL_TOKEN_OPERATOR will be 0. This means * we can make other use of this field for now to track the * stack of subexpressions we have pending. */ @@ -1598,9 +1672,12 @@ ConvertTreeToTokens( case MARK_RIGHT: next = nodePtr->right; - /* Skip any white space that comes before the operator */ + /* + * Skip any white space that comes before the operator. + */ + scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; + start += scanned; numBytes -= scanned; /* @@ -1615,7 +1692,10 @@ ConvertTreeToTokens( case COMMA: case COLON: - /* No tokens for these lexemes -> nothing to do. */ + /* + * No tokens for these lexemes -> nothing to do. + */ + break; default: @@ -1631,7 +1711,7 @@ ConvertTreeToTokens( break; } - start +=scanned; + start += scanned; numBytes -= scanned; break; @@ -1650,21 +1730,24 @@ ConvertTreeToTokens( case OPEN_PAREN: - /* Skip past matching close paren. */ + /* + * Skip past matching close paren. + */ + scanned = TclParseAllWhiteSpace(start, numBytes); - start +=scanned; + start += scanned; numBytes -= scanned; scanned = ParseLexeme(start, numBytes, &lexeme, NULL); - start +=scanned; + start += scanned; numBytes -= scanned; break; - default: { + default: /* * Before we leave this node/operator/subexpression for the * last time, finish up its tokens.... - * + * * Our current position scanning the string is where the * substring for the subexpression ends. */ @@ -1674,7 +1757,7 @@ ConvertTreeToTokens( /* * All the Tcl_Tokens allocated and filled belong to - * this subexpresion. The first token is the leading + * this subexpresion. The first token is the leading * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) * are its components. */ @@ -1693,9 +1776,11 @@ ConvertTreeToTokens( subExprTokenIdx = parentIdx; break; } - } - /* Since we're returning to parent, skip child handling code. */ + /* + * Since we're returning to parent, skip child handling code. + */ + nodePtr = nodes + nodePtr->p.parent; goto router; } @@ -1740,19 +1825,18 @@ Tcl_ParseExpr( * information in the structure is ignored. */ { int code; - OpNode *opTree = NULL; /* Will point to the tree of operators */ - Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ - Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse *exprParsePtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); - /* Holds the Tcl_Tokens of substitutions */ + OpNode *opTree = NULL; /* Will point to the tree of operators. */ + Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals. */ + Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names. */ + Tcl_Parse *exprParsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); + /* Holds the Tcl_Tokens of substitutions. */ if (numBytes < 0) { numBytes = (start ? strlen(start) : 0); } - code = ParseExpr(interp, start, numBytes, &opTree, litList, - funcList, exprParsePtr, 1 /* parseOnly */); + code = ParseExpr(interp, start, numBytes, &opTree, litList, funcList, + exprParsePtr, 1 /* parseOnly */); Tcl_DecrRefCount(funcList); Tcl_DecrRefCount(litList); @@ -1767,7 +1851,7 @@ Tcl_ParseExpr( Tcl_FreeParse(exprParsePtr); TclStackFree(interp, exprParsePtr); - ckfree((char *) opTree); + ckfree(opTree); return code; } @@ -1807,7 +1891,7 @@ ParseLexeme( *lexemePtr = END; return 0; } - byte = (unsigned char)(*start); + byte = UCHAR(*start); if (byte < sizeof(Lexeme) && Lexeme[byte] != 0) { *lexemePtr = Lexeme[byte]; return 1; @@ -1884,11 +1968,10 @@ ParseLexeme( case 'i': if ((numBytes > 1) && (start[1] == 'n') && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { - /* - * Must make this check so we can tell the difference between - * the "in" operator and the "int" function name and the - * "infinity" numeric value. + * Must make this check so we can tell the difference between the + * "in" operator and the "int" function name and the "infinity" + * numeric value. */ *lexemePtr = IN_LIST; @@ -1944,6 +2027,7 @@ ParseLexeme( */ if (literal->typePtr == &tclDoubleType) { const char *p = start; + while (p < end) { if (!isalnum(UCHAR(*p++))) { /* @@ -1963,6 +2047,7 @@ ParseLexeme( */ goto number; } + /* * Otherwise, fall through and parse the whole as a bareword. */ @@ -1973,6 +2058,7 @@ ParseLexeme( scanned = Tcl_UtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; scanned = Tcl_UtfToUniChar(utfBytes, &ch); @@ -1990,6 +2076,7 @@ ParseLexeme( scanned = Tcl_UtfToUniChar(end, &ch); } else { char utfBytes[TCL_UTF_MAX]; + memcpy(utfBytes, end, (size_t) numBytes); utfBytes[numBytes] = '\0'; scanned = Tcl_UtfToUniChar(utfBytes, &ch); @@ -2011,7 +2098,7 @@ ParseLexeme( * TclCompileExpr -- * * This procedure compiles a string containing a Tcl expression into Tcl - * bytecodes. + * bytecodes. * * Results: * None. @@ -2028,21 +2115,22 @@ TclCompileExpr( const char *script, /* The source script to compile. */ int numBytes, /* Number of bytes in script. */ CompileEnv *envPtr, /* Holds resulting instructions. */ - int optimize) /* 0 for one-off expressions */ + int optimize) /* 0 for one-off expressions. */ { OpNode *opTree = NULL; /* Will point to the tree of operators */ Tcl_Obj *litList = Tcl_NewObj(); /* List to hold the literals */ Tcl_Obj *funcList = Tcl_NewObj(); /* List to hold the functon names*/ - Tcl_Parse *parsePtr = - (Tcl_Parse *) TclStackAlloc(interp, sizeof(Tcl_Parse)); + Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); /* Holds the Tcl_Tokens of substitutions */ int code = ParseExpr(interp, script, numBytes, &opTree, litList, funcList, parsePtr, 0 /* parseOnly */); if (code == TCL_OK) { + /* + * Valid parse; compile the tree. + */ - /* Valid parse; compile the tree. */ int objc; Tcl_Obj *const *litObjv; Tcl_Obj **funcObjv; @@ -2063,7 +2151,7 @@ TclCompileExpr( TclStackFree(interp, parsePtr); Tcl_DecrRefCount(funcList); Tcl_DecrRefCount(litList); - ckfree((char *) opTree); + ckfree(opTree); } /* @@ -2095,6 +2183,7 @@ ExecConstantExprTree( ByteCode *byteCodePtr; int code; Tcl_Obj *byteCodeObj = Tcl_NewObj(); + NRE_callback *rootPtr = TOP_CB(interp); /* * Note we are compiling an expression with literal arguments. This means @@ -2102,7 +2191,7 @@ ExecConstantExprTree( * bytecode, so there's no need to tend to TIP 280 issues. */ - envPtr = (CompileEnv *) TclStackAlloc(interp, sizeof(CompileEnv)); + envPtr = TclStackAlloc(interp, sizeof(CompileEnv)); TclInitCompileEnv(interp, envPtr, NULL, 0, NULL, 0); CompileExprTree(interp, nodes, index, litObjvPtr, NULL, NULL, envPtr, 0 /* optimize */); @@ -2111,8 +2200,9 @@ ExecConstantExprTree( TclInitByteCodeObj(byteCodeObj, envPtr); TclFreeCompileEnv(envPtr); TclStackFree(interp, envPtr); - byteCodePtr = (ByteCode *) byteCodeObj->internalRep.twoPtrValue.ptr1; - code = TclExecuteByteCode(interp, byteCodePtr); + byteCodePtr = byteCodeObj->internalRep.twoPtrValue.ptr1; + TclNRExecuteByteCode(interp, byteCodePtr); + code = TclNRRunCallbacks(interp, TCL_OK, rootPtr); Tcl_DecrRefCount(byteCodeObj); return code; } @@ -2121,20 +2211,20 @@ ExecConstantExprTree( *---------------------------------------------------------------------- * * CompileExprTree -- - * Compiles and writes to envPtr instructions for the subexpression - * tree at index in the nodes array. (*litObjvPtr) must point to the - * proper location in a corresponding literals list. Likewise, when - * non-NULL, funcObjv and tokenPtr must point into matching arrays of - * function names and Tcl_Token's derived from earlier call to - * ParseExpr(). When optimize is true, any constant subexpressions - * will be precomputed. + * + * Compiles and writes to envPtr instructions for the subexpression tree + * at index in the nodes array. (*litObjvPtr) must point to the proper + * location in a corresponding literals list. Likewise, when non-NULL, + * funcObjv and tokenPtr must point into matching arrays of function + * names and Tcl_Token's derived from earlier call to ParseExpr(). When + * optimize is true, any constant subexpressions will be precomputed. * * Results: * None. * * Side effects: * Adds instructions to envPtr to evaluate the expression at runtime. - * Consumes subtree of nodes rooted at index. Advances the pointer + * Consumes subtree of nodes rooted at index. Advances the pointer * *litObjvPtr. * *---------------------------------------------------------------------- @@ -2164,30 +2254,8 @@ CompileExprTree( if (nodePtr->mark == MARK_LEFT) { next = nodePtr->left; - switch (nodePtr->lexeme) { - case QUESTION: - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; + if (nodePtr->lexeme == QUESTION) { convert = 1; - break; - case AND: - case OR: - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = (JumpList *) TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - break; } } else if (nodePtr->mark == MARK_RIGHT) { next = nodePtr->right; @@ -2199,20 +2267,20 @@ CompileExprTree( int length; Tcl_DStringInit(&cmdName); - Tcl_DStringAppend(&cmdName, "tcl::mathfunc::", -1); + TclDStringAppendLiteral(&cmdName, "tcl::mathfunc::"); p = TclGetStringFromObj(*funcObjv, &length); funcObjv++; Tcl_DStringAppend(&cmdName, p, length); - TclEmitPush(TclRegisterNewNSLiteral(envPtr, + TclEmitPush(TclRegisterNewCmdLiteral(envPtr, Tcl_DStringValue(&cmdName), Tcl_DStringLength(&cmdName)), envPtr); Tcl_DStringFree(&cmdName); /* * Start a count of the number of words in this function - * command invocation. In case there's already a count - * in progress (nested functions), save it in our unused - * "left" field for restoring later. + * command invocation. In case there's already a count in + * progress (nested functions), save it in our unused "left" + * field for restoring later. */ nodePtr->left = numWords; @@ -2220,24 +2288,35 @@ CompileExprTree( break; } case QUESTION: - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump); break; case COLON: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->jump)); - envPtr->currStackDepth = jumpPtr->depth; - jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart); - jumpPtr->convert = convert; + &jumpPtr->jump); + TclAdjustStackDepth(-1, envPtr); + if (convert) { + jumpPtr->jump.jumpType = TCL_TRUE_JUMP; + } convert = 1; break; case AND: - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &(jumpPtr->jump)); - break; case OR: - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &(jumpPtr->jump)); + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) + ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, &jumpPtr->jump); break; } } else { + int pc1, pc2, target; + switch (nodePtr->lexeme) { case START: case QUESTION: @@ -2250,69 +2329,72 @@ CompileExprTree( /* do nothing */ break; case FUNCTION: - /* - * Use the numWords count we've kept to invoke the - * function command with the correct number of arguments. + * Use the numWords count we've kept to invoke the function + * command with the correct number of arguments. */ - + if (numWords < 255) { TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr); } else { TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr); } - /* Restore any saved numWords value. */ + /* + * Restore any saved numWords value. + */ + numWords = nodePtr->left; convert = 1; break; case COMMA: + /* + * Each comma implies another function argument. + */ - /* Each comma implies another function argument. */ numWords++; break; case COLON: - if (TclFixupForwardJump(envPtr, &(jumpPtr->next->jump), - (envPtr->codeNext - envPtr->codeStart) - - jumpPtr->next->jump.codeOffset, 127)) { - jumpPtr->offset += 3; + CLANG_ASSERT(jumpPtr); + if (jumpPtr->jump.jumpType == TCL_TRUE_JUMP) { + jumpPtr->jump.jumpType = TCL_UNCONDITIONAL_JUMP; + convert = 1; + } + target = jumpPtr->jump.codeOffset + 2; + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + target += 3; } - TclFixupForwardJump(envPtr, &(jumpPtr->jump), - jumpPtr->offset - jumpPtr->jump.codeOffset, 127); - convert |= jumpPtr->convert; - envPtr->currStackDepth = jumpPtr->depth + 1; freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); + TclFixupForwardJump(envPtr, &jumpPtr->jump, + target - jumpPtr->jump.codeOffset, 127); + freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); break; case AND: case OR: - TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) - ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, - &(jumpPtr->next->jump)); + CLANG_ASSERT(jumpPtr); + pc1 = CurrentOffset(envPtr); + TclEmitInstInt1((nodePtr->lexeme == AND) ? INST_JUMP_FALSE1 + : INST_JUMP_TRUE1, 0, envPtr); TclEmitPush(TclRegisterNewLiteral(envPtr, (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &(jumpPtr->next->next->jump)); - TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->jump), 127); - if (TclFixupForwardJumpToHere(envPtr, &(jumpPtr->jump), 127)) { - jumpPtr->next->next->jump.codeOffset += 3; + pc2 = CurrentOffset(envPtr); + TclEmitInstInt1(INST_JUMP1, 0, envPtr); + TclAdjustStackDepth(-1, envPtr); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc1, + envPtr->codeStart + pc1 + 1); + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + pc2 += 3; } TclEmitPush(TclRegisterNewLiteral(envPtr, (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); - TclFixupForwardJumpToHere(envPtr, &(jumpPtr->next->next->jump), - 127); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc2, + envPtr->codeStart + pc2 + 1); convert = 0; - envPtr->currStackDepth = jumpPtr->depth + 1; - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); @@ -2323,8 +2405,8 @@ CompileExprTree( break; } if (nodePtr == rootPtr) { - /* We're done */ + return; } nodePtr = nodes + nodePtr->p.parent; @@ -2341,14 +2423,11 @@ CompileExprTree( Tcl_Obj *literal = *litObjv; if (optimize) { - int length, index; + int length; const char *bytes = TclGetStringFromObj(literal, &length); - LiteralEntry *lePtr; - Tcl_Obj *objPtr; - - index = TclRegisterNewLiteral(envPtr, bytes, length); - lePtr = envPtr->literalArrayPtr + index; - objPtr = lePtr->objPtr; + int index = TclRegisterNewLiteral(envPtr, bytes, length); + Tcl_Obj *objPtr = TclFetchLiteral(envPtr, index); + if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { /* * Would like to do this: @@ -2359,10 +2438,10 @@ CompileExprTree( * * However, the design of the "global" and "local" * LiteralTable does not permit the value of lePtr->objPtr - * to change. So rather than replace lePtr->objPtr, we - * do surgery to transfer our desired intrep into it. - * + * to change. So rather than replace lePtr->objPtr, we do + * surgery to transfer our desired intrep into it. */ + objPtr->typePtr = literal->typePtr; objPtr->internalRep = literal->internalRep; literal->typePtr = NULL; @@ -2370,30 +2449,57 @@ CompileExprTree( TclEmitPush(index, envPtr); } else { /* - * When optimize==0, we know the expression is a one-off - * and there's nothing to be gained from sharing literals - * when they won't live long, and the copies we have already - * have an appropriate intrep. In this case, skip literal + * When optimize==0, we know the expression is a one-off and + * there's nothing to be gained from sharing literals when + * they won't live long, and the copies we have already have + * an appropriate intrep. In this case, skip literal * registration that would enable sharing, and use the routine * that preserves intreps. */ + TclEmitPush(TclAddLiteralObj(envPtr, literal, NULL), envPtr); } (*litObjvPtr)++; break; } case OT_TOKENS: - TclCompileTokens(interp, tokenPtr+1, tokenPtr->numComponents, - envPtr); + CompileTokens(envPtr, tokenPtr, interp); tokenPtr += tokenPtr->numComponents + 1; break; default: if (optimize && nodes[next].constant) { Tcl_InterpState save = Tcl_SaveInterpState(interp, TCL_OK); + if (ExecConstantExprTree(interp, nodes, next, litObjvPtr) == TCL_OK) { - TclEmitPush(TclAddLiteralObj(envPtr, - Tcl_GetObjResult(interp), NULL), envPtr); + int index; + Tcl_Obj *objPtr = Tcl_GetObjResult(interp); + + /* + * Don't generate a string rep, but if we have one + * already, then use it to share via the literal table. + */ + + if (objPtr->bytes) { + Tcl_Obj *tableValue; + + index = TclRegisterNewLiteral(envPtr, objPtr->bytes, + objPtr->length); + tableValue = TclFetchLiteral(envPtr, index); + if ((tableValue->typePtr == NULL) && + (objPtr->typePtr != NULL)) { + /* + * Same intrep surgery as for OT_LITERAL. + */ + + tableValue->typePtr = objPtr->typePtr; + tableValue->internalRep = objPtr->internalRep; + objPtr->typePtr = NULL; + } + } else { + index = TclAddLiteralObj(envPtr, objPtr, NULL); + } + TclEmitPush(index, envPtr); } else { TclCompileSyntaxError(interp, envPtr); } @@ -2410,6 +2516,7 @@ CompileExprTree( *---------------------------------------------------------------------- * * TclSingleOpCmd -- + * * Implements the commands: ~, !, <<, >>, %, !=, ne, in, ni * in the ::tcl::mathop namespace. These commands have no * extension to arbitrary arguments; they accept only exactly one @@ -2419,7 +2526,7 @@ CompileExprTree( * A standard Tcl return code and result left in interp. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -2431,12 +2538,12 @@ TclSingleOpCmd( int objc, Tcl_Obj *const objv[]) { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + TclOpCmdClientData *occdPtr = clientData; unsigned char lexeme; OpNode nodes[2]; Tcl_Obj *const *litObjv = objv + 1; - if (objc != 1+occdPtr->i.numArgs) { + if (objc != 1 + occdPtr->i.numArgs) { Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); return TCL_ERROR; } @@ -2462,16 +2569,17 @@ TclSingleOpCmd( *---------------------------------------------------------------------- * * TclSortingOpCmd -- - * Implements the commands: <, <=, >, >=, ==, eq - * in the ::tcl::mathop namespace. These commands are defined for + * Implements the commands: + * <, <=, >, >=, ==, eq + * in the ::tcl::mathop namespace. These commands are defined for * arbitrary number of arguments by computing the AND of the base - * operator applied to all neighbor argument pairs. + * operator applied to all neighbor argument pairs. * * Results: * A standard Tcl return code and result left in interp. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -2488,11 +2596,10 @@ TclSortingOpCmd( if (objc < 3) { Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); } else { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; - Tcl_Obj **litObjv = (Tcl_Obj **) TclStackAlloc(interp, - 2*(objc-2)*sizeof(Tcl_Obj *)); - OpNode *nodes = (OpNode *) TclStackAlloc(interp, - 2*(objc-2)*sizeof(OpNode)); + TclOpCmdClientData *occdPtr = clientData; + Tcl_Obj **litObjv = TclStackAlloc(interp, + 2 * (objc-2) * sizeof(Tcl_Obj *)); + OpNode *nodes = TclStackAlloc(interp, 2 * (objc-2) * sizeof(OpNode)); unsigned char lexeme; int i, lastAnd = 1; Tcl_Obj *const *litObjPtrPtr = litObjv; @@ -2543,16 +2650,16 @@ TclSortingOpCmd( * * TclVariadicOpCmd -- * Implements the commands: +, *, &, |, ^, ** - * in the ::tcl::mathop namespace. These commands are defined for + * in the ::tcl::mathop namespace. These commands are defined for * arbitrary number of arguments by repeatedly applying the base - * operator with suitable associative rules. When fewer than two + * operator with suitable associative rules. When fewer than two * arguments are provided, suitable identity values are returned. * * Results: * A standard Tcl return code and result left in interp. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -2564,7 +2671,7 @@ TclVariadicOpCmd( int objc, Tcl_Obj *const objv[]) { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + TclOpCmdClientData *occdPtr = clientData; unsigned char lexeme; int code; @@ -2619,14 +2726,13 @@ TclVariadicOpCmd( return code; } else { Tcl_Obj *const *litObjv = objv + 1; - OpNode *nodes = (OpNode *) TclStackAlloc(interp, - (objc-1)*sizeof(OpNode)); + OpNode *nodes = TclStackAlloc(interp, (objc-1) * sizeof(OpNode)); int i, lastOp = OT_LITERAL; nodes[0].lexeme = START; nodes[0].mark = MARK_RIGHT; if (lexeme == EXPON) { - for (i=objc-2; i>0; i-- ) { + for (i=objc-2; i>0; i--) { nodes[i].lexeme = lexeme; nodes[i].mark = MARK_LEFT; nodes[i].left = OT_LITERAL; @@ -2637,7 +2743,7 @@ TclVariadicOpCmd( lastOp = i; } } else { - for (i=1; i<objc-1; i++ ) { + for (i=1; i<objc-1; i++) { nodes[i].lexeme = lexeme; nodes[i].mark = MARK_LEFT; nodes[i].left = lastOp; @@ -2654,7 +2760,6 @@ TclVariadicOpCmd( code = ExecConstantExprTree(interp, nodes, 0, &litObjv); TclStackFree(interp, nodes); - return code; } } @@ -2664,16 +2769,16 @@ TclVariadicOpCmd( * * TclNoIdentOpCmd -- * Implements the commands: -, / - * in the ::tcl::mathop namespace. These commands are defined for - * arbitrary non-zero number of arguments by repeatedly applying - * the base operator with suitable associative rules. When no - * arguments are provided, an error is raised. + * in the ::tcl::mathop namespace. These commands are defined for + * arbitrary non-zero number of arguments by repeatedly applying the base + * operator with suitable associative rules. When no arguments are + * provided, an error is raised. * * Results: * A standard Tcl return code and result left in interp. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -2685,7 +2790,8 @@ TclNoIdentOpCmd( int objc, Tcl_Obj *const objv[]) { - TclOpCmdClientData *occdPtr = (TclOpCmdClientData *)clientData; + TclOpCmdClientData *occdPtr = clientData; + if (objc < 2) { Tcl_WrongNumArgs(interp, 1, objv, occdPtr->expected); return TCL_ERROR; |