diff options
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r-- | generic/tclCompExpr.c | 196 |
1 files changed, 86 insertions, 110 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 2a48117..4390282 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -365,7 +365,7 @@ static const unsigned char prec[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, + 0, /* Unary operator lexemes */ PREC_UNARY, /* UNARY_PLUS */ PREC_UNARY, /* UNARY_MINUS */ @@ -420,7 +420,7 @@ static const unsigned char instruction[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, + 0, /* Unary operator lexemes */ INST_UPLUS, /* UNARY_PLUS */ INST_UMINUS, /* UNARY_MINUS */ @@ -488,15 +488,8 @@ static const unsigned char Lexeme[] = { typedef struct JumpList { JumpFixup jump; /* Pass this argument to matching calls of - * TclEmitForwardJump() and + * TclEmitForwardJump() and * TclFixupForwardJump(). */ - int depth; /* Remember the currStackDepth of the - * CompileEnv here. */ - int offset; /* Data used to compute jump lengths to pass - * to TclFixupForwardJump() */ - int convert; /* Temporary storage used to compute whether - * numeric conversion will be needed following - * the operator we're compiling. */ struct JumpList *next; /* Point to next item on the stack */ } JumpList; @@ -571,13 +564,13 @@ ParseExpr( { OpNode *nodes = NULL; /* Pointer to the OpNode storage array where * we build the parse tree. */ - int nodesAvailable = 64; /* Initial size of the storage array. This + unsigned int nodesAvailable = 64; /* Initial size of the storage array. This * value establishes a minimum tree memory * cost of only about 1 kibyte, and is large * enough for most expressions to parse with * no need for array growth and * reallocation. */ - int nodesUsed = 0; /* Number of OpNodes filled. */ + unsigned int nodesUsed = 0; /* Number of OpNodes filled. */ int scanned = 0; /* Capture number of byte scanned by parsing * routines. */ int lastParsed; /* Stores info about what the lexeme parsed @@ -669,11 +662,13 @@ ParseExpr( */ if (nodesUsed >= nodesAvailable) { - int size = nodesUsed * 2; - OpNode *newPtr; + unsigned int size = nodesUsed * 2; + OpNode *newPtr = NULL; do { + if (size <= UINT_MAX/sizeof(OpNode)) { newPtr = attemptckrealloc(nodes, size * sizeof(OpNode)); + } } while ((newPtr == NULL) && ((size -= (size - nodesUsed) / 2) > nodesUsed)); if (newPtr == NULL) { @@ -845,7 +840,7 @@ ParseExpr( switch (lexeme) { case NUMBER: - case BOOLEAN: + case BOOLEAN: /* * TODO: Consider using a dict or hash to collapse all * duplicate literals into a single representative value. @@ -868,7 +863,7 @@ ParseExpr( start += scanned; numBytes -= scanned; continue; - + default: break; } @@ -1331,7 +1326,7 @@ ParseExpr( nodePtr->mark = MARK_LEFT; nodePtr->left = complete; - /* + /* * The COMMA operator cannot be optimized, since the function * needs all of its arguments, and optimization would reduce the * number. Other binary operators root constant expressions when @@ -1553,7 +1548,7 @@ ConvertTreeToTokens( * Tcl_ParseExpr() we do not change them now. Internally, we can * do better. */ - + int toCopy = tokenPtr->numComponents + 1; if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) { @@ -1569,7 +1564,7 @@ ConvertTreeToTokens( subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR; parsePtr->numTokens += toCopy; } else { - /* + /* * Multiple element word. Create a TCL_TOKEN_SUB_EXPR token to * lead, with fields initialized from the leading token, then * copy entire set of word tokens. @@ -1618,7 +1613,7 @@ ConvertTreeToTokens( case COMMA: case COLON: - /* + /* * Historical practice has been to have no Tcl_Tokens for * these operators. */ @@ -1754,7 +1749,7 @@ ConvertTreeToTokens( /* * Before we leave this node/operator/subexpression for the * last time, finish up its tokens.... - * + * * Our current position scanning the string is where the * substring for the subexpression ends. */ @@ -1974,7 +1969,7 @@ ParseLexeme( case 'i': if ((numBytes > 1) && (start[1] == 'n') - && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + && ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) { /* * Must make this check so we can tell the difference between the * "in" operator and the "int" function name and the "infinity" @@ -1988,14 +1983,15 @@ ParseLexeme( case 'e': if ((numBytes > 1) && (start[1] == 'q') - && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + && ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) { *lexemePtr = STREQ; return 2; } break; case 'n': - if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) { + if ((numBytes > 1) + && ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) { switch (start[1]) { case 'e': *lexemePtr = STRNEQ; @@ -2010,9 +2006,8 @@ ParseLexeme( literal = Tcl_NewObj(); if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end, TCL_PARSE_NO_WHITESPACE) == TCL_OK) { - if (end < start + numBytes && !isalnum(UCHAR(*end)) - && UCHAR(*end) != '_') { - + if (end < start + numBytes && !TclIsBareword(*end)) { + number: TclInitStringRep(literal, start, end-start); *lexemePtr = NUMBER; @@ -2036,9 +2031,9 @@ ParseLexeme( const char *p = start; while (p < end) { - if (!isalnum(UCHAR(*p++))) { + if (!TclIsBareword(*p++)) { /* - * The number has non-bareword characters, so we + * The number has non-bareword characters, so we * must treat it as a number. */ goto number; @@ -2061,33 +2056,30 @@ ParseLexeme( } } - if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = Tcl_UtfToUniChar(start, &ch); - } else { - char utfBytes[TCL_UTF_MAX]; + /* + * We reject leading underscores in bareword. No sensible reason why. + * Might be inspired by reserved identifier rules in C, which of course + * have no direct relevance here. + */ - memcpy(utfBytes, start, (size_t) numBytes); - utfBytes[numBytes] = '\0'; - scanned = Tcl_UtfToUniChar(utfBytes, &ch); - } - if (!isalnum(UCHAR(ch))) { - *lexemePtr = INVALID; - Tcl_DecrRefCount(literal); - return scanned; - } - end = start; - while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) { - end += scanned; - numBytes -= scanned; - if (Tcl_UtfCharComplete(end, numBytes)) { - scanned = Tcl_UtfToUniChar(end, &ch); + if (!TclIsBareword(*start) || *start == '_') { + if (Tcl_UtfCharComplete(start, numBytes)) { + scanned = Tcl_UtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; - memcpy(utfBytes, end, (size_t) numBytes); + memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; scanned = Tcl_UtfToUniChar(utfBytes, &ch); } + *lexemePtr = INVALID; + Tcl_DecrRefCount(literal); + return scanned; + } + end = start; + while (numBytes && TclIsBareword(*end)) { + end += 1; + numBytes -= 1; } *lexemePtr = BAREWORD; if (literalPtr) { @@ -2105,7 +2097,7 @@ ParseLexeme( * TclCompileExpr -- * * This procedure compiles a string containing a Tcl expression into Tcl - * bytecodes. + * bytecodes. * * Results: * None. @@ -2261,30 +2253,8 @@ CompileExprTree( if (nodePtr->mark == MARK_LEFT) { next = nodePtr->left; - switch (nodePtr->lexeme) { - case QUESTION: - newJump = TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; + if (nodePtr->lexeme == QUESTION) { convert = 1; - break; - case AND: - case OR: - newJump = TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - newJump = TclStackAlloc(interp, sizeof(JumpList)); - newJump->next = jumpPtr; - jumpPtr = newJump; - jumpPtr->depth = envPtr->currStackDepth; - break; } } else if (nodePtr->mark == MARK_RIGHT) { next = nodePtr->right; @@ -2317,25 +2287,35 @@ CompileExprTree( break; } case QUESTION: + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump); break; case COLON: - CLANG_ASSERT(jumpPtr); + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &jumpPtr->next->jump); - envPtr->currStackDepth = jumpPtr->depth; - jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart); - jumpPtr->convert = convert; + &jumpPtr->jump); + TclAdjustStackDepth(-1, envPtr); + if (convert) { + jumpPtr->jump.jumpType = TCL_TRUE_JUMP; + } convert = 1; break; case AND: - TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump); - break; case OR: - TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &jumpPtr->jump); + newJump = TclStackAlloc(interp, sizeof(JumpList)); + newJump->next = jumpPtr; + jumpPtr = newJump; + TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) + ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, &jumpPtr->jump); break; } } else { + int pc1, pc2, target; + switch (nodePtr->lexeme) { case START: case QUESTION: @@ -2352,11 +2332,11 @@ CompileExprTree( * Use the numWords count we've kept to invoke the function * command with the correct number of arguments. */ - + if (numWords < 255) { - TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr); + TclEmitInvoke(envPtr, INST_INVOKE_STK1, numWords); } else { - TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr); + TclEmitInvoke(envPtr, INST_INVOKE_STK4, numWords); } /* @@ -2375,18 +2355,20 @@ CompileExprTree( break; case COLON: CLANG_ASSERT(jumpPtr); - if (TclFixupForwardJump(envPtr, &jumpPtr->next->jump, - (envPtr->codeNext - envPtr->codeStart) - - jumpPtr->next->jump.codeOffset, 127)) { - jumpPtr->offset += 3; + if (jumpPtr->jump.jumpType == TCL_TRUE_JUMP) { + jumpPtr->jump.jumpType = TCL_UNCONDITIONAL_JUMP; + convert = 1; + } + target = jumpPtr->jump.codeOffset + 2; + if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { + target += 3; } - TclFixupForwardJump(envPtr, &jumpPtr->jump, - jumpPtr->offset - jumpPtr->jump.codeOffset, 127); - convert |= jumpPtr->convert; - envPtr->currStackDepth = jumpPtr->depth + 1; freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); + TclFixupForwardJump(envPtr, &jumpPtr->jump, + target - jumpPtr->jump.codeOffset, 127); + freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); @@ -2394,30 +2376,24 @@ CompileExprTree( case AND: case OR: CLANG_ASSERT(jumpPtr); - TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND) - ? TCL_FALSE_JUMP : TCL_TRUE_JUMP, - &jumpPtr->next->jump); + pc1 = CurrentOffset(envPtr); + TclEmitInstInt1((nodePtr->lexeme == AND) ? INST_JUMP_FALSE1 + : INST_JUMP_TRUE1, 0, envPtr); TclEmitPush(TclRegisterNewLiteral(envPtr, (nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, - &jumpPtr->next->next->jump); + pc2 = CurrentOffset(envPtr); + TclEmitInstInt1(INST_JUMP1, 0, envPtr); TclAdjustStackDepth(-1, envPtr); - TclFixupForwardJumpToHere(envPtr, &jumpPtr->next->jump, 127); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc1, + envPtr->codeStart + pc1 + 1); if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) { - jumpPtr->next->next->jump.codeOffset += 3; + pc2 += 3; } TclEmitPush(TclRegisterNewLiteral(envPtr, (nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr); - TclFixupForwardJumpToHere(envPtr, &jumpPtr->next->next->jump, - 127); + TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc2, + envPtr->codeStart + pc2 + 1); convert = 0; - envPtr->currStackDepth = jumpPtr->depth + 1; - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); - freePtr = jumpPtr; - jumpPtr = jumpPtr->next; - TclStackFree(interp, freePtr); freePtr = jumpPtr; jumpPtr = jumpPtr->next; TclStackFree(interp, freePtr); @@ -2450,7 +2426,7 @@ CompileExprTree( const char *bytes = TclGetStringFromObj(literal, &length); int index = TclRegisterNewLiteral(envPtr, bytes, length); Tcl_Obj *objPtr = TclFetchLiteral(envPtr, index); - + if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) { /* * Would like to do this: @@ -2593,7 +2569,7 @@ TclSingleOpCmd( * * TclSortingOpCmd -- * Implements the commands: - * <, <=, >, >=, ==, eq + * <, <=, >, >=, ==, eq * in the ::tcl::mathop namespace. These commands are defined for * arbitrary number of arguments by computing the AND of the base * operator applied to all neighbor argument pairs. |