1 files changed, 86 insertions, 110 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 2a48117..4390282 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -365,7 +365,7 @@ static const unsigned char prec[] = {
     0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  
+    0,
     /* Unary operator lexemes */
     PREC_UNARY,		/* UNARY_PLUS */
     PREC_UNARY,		/* UNARY_MINUS */
@@ -420,7 +420,7 @@ static const unsigned char instruction[] = {
     0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  
+    0,
     /* Unary operator lexemes */
     INST_UPLUS,		/* UNARY_PLUS */
     INST_UMINUS,	/* UNARY_MINUS */
@@ -488,15 +488,8 @@ static const unsigned char Lexeme[] = {
 
 typedef struct JumpList {
     JumpFixup jump;		/* Pass this argument to matching calls of
-				 * TclEmitForwardJump() and 
+				 * TclEmitForwardJump() and
 				 * TclFixupForwardJump(). */
-    int depth;			/* Remember the currStackDepth of the
-				 * CompileEnv here. */
-    int offset;			/* Data used to compute jump lengths to pass
-				 * to TclFixupForwardJump() */
-    int convert;		/* Temporary storage used to compute whether
-				 * numeric conversion will be needed following
-				 * the operator we're compiling. */
     struct JumpList *next;	/* Point to next item on the stack */
 } JumpList;
 
@@ -571,13 +564,13 @@ ParseExpr(
 {
     OpNode *nodes = NULL;	/* Pointer to the OpNode storage array where
 				 * we build the parse tree. */
-    int nodesAvailable = 64;	/* Initial size of the storage array. This
+    unsigned int nodesAvailable = 64; /* Initial size of the storage array. This
 				 * value establishes a minimum tree memory
 				 * cost of only about 1 kibyte, and is large
 				 * enough for most expressions to parse with
 				 * no need for array growth and
 				 * reallocation. */
-    int nodesUsed = 0;		/* Number of OpNodes filled. */
+    unsigned int nodesUsed = 0;	/* Number of OpNodes filled. */
     int scanned = 0;		/* Capture number of byte scanned by parsing
 				 * routines. */
     int lastParsed;		/* Stores info about what the lexeme parsed
@@ -669,11 +662,13 @@ ParseExpr(
 	 */
 
 	if (nodesUsed >= nodesAvailable) {
-	    int size = nodesUsed * 2;
-	    OpNode *newPtr;
+	    unsigned int size = nodesUsed * 2;
+	    OpNode *newPtr = NULL;
 
 	    do {
+	      if (size <= UINT_MAX/sizeof(OpNode)) {
 		newPtr = attemptckrealloc(nodes, size * sizeof(OpNode));
+	      }
 	    } while ((newPtr == NULL)
 		    && ((size -= (size - nodesUsed) / 2) > nodesUsed));
 	    if (newPtr == NULL) {
@@ -845,7 +840,7 @@ ParseExpr(
 
 	    switch (lexeme) {
 	    case NUMBER:
-	    case BOOLEAN: 
+	    case BOOLEAN:
 		/*
 		 * TODO: Consider using a dict or hash to collapse all
 		 * duplicate literals into a single representative value.
@@ -868,7 +863,7 @@ ParseExpr(
 		start += scanned;
 		numBytes -= scanned;
 		continue;
-	    
+
 	    default:
 		break;
 	    }
@@ -1331,7 +1326,7 @@ ParseExpr(
 	    nodePtr->mark = MARK_LEFT;
 	    nodePtr->left = complete;
 
-	    /* 
+	    /*
 	     * The COMMA operator cannot be optimized, since the function
 	     * needs all of its arguments, and optimization would reduce the
 	     * number. Other binary operators root constant expressions when
@@ -1553,7 +1548,7 @@ ConvertTreeToTokens(
 	     * Tcl_ParseExpr() we do not change them now. Internally, we can
 	     * do better.
 	     */
-	
+
 	    int toCopy = tokenPtr->numComponents + 1;
 
 	    if (tokenPtr->numComponents == tokenPtr[1].numComponents + 1) {
@@ -1569,7 +1564,7 @@ ConvertTreeToTokens(
 		subExprTokenPtr->type = TCL_TOKEN_SUB_EXPR;
 		parsePtr->numTokens += toCopy;
 	    } else {
-		/* 
+		/*
 		 * Multiple element word. Create a TCL_TOKEN_SUB_EXPR token to
 		 * lead, with fields initialized from the leading token, then
 		 * copy entire set of word tokens.
@@ -1618,7 +1613,7 @@ ConvertTreeToTokens(
 	    case COMMA:
 	    case COLON:
 
-		/* 
+		/*
 		 * Historical practice has been to have no Tcl_Tokens for
 		 * these operators.
 		 */
@@ -1754,7 +1749,7 @@ ConvertTreeToTokens(
 		/*
 		 * Before we leave this node/operator/subexpression for the
 		 * last time, finish up its tokens....
-		 * 
+		 *
 		 * Our current position scanning the string is where the
 		 * substring for the subexpression ends.
 		 */
@@ -1974,7 +1969,7 @@ ParseLexeme(
 
     case 'i':
 	if ((numBytes > 1) && (start[1] == 'n')
-		&& ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 	    /*
 	     * Must make this check so we can tell the difference between the
 	     * "in" operator and the "int" function name and the "infinity"
@@ -1988,14 +1983,15 @@ ParseLexeme(
 
     case 'e':
 	if ((numBytes > 1) && (start[1] == 'q')
-		&& ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 	    *lexemePtr = STREQ;
 	    return 2;
 	}
 	break;
 
     case 'n':
-	if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+	if ((numBytes > 1)
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 	    switch (start[1]) {
 	    case 'e':
 		*lexemePtr = STRNEQ;
@@ -2010,9 +2006,8 @@ ParseLexeme(
     literal = Tcl_NewObj();
     if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end,
 	    TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
-	if (end < start + numBytes && !isalnum(UCHAR(*end))
-		&& UCHAR(*end) != '_') {
-	
+	if (end < start + numBytes && !TclIsBareword(*end)) {
+
 	number:
 	    TclInitStringRep(literal, start, end-start);
 	    *lexemePtr = NUMBER;
@@ -2036,9 +2031,9 @@ ParseLexeme(
 		const char *p = start;
 
 		while (p < end) {
-		    if (!isalnum(UCHAR(*p++))) {
+		    if (!TclIsBareword(*p++)) {
 			/*
-			 * The number has non-bareword characters, so we 
+			 * The number has non-bareword characters, so we
 			 * must treat it as a number.
 			 */
 			goto number;
@@ -2061,33 +2056,30 @@ ParseLexeme(
 	}
     }
 
-    if (Tcl_UtfCharComplete(start, numBytes)) {
-	scanned = Tcl_UtfToUniChar(start, &ch);
-    } else {
-	char utfBytes[TCL_UTF_MAX];
+    /*
+     * A bareword may not begin with an underscore.  There is no compelling
+     * reason for this restriction; it may have been inspired by C's reserved
+     * identifier rules, which have no direct relevance here.
+     */
 
-	memcpy(utfBytes, start, (size_t) numBytes);
-	utfBytes[numBytes] = '\0';
-	scanned = Tcl_UtfToUniChar(utfBytes, &ch);
-    }
-    if (!isalnum(UCHAR(ch))) {
-	*lexemePtr = INVALID;
-	Tcl_DecrRefCount(literal);
-	return scanned;
-    }
-    end = start;
-    while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) {
-	end += scanned;
-	numBytes -= scanned;
-	if (Tcl_UtfCharComplete(end, numBytes)) {
-	    scanned = Tcl_UtfToUniChar(end, &ch);
+    if (!TclIsBareword(*start) || *start == '_') {
+	if (Tcl_UtfCharComplete(start, numBytes)) {
+	    scanned = Tcl_UtfToUniChar(start, &ch);
 	} else {
 	    char utfBytes[TCL_UTF_MAX];
 
-	    memcpy(utfBytes, end, (size_t) numBytes);
+	    memcpy(utfBytes, start, (size_t) numBytes);
 	    utfBytes[numBytes] = '\0';
 	    scanned = Tcl_UtfToUniChar(utfBytes, &ch);
 	}
+	*lexemePtr = INVALID;
+	Tcl_DecrRefCount(literal);
+	return scanned;
+    }
+    end = start;
+    while (numBytes && TclIsBareword(*end)) {
+	end += 1;
+	numBytes -= 1;
     }
     *lexemePtr = BAREWORD;
     if (literalPtr) {
@@ -2105,7 +2097,7 @@ ParseLexeme(
  * TclCompileExpr --
  *
  *	This procedure compiles a string containing a Tcl expression into Tcl
- *	bytecodes. 
+ *	bytecodes.
  *
  * Results:
  *	None.
@@ -2261,30 +2253,8 @@ CompileExprTree(
 	if (nodePtr->mark == MARK_LEFT) {
 	    next = nodePtr->left;
 
-	    switch (nodePtr->lexeme) {
-	    case QUESTION:
-		newJump = TclStackAlloc(interp, sizeof(JumpList));
-		newJump->next = jumpPtr;
-		jumpPtr = newJump;
-		newJump = TclStackAlloc(interp, sizeof(JumpList));
-		newJump->next = jumpPtr;
-		jumpPtr = newJump;
-		jumpPtr->depth = envPtr->currStackDepth;
+	    if (nodePtr->lexeme == QUESTION) {
 		convert = 1;
-		break;
-	    case AND:
-	    case OR:
-		newJump = TclStackAlloc(interp, sizeof(JumpList));
-		newJump->next = jumpPtr;
-		jumpPtr = newJump;
-		newJump = TclStackAlloc(interp, sizeof(JumpList));
-		newJump->next = jumpPtr;
-		jumpPtr = newJump;
-		newJump = TclStackAlloc(interp, sizeof(JumpList));
-		newJump->next = jumpPtr;
-		jumpPtr = newJump;
-		jumpPtr->depth = envPtr->currStackDepth;
-		break;
 	    }
 	} else if (nodePtr->mark == MARK_RIGHT) {
 	    next = nodePtr->right;
@@ -2317,25 +2287,35 @@ CompileExprTree(
 		break;
 	    }
 	    case QUESTION:
+		newJump = TclStackAlloc(interp, sizeof(JumpList));
+		newJump->next = jumpPtr;
+		jumpPtr = newJump;
 		TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump);
 		break;
 	    case COLON:
-		CLANG_ASSERT(jumpPtr);
+		newJump = TclStackAlloc(interp, sizeof(JumpList));
+		newJump->next = jumpPtr;
+		jumpPtr = newJump;
 		TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP,
-			&jumpPtr->next->jump);
-		envPtr->currStackDepth = jumpPtr->depth;
-		jumpPtr->offset = (envPtr->codeNext - envPtr->codeStart);
-		jumpPtr->convert = convert;
+			&jumpPtr->jump);
+		TclAdjustStackDepth(-1, envPtr);
+		if (convert) {
+		    jumpPtr->jump.jumpType = TCL_TRUE_JUMP;
+		}
 		convert = 1;
 		break;
 	    case AND:
-		TclEmitForwardJump(envPtr, TCL_FALSE_JUMP, &jumpPtr->jump);
-		break;
 	    case OR:
-		TclEmitForwardJump(envPtr, TCL_TRUE_JUMP, &jumpPtr->jump);
+		newJump = TclStackAlloc(interp, sizeof(JumpList));
+		newJump->next = jumpPtr;
+		jumpPtr = newJump;
+		TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND)
+			?  TCL_FALSE_JUMP : TCL_TRUE_JUMP, &jumpPtr->jump);
 		break;
 	    }
 	} else {
+	    int pc1, pc2, target;
+
 	    switch (nodePtr->lexeme) {
 	    case START:
 	    case QUESTION:
@@ -2352,11 +2332,11 @@ CompileExprTree(
 		 * Use the numWords count we've kept to invoke the function
 		 * command with the correct number of arguments.
 		 */
-		
+
 		if (numWords < 255) {
-		    TclEmitInstInt1(INST_INVOKE_STK1, numWords, envPtr);
+		    TclEmitInvoke(envPtr, INST_INVOKE_STK1, numWords);
 		} else {
-		    TclEmitInstInt4(INST_INVOKE_STK4, numWords, envPtr);
+		    TclEmitInvoke(envPtr, INST_INVOKE_STK4, numWords);
 		}
 
 		/*
@@ -2375,18 +2355,20 @@ CompileExprTree(
 		break;
 	    case COLON:
 		CLANG_ASSERT(jumpPtr);
-		if (TclFixupForwardJump(envPtr, &jumpPtr->next->jump,
-			(envPtr->codeNext - envPtr->codeStart)
-			- jumpPtr->next->jump.codeOffset, 127)) {
-		    jumpPtr->offset += 3;
+		if (jumpPtr->jump.jumpType == TCL_TRUE_JUMP) {
+		    jumpPtr->jump.jumpType = TCL_UNCONDITIONAL_JUMP;
+		    convert = 1;
+		}
+		target = jumpPtr->jump.codeOffset + 2;
+		if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) {
+		    target += 3;
 		}
-		TclFixupForwardJump(envPtr, &jumpPtr->jump,
-			jumpPtr->offset - jumpPtr->jump.codeOffset, 127);
-		convert |= jumpPtr->convert;
-		envPtr->currStackDepth = jumpPtr->depth + 1;
 		freePtr = jumpPtr;
 		jumpPtr = jumpPtr->next;
 		TclStackFree(interp, freePtr);
+		TclFixupForwardJump(envPtr, &jumpPtr->jump,
+			target - jumpPtr->jump.codeOffset, 127);
+
 		freePtr = jumpPtr;
 		jumpPtr = jumpPtr->next;
 		TclStackFree(interp, freePtr);
@@ -2394,30 +2376,24 @@ CompileExprTree(
 	    case AND:
 	    case OR:
 		CLANG_ASSERT(jumpPtr);
-		TclEmitForwardJump(envPtr, (nodePtr->lexeme == AND)
-			?  TCL_FALSE_JUMP : TCL_TRUE_JUMP,
-			&jumpPtr->next->jump);
+		pc1 = CurrentOffset(envPtr);
+		TclEmitInstInt1((nodePtr->lexeme == AND) ? INST_JUMP_FALSE1
+			: INST_JUMP_TRUE1, 0, envPtr);
 		TclEmitPush(TclRegisterNewLiteral(envPtr,
 			(nodePtr->lexeme == AND) ? "1" : "0", 1), envPtr);
-		TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP,
-			&jumpPtr->next->next->jump);
+		pc2 = CurrentOffset(envPtr);
+		TclEmitInstInt1(INST_JUMP1, 0, envPtr);
 		TclAdjustStackDepth(-1, envPtr);
-		TclFixupForwardJumpToHere(envPtr, &jumpPtr->next->jump, 127);
+		TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc1,
+			envPtr->codeStart + pc1 + 1);
 		if (TclFixupForwardJumpToHere(envPtr, &jumpPtr->jump, 127)) {
-		    jumpPtr->next->next->jump.codeOffset += 3;
+		    pc2 += 3;
 		}
 		TclEmitPush(TclRegisterNewLiteral(envPtr,
 			(nodePtr->lexeme == AND) ? "0" : "1", 1), envPtr);
-		TclFixupForwardJumpToHere(envPtr, &jumpPtr->next->next->jump,
-			127);
+		TclStoreInt1AtPtr(CurrentOffset(envPtr) - pc2,
+			envPtr->codeStart + pc2 + 1);
 		convert = 0;
-		envPtr->currStackDepth = jumpPtr->depth + 1;
-		freePtr = jumpPtr;
-		jumpPtr = jumpPtr->next;
-		TclStackFree(interp, freePtr);
-		freePtr = jumpPtr;
-		jumpPtr = jumpPtr->next;
-		TclStackFree(interp, freePtr);
 		freePtr = jumpPtr;
 		jumpPtr = jumpPtr->next;
 		TclStackFree(interp, freePtr);
@@ -2450,7 +2426,7 @@ CompileExprTree(
 		const char *bytes = TclGetStringFromObj(literal, &length);
 		int index = TclRegisterNewLiteral(envPtr, bytes, length);
 		Tcl_Obj *objPtr = TclFetchLiteral(envPtr, index);
-		
+
 		if ((objPtr->typePtr == NULL) && (literal->typePtr != NULL)) {
 		    /*
 		     * Would like to do this:
@@ -2593,7 +2569,7 @@ TclSingleOpCmd(
  *
  * TclSortingOpCmd --
  *	Implements the commands:
- *		<, <=, >, >=, ==, eq 
+ *		<, <=, >, >=, ==, eq
  *	in the ::tcl::mathop namespace. These commands are defined for
  *	arbitrary number of arguments by computing the AND of the base
  *	operator applied to all neighbor argument pairs.