1 files changed, 24 insertions, 26 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 9142e2b..23dc0a4 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1883,7 +1883,7 @@ ParseLexeme(
 
     case 'i':
 	if ((numBytes > 1) && (start[1] == 'n')
-		&& ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 
 	    /*
 	     * Must make this check so we can tell the difference between
@@ -1898,14 +1898,15 @@ ParseLexeme(
 
     case 'e':
 	if ((numBytes > 1) && (start[1] == 'q')
-		&& ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 	    *lexemePtr = STREQ;
 	    return 2;
 	}
 	break;
 
     case 'n':
-	if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+	if ((numBytes > 1)
+		&& ((numBytes == 2) || start[2] & 0x80 || !isalpha(UCHAR(start[2])))) {
 	    switch (start[1]) {
 	    case 'e':
 		*lexemePtr = STRNEQ;
@@ -1920,8 +1921,7 @@ ParseLexeme(
     literal = Tcl_NewObj();
     if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end,
 	    TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
-	if (end < start + numBytes && !isalnum(UCHAR(*end))
-		&& UCHAR(*end) != '_') {
+	if (end < start + numBytes && !TclIsBareword(*end)) {
 	
 	number:
 	    TclInitStringRep(literal, start, end-start);
@@ -1945,7 +1945,7 @@ ParseLexeme(
 	    if (literal->typePtr == &tclDoubleType) {
 		const char *p = start;
 		while (p < end) {
-		    if (!isalnum(UCHAR(*p++))) {
+		    if (!TclIsBareword(*p++)) {
 			/*
 			 * The number has non-bareword characters, so we 
 			 * must treat it as a number.
@@ -1969,31 +1969,29 @@ ParseLexeme(
 	}
     }
 
-    if (Tcl_UtfCharComplete(start, numBytes)) {
-	scanned = Tcl_UtfToUniChar(start, &ch);
-    } else {
-	char utfBytes[TCL_UTF_MAX];
-	memcpy(utfBytes, start, (size_t) numBytes);
-	utfBytes[numBytes] = '\0';
-	scanned = Tcl_UtfToUniChar(utfBytes, &ch);
-    }
-    if (!isalnum(UCHAR(ch))) {
-	*lexemePtr = INVALID;
-	Tcl_DecrRefCount(literal);
-	return scanned;
-    }
-    end = start;
-    while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) {
-	end += scanned;
-	numBytes -= scanned;
-	if (Tcl_UtfCharComplete(end, numBytes)) {
-	    scanned = Tcl_UtfToUniChar(end, &ch);
+    /*
+     * We reject leading underscores in bareword.  No sensible reason why.
+     * Might be inspired by reserved identifier rules in C, which of course
+     * have no direct relevance here.
+     */  
+
+    if (!TclIsBareword(*start) || *start == '_') {
+	if (Tcl_UtfCharComplete(start, numBytes)) {
+	    scanned = Tcl_UtfToUniChar(start, &ch);
 	} else {
 	    char utfBytes[TCL_UTF_MAX];
-	    memcpy(utfBytes, end, (size_t) numBytes);
+	    memcpy(utfBytes, start, (size_t) numBytes);
 	    utfBytes[numBytes] = '\0';
 	    scanned = Tcl_UtfToUniChar(utfBytes, &ch);
 	}
+	*lexemePtr = INVALID;
+	Tcl_DecrRefCount(literal);
+	return scanned;
+    }
+    end = start;
+    while (numBytes && TclIsBareword(*end)) {
+	end += 1;
+	numBytes -= 1;
     }
     *lexemePtr = BAREWORD;
     if (literalPtr) {