[d2ffcca163] Limit parsing results that are documented to accept only ASCII chars to actually follow that constraint. This requires not trusting isalnum(.) and isalpha(.) to deliver portable identical results.

author: dgp <dgp@users.sourceforge.net> 2014-12-05 12:28:01 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2014-12-05 12:28:01 (GMT)
commit: 71a48e59c2eb9d90233530198576fc445abfda42 (patch)
tree: 94c5a2756c009d91338f8ee894ae0122dd4f35e9 /generic/tclParse.c
parent: 423020bd63284883ef0e1343574dd256b7023ae0 (diff)
parent: 3041f3f9a1d8a242105ffe99eebae201a7079549 (diff)
download: tcl-71a48e59c2eb9d90233530198576fc445abfda42.zip
tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.gz
tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.bz2
1 files changed, 46 insertions, 17 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index ee0d4c4..ca12be5 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -621,6 +621,47 @@ TclIsSpaceProc(
 /*
  *----------------------------------------------------------------------
  *
+ * TclIsBareword --
+ *
+ *	Report whether byte is one that can be part of a "bareword".
+ *	This concept is named in expression parsing, where it determines
+ *	what can be a legal function name, but is the same definition used
+ *	in determining what variable names can be parsed as variable
+ *	substitutions without the benefit of enclosing braces.  The set of
+ *	ASCII chars that is accepted consists of the numeric chars ('0'-'9'),
+ *	the alphabetic chars ('a'-'z', 'A'-'Z') and underscore ('_').
+ *
+ * Results:
+ *	Returns 1, if byte is in the accepted set of chars, 0 otherwise.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclIsBareword(
+    char byte)			/* Byte to classify as bareword char or not. */
+{
+    if (byte < '0' || byte > 'z') {	/* Outside the span '0'..'z': reject. */
+	return 0;
+    }
+    if (byte <= '9' || byte >= 'a') {	/* Now in '0'..'9' or 'a'..'z'. */
+	return 1;
+    }
+    if (byte == '_') {			/* Underscore is accepted explicitly. */
+	return 1;
+    }
+    if (byte < 'A' || byte > 'Z') {	/* Leftover bytes between '9' and 'a'. */
+	return 0;
+    }
+    return 1;				/* Only 'A'..'Z' reaches this point. */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * ParseWhiteSpace --
  *
  *	Scans up to numBytes bytes starting at src, consuming white space
@@ -1346,9 +1387,7 @@ Tcl_ParseVarName(
 {
     Tcl_Token *tokenPtr;
     register const char *src;
-    unsigned char c;
-    int varIndex, offset;
-    Tcl_UniChar ch;
+    int varIndex;
     unsigned array;
 
     if ((numBytes == 0) || (start == NULL)) {
@@ -1431,22 +1470,12 @@ Tcl_ParseVarName(
 	tokenPtr->numComponents = 0;
 
 	while (numBytes) {
-	    if (Tcl_UtfCharComplete(src, numBytes)) {
-		offset = Tcl_UtfToUniChar(src, &ch);
-	    } else {
-		char utfBytes[TCL_UTF_MAX];
-
-		memcpy(utfBytes, src, (size_t) numBytes);
-		utfBytes[numBytes] = '\0';
-		offset = Tcl_UtfToUniChar(utfBytes, &ch);
-	    }
-	    c = UCHAR(ch);
-	    if (isalnum(c) || (c == '_')) {	/* INTL: ISO only, UCHAR. */
-		src += offset;
-		numBytes -= offset;
+	    if (TclIsBareword(*src)) {
+		src += 1;
+		numBytes -= 1;
 		continue;
 	    }
-	    if ((c == ':') && (numBytes != 1) && (src[1] == ':')) {
+	    if ((src[0] == ':') && (numBytes != 1) && (src[1] == ':')) {
 		src += 2;
 		numBytes -= 2;
 		while (numBytes && (*src == ':')) {
author	dgp <dgp@users.sourceforge.net>	2014-12-05 12:28:01 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2014-12-05 12:28:01 (GMT)
commit	71a48e59c2eb9d90233530198576fc445abfda42 (patch)
tree	94c5a2756c009d91338f8ee894ae0122dd4f35e9 /generic/tclParse.c
parent	423020bd63284883ef0e1343574dd256b7023ae0 (diff)
parent	3041f3f9a1d8a242105ffe99eebae201a7079549 (diff)
download	tcl-71a48e59c2eb9d90233530198576fc445abfda42.zip tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.gz tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.bz2