summary | refs | log | tree | commit | diff | stats
path: root/generic/tclParse.c
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2014-12-05 12:28:01 (GMT)
committer dgp <dgp@users.sourceforge.net> 2014-12-05 12:28:01 (GMT)
commit 71a48e59c2eb9d90233530198576fc445abfda42 (patch)
tree 94c5a2756c009d91338f8ee894ae0122dd4f35e9 /generic/tclParse.c
parent 423020bd63284883ef0e1343574dd256b7023ae0 (diff)
parent 3041f3f9a1d8a242105ffe99eebae201a7079549 (diff)
download tcl-71a48e59c2eb9d90233530198576fc445abfda42.zip
tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.gz
tcl-71a48e59c2eb9d90233530198576fc445abfda42.tar.bz2
[d2ffcca163] Limit parsing results that are documented to accept only ASCII chars to actually follow that constraint. This requires not trusting isalnum(.) and isalpha(.) to deliver portable identical results.
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r-- generic/tclParse.c 63
1 files changed, 46 insertions, 17 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index ee0d4c4..ca12be5 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -621,6 +621,47 @@ TclIsSpaceProc(
/*
*----------------------------------------------------------------------
*
+ * TclIsBareword --
+ *
+ * Report whether byte is one that can be part of a "bareword".
+ * This concept is named in expression parsing, where it determines
+ * what can be a legal function name, but is the same definition used
+ * in determining what variable names can be parsed as variable
+ * substitutions without the benefit of enclosing braces. The accepted
+ * ASCII chars are the numeric chars ('0'-'9'), the alphabetic chars
+ * ('a'-'z', 'A'-'Z') and underscore ('_').
+ *
+ * Results:
+ * Returns 1, if byte is in the accepted set of chars, 0 otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclIsBareword(
+    char byte)
+{
+    /*
+     * Spell out each accepted ASCII range instead of calling isalnum(),
+     * whose answer is not guaranteed to be the same on every platform.
+     */
+
+    if ((byte >= '0') && (byte <= '9')) {
+        return 1;
+    }
+    if (((byte >= 'a') && (byte <= 'z')) || ((byte >= 'A') && (byte <= 'Z'))) {
+        return 1;
+    }
+
+    return (byte == '_');
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* ParseWhiteSpace --
*
* Scans up to numBytes bytes starting at src, consuming white space
@@ -1346,9 +1387,7 @@ Tcl_ParseVarName(
{
Tcl_Token *tokenPtr;
register const char *src;
- unsigned char c;
- int varIndex, offset;
- Tcl_UniChar ch;
+ int varIndex;
unsigned array;
if ((numBytes == 0) || (start == NULL)) {
@@ -1431,22 +1470,12 @@ Tcl_ParseVarName(
tokenPtr->numComponents = 0;
while (numBytes) {
- if (Tcl_UtfCharComplete(src, numBytes)) {
- offset = Tcl_UtfToUniChar(src, &ch);
- } else {
- char utfBytes[TCL_UTF_MAX];
-
- memcpy(utfBytes, src, (size_t) numBytes);
- utfBytes[numBytes] = '\0';
- offset = Tcl_UtfToUniChar(utfBytes, &ch);
- }
- c = UCHAR(ch);
- if (isalnum(c) || (c == '_')) { /* INTL: ISO only, UCHAR. */
- src += offset;
- numBytes -= offset;
+ if (TclIsBareword(*src)) {
+ src += 1;
+ numBytes -= 1;
continue;
}
- if ((c == ':') && (numBytes != 1) && (src[1] == ':')) {
+ if ((src[0] == ':') && (numBytes != 1) && (src[1] == ':')) {
src += 2;
numBytes -= 2;
while (numBytes && (*src == ':')) {