summaryrefslogtreecommitdiffstats
path: root/generic/tclCompExpr.c
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2014-12-05 12:28:01 (GMT)
committerdgp <dgp@users.sourceforge.net>2014-12-05 12:28:01 (GMT)
commitbc8ffb392220e949c4aadbe13d7b97912a7ea7ab (patch)
tree94c5a2756c009d91338f8ee894ae0122dd4f35e9 /generic/tclCompExpr.c
parentbff00baca2d262fe129758037163387e65b99b46 (diff)
parentbfb321eb38ad0ef45285f0637c865cf701949b90 (diff)
downloadtcl-bc8ffb392220e949c4aadbe13d7b97912a7ea7ab.zip
tcl-bc8ffb392220e949c4aadbe13d7b97912a7ea7ab.tar.gz
tcl-bc8ffb392220e949c4aadbe13d7b97912a7ea7ab.tar.bz2
[d2ffcca163] Limit parsing results that are documented to accept only ASCII chars to actually follow that constraint. This requires not trusting isalnum(.) and isalpha(.) to deliver portable identical results.
Diffstat (limited to 'generic/tclCompExpr.c')
-rw-r--r--generic/tclCompExpr.c51
1 files changed, 24 insertions, 27 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 94c1bd6..448f99c 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1967,7 +1967,7 @@ ParseLexeme(
case 'i':
if ((numBytes > 1) && (start[1] == 'n')
- && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ && ((numBytes == 2) || start[2] & 0x80 || !isalpha(start[2]))) {
/*
* Must make this check so we can tell the difference between the
* "in" operator and the "int" function name and the "infinity"
@@ -1981,14 +1981,15 @@ ParseLexeme(
case 'e':
if ((numBytes > 1) && (start[1] == 'q')
- && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ && ((numBytes == 2) || start[2] & 0x80 || !isalpha(start[2]))) {
*lexemePtr = STREQ;
return 2;
}
break;
case 'n':
- if ((numBytes > 1) && ((numBytes == 2) || !isalpha(UCHAR(start[2])))) {
+ if ((numBytes > 1)
+ && ((numBytes == 2) || start[2] & 0x80 || !isalpha(start[2]))) {
switch (start[1]) {
case 'e':
*lexemePtr = STRNEQ;
@@ -2003,8 +2004,7 @@ ParseLexeme(
literal = Tcl_NewObj();
if (TclParseNumber(NULL, literal, NULL, start, numBytes, &end,
TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
- if (end < start + numBytes && !isalnum(UCHAR(*end))
- && UCHAR(*end) != '_') {
+ if (end < start + numBytes && !TclIsBareword(*end)) {
number:
TclInitStringRep(literal, start, end-start);
@@ -2029,7 +2029,7 @@ ParseLexeme(
const char *p = start;
while (p < end) {
- if (!isalnum(UCHAR(*p++))) {
+ if (!TclIsBareword(*p++)) {
/*
* The number has non-bareword characters, so we
* must treat it as a number.
@@ -2054,33 +2054,30 @@ ParseLexeme(
}
}
- if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = Tcl_UtfToUniChar(start, &ch);
- } else {
- char utfBytes[TCL_UTF_MAX];
-
- memcpy(utfBytes, start, (size_t) numBytes);
- utfBytes[numBytes] = '\0';
- scanned = Tcl_UtfToUniChar(utfBytes, &ch);
- }
- if (!isalnum(UCHAR(ch))) {
- *lexemePtr = INVALID;
- Tcl_DecrRefCount(literal);
- return scanned;
- }
- end = start;
- while (isalnum(UCHAR(ch)) || (UCHAR(ch) == '_')) {
- end += scanned;
- numBytes -= scanned;
- if (Tcl_UtfCharComplete(end, numBytes)) {
- scanned = Tcl_UtfToUniChar(end, &ch);
+ /*
+ * We reject leading underscores in bareword. No sensible reason why.
+ * Might be inspired by reserved identifier rules in C, which of course
+ * have no direct relevance here.
+ */
+
+ if (!TclIsBareword(*start) || *start == '_') {
+ if (Tcl_UtfCharComplete(start, numBytes)) {
+ scanned = Tcl_UtfToUniChar(start, &ch);
} else {
char utfBytes[TCL_UTF_MAX];
- memcpy(utfBytes, end, (size_t) numBytes);
+ memcpy(utfBytes, start, (size_t) numBytes);
utfBytes[numBytes] = '\0';
scanned = Tcl_UtfToUniChar(utfBytes, &ch);
}
+ *lexemePtr = INVALID;
+ Tcl_DecrRefCount(literal);
+ return scanned;
+ }
+ end = start;
+ while (numBytes && TclIsBareword(*end)) {
+ end += 1;
+ numBytes -= 1;
}
*lexemePtr = BAREWORD;
if (literalPtr) {