summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgriffin <briang42@easystreet.net>2023-09-01 20:53:54 (GMT)
committergriffin <briang42@easystreet.net>2023-09-01 20:53:54 (GMT)
commitaed90d38169f8dd6caf8197b3b44c424eeb6fb12 (patch)
tree5c049ab59f3d5c9455207b4763ed38923c273dda
parentb8e93abb309628a70ecd72c95c7cdfe86b4f8c0a (diff)
parent12d09ed4d9bb9034f455ec89ca2d0c12a426d25c (diff)
downloadtcl-aed90d38169f8dd6caf8197b3b44c424eeb6fb12.zip
tcl-aed90d38169f8dd6caf8197b3b44c424eeb6fb12.tar.gz
tcl-aed90d38169f8dd6caf8197b3b44c424eeb6fb12.tar.bz2
fix bug cd61f085da "tcl terminates silently with invalid expr underscore syntax and radix prefix"
-rw-r--r--doc/expr.n1
-rw-r--r--generic/tclStrToD.c165
-rw-r--r--tests/get.test10
-rw-r--r--tests/parseExpr.test2
4 files changed, 94 insertions, 84 deletions
diff --git a/doc/expr.n b/doc/expr.n
index e9b30a5..3b8f1a8 100644
--- a/doc/expr.n
+++ b/doc/expr.n
@@ -85,6 +85,7 @@ end of a numeric value. Here are some examples:
\fBexpr\fR 100_000_000 \fI100000000\fR
\fBexpr\fR 0xffff_ffff \fI4294967295\fR
\fBformat\fR 0x%x 0b1111_1110_1101_1011 \fI0xfedb\fR
+\fBexpr\fR 3_141_592_653_589e-1_2 \fI3.141592653589\fR
.CE
.RE
diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c
index 03a7ab7..8f69627 100644
--- a/generic/tclStrToD.c
+++ b/generic/tclStrToD.c
@@ -539,8 +539,6 @@ TclParseNumber(
* to avoid a compiler warning. */
int shift = 0; /* Amount to shift when accumulating binary */
mp_err err = MP_OKAY;
- int under = 0; /* Flag trailing '_' as error if true once
- * number is accepted. */
#define MOST_BITS (UWIDE_MAX >> 1)
@@ -573,6 +571,87 @@ TclParseNumber(
acceptLen = len;
while (1) {
char c = len ? *p : '\0';
+
+ /*
+ * Filter out Numeric Whitespace. Expects:
+ *
+ * ::digit:: '_' ::digit::
+ *
+ * Verify current '_' is ok, then move on to next character,
+ * otherwise follow through on to error.
+ */
+ if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+ const char *before, *after;
+
+ if (p==bytes) {
+ /* Not allowed at beginning */
+ goto endgame;
+ }
+ /*
+ * span multiple numeric whitespace
+ * V
+ * example: 5___6
+ */
+ for (before=(p-1);
+ (before && *before=='_');
+ before=(before>p ? (before-1):NULL));
+ for (after=(p+1);
+ (after && *after && *after=='_');
+ after=(*after&&*after=='_')?(after+1):NULL);
+
+ switch (state) {
+ case ZERO_B:
+ case BINARY:
+ if ((before && (*before != '0' && *before != '1')) ||
+ (after && (*after != '0' && *after != '1'))) {
+ /* Not a valid digit */
+ goto endgame;
+ }
+ break;
+ case ZERO_O:
+ case OCTAL:
+ if (((before && (*before < '0' || '7' < *before))) ||
+ ((after && (*after < '0' || '7' < *after)))) {
+ goto endgame;
+ }
+ break;
+ case FRACTION:
+ case ZERO:
+ case ZERO_D:
+ case DECIMAL:
+ case LEADING_RADIX_POINT:
+ case EXPONENT_START:
+ case EXPONENT_SIGNUM:
+ case EXPONENT:
+ if ((!before || isdigit(UCHAR(*before))) &&
+ (!after || isdigit(UCHAR(*after)))) {
+ break;
+ }
+ if (after && *after=='(') {
+ /* could be function */
+ goto continue_num;
+ }
+ goto endgame;
+ case ZERO_X:
+ case HEXADECIMAL:
+ if ( (!before || isxdigit(UCHAR(*before))) &&
+ (!after || isxdigit(UCHAR(*after)))) {
+ break;
+ }
+ goto endgame;
+ default:
+ /*
+ * Not whitespace, but could be legal for other reasons.
+ * Continue number processing for current character.
+ */
+ goto continue_num;
+ }
+
+ /* Valid whitespace found, move on to the next character */
+ goto next;
+ }
+
+ continue_num:
switch (state) {
case INITIAL:
@@ -648,7 +727,7 @@ TclParseNumber(
acceptPoint = p;
acceptLen = len;
if (c == 'x' || c == 'X') {
- if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) || under) {
+ if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY)) {
goto endgame;
}
state = ZERO_X;
@@ -661,7 +740,7 @@ TclParseNumber(
goto zeroo;
}
if (c == 'b' || c == 'B') {
- if ((flags & TCL_PARSE_OCTAL_ONLY) || under) {
+ if ((flags & TCL_PARSE_OCTAL_ONLY)) {
goto endgame;
}
state = ZERO_B;
@@ -671,16 +750,10 @@ TclParseNumber(
goto zerob;
}
if (c == 'o' || c == 'O') {
- if (under) {
- goto endgame;
- }
state = ZERO_O;
break;
}
if (c == 'd' || c == 'D') {
- if (under) {
- goto endgame;
- }
state = ZERO_D;
break;
}
@@ -701,11 +774,9 @@ TclParseNumber(
zeroo:
if (c == '0') {
numTrailZeros++;
- under = 0;
state = OCTAL;
break;
} else if (c >= '1' && c <= '7') {
- under = 0;
if (objPtr != NULL) {
shift = 3 * (numTrailZeros + 1);
significandOverflow = AccumulateDecimalDigit(
@@ -764,10 +835,6 @@ TclParseNumber(
numTrailZeros = 0;
state = OCTAL;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
}
goto endgame;
@@ -787,22 +854,14 @@ TclParseNumber(
zerox:
if (c == '0') {
numTrailZeros++;
- under = 0;
state = HEXADECIMAL;
break;
} else if (isdigit(UCHAR(c))) {
- under = 0;
d = (c-'0');
} else if (c >= 'A' && c <= 'F') {
- under = 0;
d = (c-'A'+10);
} else if (c >= 'a' && c <= 'f') {
- under = 0;
d = (c-'a'+10);
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
} else {
goto endgame;
}
@@ -858,17 +917,10 @@ TclParseNumber(
zerob:
if (c == '0') {
numTrailZeros++;
- under = 0;
state = BINARY;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
} else if (c != '1') {
goto endgame;
- } else {
- under = 0;
}
if (objPtr != NULL) {
shift = numTrailZeros + 1;
@@ -915,17 +967,10 @@ TclParseNumber(
case ZERO_D:
if (c == '0') {
- under = 0;
numTrailZeros++;
} else if ( ! isdigit(UCHAR(c))) {
- if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
- }
goto endgame;
}
- under = 0;
state = DECIMAL;
flags |= TCL_PARSE_INTEGER_ONLY;
/* FALLTHROUGH */
@@ -942,7 +987,6 @@ TclParseNumber(
acceptLen = len;
if (c == '0') {
numTrailZeros++;
- under = 0;
state = DECIMAL;
break;
} else if (isdigit(UCHAR(c))) {
@@ -954,21 +998,14 @@ TclParseNumber(
}
numSigDigs += numTrailZeros+1;
numTrailZeros = 0;
- under = 0;
state = DECIMAL;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
} else if (flags & TCL_PARSE_INTEGER_ONLY) {
goto endgame;
} else if (c == '.') {
- under = 0;
state = FRACTION;
break;
} else if (c == 'E' || c == 'e') {
- under = 0;
state = EXPONENT_START;
break;
}
@@ -994,7 +1031,6 @@ TclParseNumber(
if (c == '0') {
numDigitsAfterDp++;
numTrailZeros++;
- under = 0;
state = FRACTION;
break;
} else if (isdigit(UCHAR(c))) {
@@ -1011,13 +1047,8 @@ TclParseNumber(
numSigDigs = 1;
}
numTrailZeros = 0;
- under = 0;
state = FRACTION;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
}
goto endgame;
@@ -1029,12 +1060,10 @@ TclParseNumber(
*/
if (c == '+') {
- under = 0;
state = EXPONENT_SIGNUM;
break;
} else if (c == '-') {
exponentSignum = 1;
- under = 0;
state = EXPONENT_SIGNUM;
break;
}
@@ -1048,13 +1077,8 @@ TclParseNumber(
if (isdigit(UCHAR(c))) {
exponent = c - '0';
- under = 0;
state = EXPONENT;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
}
goto endgame;
@@ -1073,13 +1097,8 @@ TclParseNumber(
} else {
exponent = LONG_MAX;
}
- under = 0;
state = EXPONENT;
break;
- } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
- /* Ignore numeric "white space" */
- under = 1;
- break;
}
goto endgame;
@@ -1090,14 +1109,12 @@ TclParseNumber(
case sI:
if (c == 'n' || c == 'N') {
- under = 0;
state = sIN;
break;
}
goto endgame;
case sIN:
if (c == 'f' || c == 'F') {
- under = 0;
state = sINF;
break;
}
@@ -1106,7 +1123,6 @@ TclParseNumber(
acceptState = state;
acceptPoint = p;
acceptLen = len;
- under = 0;
if (c == 'i' || c == 'I') {
state = sINFI;
break;
@@ -1114,28 +1130,24 @@ TclParseNumber(
goto endgame;
case sINFI:
if (c == 'n' || c == 'N') {
- under = 0;
state = sINFIN;
break;
}
goto endgame;
case sINFIN:
if (c == 'i' || c == 'I') {
- under = 0;
state = sINFINI;
break;
}
goto endgame;
case sINFINI:
if (c == 't' || c == 'T') {
- under = 0;
state = sINFINIT;
break;
}
goto endgame;
case sINFINIT:
if (c == 'y' || c == 'Y') {
- under = 0;
state = sINFINITY;
break;
}
@@ -1147,14 +1159,12 @@ TclParseNumber(
#ifdef IEEE_FLOATING_POINT
case sN:
if (c == 'a' || c == 'A') {
- under = 0;
state = sNA;
break;
}
goto endgame;
case sNA:
if (c == 'n' || c == 'N') {
- under = 0;
state = sNAN;
break;
}
@@ -1164,7 +1174,6 @@ TclParseNumber(
acceptPoint = p;
acceptLen = len;
if (c == '(') {
- under = 0;
state = sNANPAREN;
break;
}
@@ -1175,14 +1184,12 @@ TclParseNumber(
*/
case sNANHEX:
if (c == ')') {
- under = 0;
state = sNANFINISH;
break;
}
/* FALLTHROUGH */
case sNANPAREN:
if (TclIsSpaceProcM(c)) {
- under = 0;
break;
}
if (numSigDigs < 13) {
@@ -1197,7 +1204,6 @@ TclParseNumber(
}
numSigDigs++;
significandWide = (significandWide << 4) + d;
- under = 0;
state = sNANHEX;
break;
}
@@ -1211,6 +1217,7 @@ TclParseNumber(
acceptLen = len;
goto endgame;
}
+ next:
p++;
len--;
}
@@ -1232,8 +1239,8 @@ TclParseNumber(
* backup to that.
*/
- p = under ? acceptPoint-1 : acceptPoint;
- len = under ? acceptLen-1 : acceptLen;
+ p = acceptPoint;
+ len = acceptLen;
if (!(flags & TCL_PARSE_NO_WHITESPACE)) {
/*
diff --git a/tests/get.test b/tests/get.test
index eb26484..66c9c3a 100644
--- a/tests/get.test
+++ b/tests/get.test
@@ -102,18 +102,20 @@ test get-3.3 {tcl_GetInt with iffy numbers} testgetint {
set x
}
} {44 44 44 44 54 54 52 46}
+
test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj {
- lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10"} {
+ lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10" "2_0.3_4e+1_5" _1.0e+2 1_.0e+2 1._0e+2 1.0_e+2 1.0e_+2 1.0e+_2 1.0e+2_ 1_1.0e+0_2 2__2.0e+2__2 54321________} {
catch {testdoubleobj set 1 $x} x
set x
}
-} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0}
+} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0 20340000000000000.0 {expected floating-point number but got "_1.0e+2"} {expected floating-point number but got "1_.0e+2"} {expected floating-point number but got "1._0e+2"} {expected floating-point number but got "1.0_e+2"} {expected floating-point number but got "1.0e_+2"} {expected floating-point number but got "1.0e+_2"} {expected floating-point number but got "1.0e+2_"} 1100.0 2.2e+23 {expected floating-point number but got "54321________"}}
+
test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint {
- lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x_a " 0b1111_1111 " 0_07 " " 0o_1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 } {
+ lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x0_a " 0b1111_1111 " 0_07 " " 0o1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 0x_b 0o_2_0 0o2__3_4} {
catch {testgetint $x} x
set x
}
-} {0 10 2 33 1423324 10 255 7 8 2 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}}
+} {0 10 2 33 1423324 10 255 7 8 {expected integer but got " 0b_1_0 "} {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"} {expected integer but got "0x_b"} {expected integer but got "0o_2_0"} 156}
# cleanup
::tcltest::cleanupTests
diff --git a/tests/parseExpr.test b/tests/parseExpr.test
index b9245ce..476a250 100644
--- a/tests/parseExpr.test
+++ b/tests/parseExpr.test
@@ -1034,7 +1034,7 @@ test parseExpr-22.10 {Bug 3401704} -constraints testexprparser -body {
dict get $o -errorcode
} -result {TCL PARSE EXPR BADCHAR}
test parseExpr-22.11 {Bug 3401704} -constraints testexprparser -body {
- catch {testexprparser 1e-3_() -1} m o
+ catch {testexprparser 1e-3`() -1} m o
dict get $o -errorcode
} -result {TCL PARSE EXPR BADCHAR}
test parseExpr-22.12 {Bug 3401704} -constraints testexprparser -body {