From aa52afd6f9e2b2ca12b1f0ab8b2f4e15df71a645 Mon Sep 17 00:00:00 2001 From: griffin Date: Tue, 29 Aug 2023 01:51:53 +0000 Subject: fix for invalid numeric whitespace syntax bug. --- generic/tclStrToD.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++---- tests/get.test | 2 +- tests/parseExpr.test | 2 +- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 03a7ab7..e831590 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -766,6 +766,10 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } @@ -801,6 +805,10 @@ TclParseNumber( d = (c-'a'+10); } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } else { @@ -863,6 +871,10 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } else if (c != '1') { @@ -920,6 +932,10 @@ TclParseNumber( } else if ( ! isdigit(UCHAR(c))) { if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } @@ -959,16 +975,26 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } else if (flags & TCL_PARSE_INTEGER_ONLY) { goto endgame; } else if (c == '.') { - under = 0; + if (under) { + // Must be a digit before and after '_' + goto endgame; + } state = FRACTION; break; } else if (c == 'E' || c == 'e') { - under = 0; + if (under) { + // Must be a digit before and after '_' + goto endgame; + } state = EXPONENT_START; break; } @@ -1016,6 +1042,10 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } @@ -1029,12 +1059,18 @@ TclParseNumber( */ if (c == '+') { - under = 0; + if (under) { + // Must be a digit before and after '_' + goto endgame; + } state = EXPONENT_SIGNUM; break; } else if (c == '-') { exponentSignum = 1; - under = 0; + if (under) { + // Must be a digit before and after '_' + goto endgame; + } state = EXPONENT_SIGNUM; break; } @@ -1053,6 +1089,10 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } @@ -1078,6 +1118,10 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ + if (under) { + // No multiple '_' in a row + goto endgame; + } under = 1; break; } @@ -1235,6 +1279,9 @@ TclParseNumber( p = under ? acceptPoint-1 : acceptPoint; len = under ? acceptLen-1 : acceptLen; + // No trailing '_' allowed + status = under ? TCL_ERROR : TCL_OK; + if (!(flags & TCL_PARSE_NO_WHITESPACE)) { /* * Accept trailing whitespace. diff --git a/tests/get.test b/tests/get.test index eb26484..baeda6e 100644 --- a/tests/get.test +++ b/tests/get.test @@ -113,7 +113,7 @@ test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { catch {testgetint $x} x set x } -} {0 10 2 33 1423324 10 255 7 8 2 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}} +} {0 10 2 33 {expected integer but got "14__23__32___4"} 10 255 7 8 2 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}} # cleanup ::tcltest::cleanupTests diff --git a/tests/parseExpr.test b/tests/parseExpr.test index b9245ce..476a250 100644 --- a/tests/parseExpr.test +++ b/tests/parseExpr.test @@ -1034,7 +1034,7 @@ test parseExpr-22.10 {Bug 3401704} -constraints testexprparser -body { dict get $o -errorcode } -result {TCL PARSE EXPR BADCHAR} test parseExpr-22.11 {Bug 3401704} -constraints testexprparser -body { - catch {testexprparser 1e-3_() -1} m o + catch {testexprparser 1e-3`() -1} m o dict get $o -errorcode } -result {TCL PARSE EXPR BADCHAR} test parseExpr-22.12 {Bug 3401704} -constraints testexprparser -body { -- cgit v0.12 From 242cba5fc07c4c01aeada2e3dd878e0b416d6123 Mon Sep 17 00:00:00 2001 From: griffin Date: Wed, 30 Aug 2023 22:06:57 +0000 Subject: fix several issues with numeric whitespace by limiting where '_' is allowed. --- generic/tclStrToD.c | 176 ++++++++++++++++++++++++++++------------------------ tests/get.test | 9 +-- 2 files changed, 99 insertions(+), 86 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index e831590..d4cb81a 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -503,6 +503,30 @@ TclParseNumber( } state = INITIAL; enum State acceptState = INITIAL; + /* identify which numeric whitespace to ignore/skip + * and which to flag as illegal. + * + * The one and only legal position for '_' in an integer value. + * Similar for real values in that the numeric whitespace must + * begin and end with a digit only. + * ---------------------------------\ + * V + * (0b|0o|0d|0x)?(:digit:)|(:digit:+_*:digit:+) + * Note: :digit: set depends on base: + * {}|d :: [0-9] + * o :: [0-7] + * b :: [0-1] + * x :: [0-9a-fA-F] + */ + enum UnderState { + NO_NUM_WS, /* initial/off state */ + NUM_WS, /* on state (general check) */ + NUM_WS_B, /* numeric space illegal after '0b' */ + NUM_WS_O, /* numeric space illegal after '0o' */ + NUM_WS_D, /* numeric space illegal after '0d' */ + NUM_WS_X /* numeric space illegal after '0x' */ + } under = NO_NUM_WS; + int signum = 0; /* Sign of the number being parsed. */ Tcl_WideUInt significandWide = 0; /* Significand of the number being parsed (if @@ -539,8 +563,6 @@ TclParseNumber( * to avoid a compiler warning. */ int shift = 0; /* Amount to shift when accumulating binary */ mp_err err = MP_OKAY; - int under = 0; /* Flag trailing '_' as error if true once - * number is accepted. */ #define MOST_BITS (UWIDE_MAX >> 1) @@ -648,10 +670,12 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == 'x' || c == 'X') { - if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) || under) { + if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) + || under != NO_NUM_WS) { goto endgame; } state = ZERO_X; + under = NUM_WS_X; /* check for '_' after 0x */ break; } if (flags & TCL_PARSE_HEXADECIMAL_ONLY) { @@ -661,10 +685,12 @@ TclParseNumber( goto zeroo; } if (c == 'b' || c == 'B') { - if ((flags & TCL_PARSE_OCTAL_ONLY) || under) { + if ((flags & TCL_PARSE_OCTAL_ONLY) + || under != NO_NUM_WS) { goto endgame; } state = ZERO_B; + under = NUM_WS_B; /* check for '_' after 0b */ break; } if (flags & TCL_PARSE_BINARY_ONLY) { @@ -675,6 +701,7 @@ TclParseNumber( goto endgame; } state = ZERO_O; + under = NUM_WS_O; /* check for '_' after 0o */ break; } if (c == 'd' || c == 'D') { @@ -682,6 +709,7 @@ TclParseNumber( goto endgame; } state = ZERO_D; + under= NUM_WS_D; /* check for '_' after 0d */ break; } goto decimal; @@ -691,21 +719,24 @@ TclParseNumber( * Scanned an optional + or -, followed by a string of octal * digits. Acceptable inputs are more digits, period, or E. If 8 * or 9 is encountered, commit to floating point. + * + * Don't advance acceptPoint while passing over + * numeric whitespace */ acceptState = state; - acceptPoint = p; + acceptPoint = under ? acceptPoint : p; acceptLen = len; /* FALLTHROUGH */ case ZERO_O: zeroo: if (c == '0') { numTrailZeros++; - under = 0; + under = NO_NUM_WS; state = OCTAL; break; } else if (c >= '1' && c <= '7') { - under = 0; + under = NO_NUM_WS; if (objPtr != NULL) { shift = 3 * (numTrailZeros + 1); significandOverflow = AccumulateDecimalDigit( @@ -766,11 +797,11 @@ TclParseNumber( break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row + if (under == NUM_WS_O) { + // No '_' after 0o goto endgame; } - under = 1; + under = NUM_WS; break; } goto endgame; @@ -779,11 +810,14 @@ TclParseNumber( * Scanned 0x. If state is HEXADECIMAL, scanned at least one * character following the 0x. The only acceptable inputs are * hexadecimal digits. + * + * Don't advance acceptPoint while passing over + * numeric whitespace */ case HEXADECIMAL: acceptState = state; - acceptPoint = p; + acceptPoint = under == NUM_WS ? acceptPoint : p; acceptLen = len; /* FALLTHROUGH */ @@ -791,25 +825,25 @@ TclParseNumber( zerox: if (c == '0') { numTrailZeros++; - under = 0; + under = NO_NUM_WS; state = HEXADECIMAL; break; } else if (isdigit(UCHAR(c))) { - under = 0; + under = NO_NUM_WS; d = (c-'0'); } else if (c >= 'A' && c <= 'F') { - under = 0; + under = NO_NUM_WS; d = (c-'A'+10); } else if (c >= 'a' && c <= 'f') { - under = 0; + under = NO_NUM_WS; d = (c-'a'+10); } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row + if (under == NUM_WS_X) { + // No '_' after 0x goto endgame; } - under = 1; + under = NUM_WS; break; } else { goto endgame; @@ -866,21 +900,21 @@ TclParseNumber( zerob: if (c == '0') { numTrailZeros++; - under = 0; + under = NO_NUM_WS; state = BINARY; break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row + if (under == NUM_WS_B) { + // No '_' after 0b goto endgame; } - under = 1; + under = NUM_WS; break; } else if (c != '1') { goto endgame; } else { - under = 0; + under = NO_NUM_WS; } if (objPtr != NULL) { shift = numTrailZeros + 1; @@ -927,21 +961,21 @@ TclParseNumber( case ZERO_D: if (c == '0') { - under = 0; + under = NO_NUM_WS; numTrailZeros++; } else if ( ! isdigit(UCHAR(c))) { if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row + if (under == NUM_WS_D) { + // No '_' after 0d goto endgame; } - under = 1; + under = NUM_WS; break; } goto endgame; } - under = 0; + under = NO_NUM_WS; state = DECIMAL; flags |= TCL_PARSE_INTEGER_ONLY; /* FALLTHROUGH */ @@ -950,15 +984,18 @@ TclParseNumber( /* * Scanned an optional + or - followed by a string of decimal * digits. + * + * Don't advance acceptPoint while passing over + * numeric whitespace */ decimal: acceptState = state; - acceptPoint = p; + acceptPoint = under == NUM_WS ? acceptPoint : p; acceptLen = len; if (c == '0') { numTrailZeros++; - under = 0; + under = NO_NUM_WS; state = DECIMAL; break; } else if (isdigit(UCHAR(c))) { @@ -970,28 +1007,24 @@ TclParseNumber( } numSigDigs += numTrailZeros+1; numTrailZeros = 0; - under = 0; + under = NO_NUM_WS; state = DECIMAL; break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row - goto endgame; - } - under = 1; + under = NUM_WS; break; } else if (flags & TCL_PARSE_INTEGER_ONLY) { goto endgame; } else if (c == '.') { - if (under) { + if (under == NUM_WS) { // Must be a digit before and after '_' goto endgame; } state = FRACTION; break; } else if (c == 'E' || c == 'e') { - if (under) { + if (under == NUM_WS) { // Must be a digit before and after '_' goto endgame; } @@ -1020,7 +1053,7 @@ TclParseNumber( if (c == '0') { numDigitsAfterDp++; numTrailZeros++; - under = 0; + under = NO_NUM_WS; state = FRACTION; break; } else if (isdigit(UCHAR(c))) { @@ -1037,17 +1070,9 @@ TclParseNumber( numSigDigs = 1; } numTrailZeros = 0; - under = 0; + under = NO_NUM_WS; state = FRACTION; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row - goto endgame; - } - under = 1; - break; } goto endgame; @@ -1059,7 +1084,7 @@ TclParseNumber( */ if (c == '+') { - if (under) { + if (under == NUM_WS) { // Must be a digit before and after '_' goto endgame; } @@ -1067,7 +1092,7 @@ TclParseNumber( break; } else if (c == '-') { exponentSignum = 1; - if (under) { + if (under == NUM_WS) { // Must be a digit before and after '_' goto endgame; } @@ -1084,17 +1109,9 @@ TclParseNumber( if (isdigit(UCHAR(c))) { exponent = c - '0'; - under = 0; + under = NO_NUM_WS; state = EXPONENT; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row - goto endgame; - } - under = 1; - break; } goto endgame; @@ -1113,16 +1130,11 @@ TclParseNumber( } else { exponent = LONG_MAX; } - under = 0; + under = NO_NUM_WS; state = EXPONENT; break; } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under) { - // No multiple '_' in a row - goto endgame; - } - under = 1; + under = NUM_WS; break; } goto endgame; @@ -1134,14 +1146,14 @@ TclParseNumber( case sI: if (c == 'n' || c == 'N') { - under = 0; + under = NO_NUM_WS; state = sIN; break; } goto endgame; case sIN: if (c == 'f' || c == 'F') { - under = 0; + under = NO_NUM_WS; state = sINF; break; } @@ -1150,7 +1162,7 @@ TclParseNumber( acceptState = state; acceptPoint = p; acceptLen = len; - under = 0; + under = NO_NUM_WS; if (c == 'i' || c == 'I') { state = sINFI; break; @@ -1158,28 +1170,28 @@ TclParseNumber( goto endgame; case sINFI: if (c == 'n' || c == 'N') { - under = 0; + under = NO_NUM_WS; state = sINFIN; break; } goto endgame; case sINFIN: if (c == 'i' || c == 'I') { - under = 0; + under = NO_NUM_WS; state = sINFINI; break; } goto endgame; case sINFINI: if (c == 't' || c == 'T') { - under = 0; + under = NO_NUM_WS; state = sINFINIT; break; } goto endgame; case sINFINIT: if (c == 'y' || c == 'Y') { - under = 0; + under = NO_NUM_WS; state = sINFINITY; break; } @@ -1191,14 +1203,14 @@ TclParseNumber( #ifdef IEEE_FLOATING_POINT case sN: if (c == 'a' || c == 'A') { - under = 0; + under = NO_NUM_WS; state = sNA; break; } goto endgame; case sNA: if (c == 'n' || c == 'N') { - under = 0; + under = NO_NUM_WS; state = sNAN; break; } @@ -1208,7 +1220,7 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == '(') { - under = 0; + under = NO_NUM_WS; state = sNANPAREN; break; } @@ -1219,14 +1231,14 @@ TclParseNumber( */ case sNANHEX: if (c == ')') { - under = 0; + under = NO_NUM_WS; state = sNANFINISH; break; } /* FALLTHROUGH */ case sNANPAREN: if (TclIsSpaceProcM(c)) { - under = 0; + under = NO_NUM_WS; break; } if (numSigDigs < 13) { @@ -1241,7 +1253,7 @@ TclParseNumber( } numSigDigs++; significandWide = (significandWide << 4) + d; - under = 0; + under = NO_NUM_WS; state = sNANHEX; break; } @@ -1276,11 +1288,11 @@ TclParseNumber( * backup to that. */ - p = under ? acceptPoint-1 : acceptPoint; - len = under ? acceptLen-1 : acceptLen; + p = under == NUM_WS ? acceptPoint-1 : acceptPoint; + len = under == NUM_WS ? acceptLen-1 : acceptLen; // No trailing '_' allowed - status = under ? TCL_ERROR : TCL_OK; + status = under == NUM_WS ? TCL_ERROR : TCL_OK; if (!(flags & TCL_PARSE_NO_WHITESPACE)) { /* diff --git a/tests/get.test b/tests/get.test index baeda6e..a742dd3 100644 --- a/tests/get.test +++ b/tests/get.test @@ -103,17 +103,18 @@ test get-3.3 {tcl_GetInt with iffy numbers} testgetint { } } {44 44 44 44 54 54 52 46} test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj { - lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10"} { + lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10" _1.0e+2 1_.0e+2 1._0e+2 1.0_e+2 1.0e_+2 1.0e+_2 1.0e+2_ 1_1.0e+0_2 2__2.0e+2__2} { catch {testdoubleobj set 1 $x} x set x } -} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0} +} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0 {expected floating-point number but got "_1.0e+2"} {expected floating-point number but got "1_.0e+2"} {expected floating-point number but got "1._0e+2"} {expected floating-point number but got "1.0_e+2"} {expected floating-point number but got "1.0e_+2"} {expected floating-point number but got "1.0e+_2"} {expected floating-point number but got "1.0e+2_"} 1100.0 2.2e+23} + test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { - lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x_a " 0b1111_1111 " 0_07 " " 0o_1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 } { + lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x0_a " 0b1111_1111 " 0_07 " " 0o1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 0x_b 0o_2_0} { catch {testgetint $x} x set x } -} {0 10 2 33 {expected integer but got "14__23__32___4"} 10 255 7 8 2 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}} +} {0 10 2 33 1423324 10 255 7 8 {expected integer but got " 0b_1_0 "} {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"} {expected integer but got "0x_b"} {expected integer but got "0o_2_0"}} # cleanup ::tcltest::cleanupTests -- cgit v0.12 From 618269ede0e592e5e250f7b30d3026592a7b0444 Mon Sep 17 00:00:00 2001 From: griffin Date: Fri, 1 Sep 2023 01:15:50 +0000 Subject: Rewrite how numeric whitespace is processed in TclParseNumber. Add more number test cases. --- generic/tclStrToD.c | 240 ++++++++++++++++++++++------------------------------ tests/get.test | 5 +- 2 files changed, 102 insertions(+), 143 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index d4cb81a..31f33fc 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -371,6 +371,16 @@ static double SafeLdExp(double fraction, int exponent); #ifdef IEEE_FLOATING_POINT static Tcl_WideUInt Nokia770Twiddle(Tcl_WideUInt w); #endif + +static inline int +isHexDigit(int u) +{ + int c = UCHAR(u); + return (isdigit(c) + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f')); +} + /* *---------------------------------------------------------------------- @@ -503,30 +513,6 @@ TclParseNumber( } state = INITIAL; enum State acceptState = INITIAL; - /* identify which numeric whitespace to ignore/skip - * and which to flag as illegal. - * - * The one and only legal position for '_' in an integer value. - * Similar for real values in that the numeric whitespace must - * begin and end with a digit only. - * ---------------------------------\ - * V - * (0b|0o|0d|0x)?(:digit:)|(:digit:+_*:digit:+) - * Note: :digit: set depends on base: - * {}|d :: [0-9] - * o :: [0-7] - * b :: [0-1] - * x :: [0-9a-fA-F] - */ - enum UnderState { - NO_NUM_WS, /* initial/off state */ - NUM_WS, /* on state (general check) */ - NUM_WS_B, /* numeric space illegal after '0b' */ - NUM_WS_O, /* numeric space illegal after '0o' */ - NUM_WS_D, /* numeric space illegal after '0d' */ - NUM_WS_X /* numeric space illegal after '0x' */ - } under = NO_NUM_WS; - int signum = 0; /* Sign of the number being parsed. */ Tcl_WideUInt significandWide = 0; /* Significand of the number being parsed (if @@ -595,6 +581,87 @@ TclParseNumber( acceptLen = len; while (1) { char c = len ? *p : '\0'; + + /* + * Filter out Numeric Whitespace. Expects: + * + * ::digit:: '_' ::digit:: + * + * Verify current '_' is ok, then move on to next character, + * otherwise follow through on to error. + */ + if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + const char *before, *after; + + if (p==bytes) { + /* Not allowed at beginning */ + goto endgame; + } + /* + * span multiple numeric whitespace + * V + * example: 5___6 + */ + for (before=(p-1); + (before && *before=='_'); + before=(before>p ? (before-1):NULL)); + for (after=(p+1); + (after && *after && *after=='_'); + after=(*after&&*after=='_')?(after+1):NULL); + + switch (state) { + case ZERO_B: + case BINARY: + if ((before && (*before != '0' && *before != '1')) || + (after && (*after != '0' && *after != '1'))) { + /* Not a valid digit */ + goto endgame; + } + break; + case ZERO_O: + case OCTAL: + if (((!before || (*before < '0' || '7' < *before))) || + ((!before || (*after < '0' || '7' < *after)))) { + goto endgame; + } + break; + case FRACTION: + case ZERO: + case ZERO_D: + case DECIMAL: + case LEADING_RADIX_POINT: + case EXPONENT_START: + case EXPONENT_SIGNUM: + case EXPONENT: + if ((!before || isdigit(UCHAR(*before))) && + (!after || isdigit(UCHAR(*after)))) { + break; + } + if (after && *after=='(') { + /* could be function */ + goto continue_num; + } + goto endgame; + case ZERO_X: + case HEXADECIMAL: + if ( (!before || isHexDigit(*before)) && + (!after || isHexDigit(*after))) { + break; + } + goto endgame; + default: + /* + * Not whitespace, but could be legal for other reasons. + * Continue number processing for current character. + */ + goto continue_num; + } + + /* Valid whitespace found, move on to the next character */ + goto next; + } + + continue_num: switch (state) { case INITIAL: @@ -670,12 +737,10 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == 'x' || c == 'X') { - if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) - || under != NO_NUM_WS) { + if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY)) { goto endgame; } state = ZERO_X; - under = NUM_WS_X; /* check for '_' after 0x */ break; } if (flags & TCL_PARSE_HEXADECIMAL_ONLY) { @@ -685,31 +750,21 @@ TclParseNumber( goto zeroo; } if (c == 'b' || c == 'B') { - if ((flags & TCL_PARSE_OCTAL_ONLY) - || under != NO_NUM_WS) { + if ((flags & TCL_PARSE_OCTAL_ONLY)) { goto endgame; } state = ZERO_B; - under = NUM_WS_B; /* check for '_' after 0b */ break; } if (flags & TCL_PARSE_BINARY_ONLY) { goto zerob; } if (c == 'o' || c == 'O') { - if (under) { - goto endgame; - } state = ZERO_O; - under = NUM_WS_O; /* check for '_' after 0o */ break; } if (c == 'd' || c == 'D') { - if (under) { - goto endgame; - } state = ZERO_D; - under= NUM_WS_D; /* check for '_' after 0d */ break; } goto decimal; @@ -719,24 +774,19 @@ TclParseNumber( * Scanned an optional + or -, followed by a string of octal * digits. Acceptable inputs are more digits, period, or E. If 8 * or 9 is encountered, commit to floating point. - * - * Don't advance acceptPoint while passing over - * numeric whitespace */ acceptState = state; - acceptPoint = under ? acceptPoint : p; + acceptPoint = p; acceptLen = len; /* FALLTHROUGH */ case ZERO_O: zeroo: if (c == '0') { numTrailZeros++; - under = NO_NUM_WS; state = OCTAL; break; } else if (c >= '1' && c <= '7') { - under = NO_NUM_WS; if (objPtr != NULL) { shift = 3 * (numTrailZeros + 1); significandOverflow = AccumulateDecimalDigit( @@ -795,14 +845,6 @@ TclParseNumber( numTrailZeros = 0; state = OCTAL; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under == NUM_WS_O) { - // No '_' after 0o - goto endgame; - } - under = NUM_WS; - break; } goto endgame; @@ -810,14 +852,11 @@ TclParseNumber( * Scanned 0x. If state is HEXADECIMAL, scanned at least one * character following the 0x. The only acceptable inputs are * hexadecimal digits. - * - * Don't advance acceptPoint while passing over - * numeric whitespace */ case HEXADECIMAL: acceptState = state; - acceptPoint = under == NUM_WS ? acceptPoint : p; + acceptPoint = p; acceptLen = len; /* FALLTHROUGH */ @@ -825,26 +864,14 @@ TclParseNumber( zerox: if (c == '0') { numTrailZeros++; - under = NO_NUM_WS; state = HEXADECIMAL; break; } else if (isdigit(UCHAR(c))) { - under = NO_NUM_WS; d = (c-'0'); } else if (c >= 'A' && c <= 'F') { - under = NO_NUM_WS; d = (c-'A'+10); } else if (c >= 'a' && c <= 'f') { - under = NO_NUM_WS; d = (c-'a'+10); - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under == NUM_WS_X) { - // No '_' after 0x - goto endgame; - } - under = NUM_WS; - break; } else { goto endgame; } @@ -900,21 +927,10 @@ TclParseNumber( zerob: if (c == '0') { numTrailZeros++; - under = NO_NUM_WS; state = BINARY; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under == NUM_WS_B) { - // No '_' after 0b - goto endgame; - } - under = NUM_WS; - break; } else if (c != '1') { goto endgame; - } else { - under = NO_NUM_WS; } if (objPtr != NULL) { shift = numTrailZeros + 1; @@ -961,21 +977,10 @@ TclParseNumber( case ZERO_D: if (c == '0') { - under = NO_NUM_WS; numTrailZeros++; } else if ( ! isdigit(UCHAR(c))) { - if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - if (under == NUM_WS_D) { - // No '_' after 0d - goto endgame; - } - under = NUM_WS; - break; - } goto endgame; } - under = NO_NUM_WS; state = DECIMAL; flags |= TCL_PARSE_INTEGER_ONLY; /* FALLTHROUGH */ @@ -984,18 +989,14 @@ TclParseNumber( /* * Scanned an optional + or - followed by a string of decimal * digits. - * - * Don't advance acceptPoint while passing over - * numeric whitespace */ decimal: acceptState = state; - acceptPoint = under == NUM_WS ? acceptPoint : p; + acceptPoint = p; acceptLen = len; if (c == '0') { numTrailZeros++; - under = NO_NUM_WS; state = DECIMAL; break; } else if (isdigit(UCHAR(c))) { @@ -1007,27 +1008,14 @@ TclParseNumber( } numSigDigs += numTrailZeros+1; numTrailZeros = 0; - under = NO_NUM_WS; state = DECIMAL; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - /* Ignore numeric "white space" */ - under = NUM_WS; - break; } else if (flags & TCL_PARSE_INTEGER_ONLY) { goto endgame; } else if (c == '.') { - if (under == NUM_WS) { - // Must be a digit before and after '_' - goto endgame; - } state = FRACTION; break; } else if (c == 'E' || c == 'e') { - if (under == NUM_WS) { - // Must be a digit before and after '_' - goto endgame; - } state = EXPONENT_START; break; } @@ -1053,7 +1041,6 @@ TclParseNumber( if (c == '0') { numDigitsAfterDp++; numTrailZeros++; - under = NO_NUM_WS; state = FRACTION; break; } else if (isdigit(UCHAR(c))) { @@ -1070,7 +1057,6 @@ TclParseNumber( numSigDigs = 1; } numTrailZeros = 0; - under = NO_NUM_WS; state = FRACTION; break; } @@ -1084,18 +1070,10 @@ TclParseNumber( */ if (c == '+') { - if (under == NUM_WS) { - // Must be a digit before and after '_' - goto endgame; - } state = EXPONENT_SIGNUM; break; } else if (c == '-') { exponentSignum = 1; - if (under == NUM_WS) { - // Must be a digit before and after '_' - goto endgame; - } state = EXPONENT_SIGNUM; break; } @@ -1109,7 +1087,6 @@ TclParseNumber( if (isdigit(UCHAR(c))) { exponent = c - '0'; - under = NO_NUM_WS; state = EXPONENT; break; } @@ -1130,12 +1107,8 @@ TclParseNumber( } else { exponent = LONG_MAX; } - under = NO_NUM_WS; state = EXPONENT; break; - } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { - under = NUM_WS; - break; } goto endgame; @@ -1146,14 +1119,12 @@ TclParseNumber( case sI: if (c == 'n' || c == 'N') { - under = NO_NUM_WS; state = sIN; break; } goto endgame; case sIN: if (c == 'f' || c == 'F') { - under = NO_NUM_WS; state = sINF; break; } @@ -1162,7 +1133,6 @@ TclParseNumber( acceptState = state; acceptPoint = p; acceptLen = len; - under = NO_NUM_WS; if (c == 'i' || c == 'I') { state = sINFI; break; @@ -1170,28 +1140,24 @@ TclParseNumber( goto endgame; case sINFI: if (c == 'n' || c == 'N') { - under = NO_NUM_WS; state = sINFIN; break; } goto endgame; case sINFIN: if (c == 'i' || c == 'I') { - under = NO_NUM_WS; state = sINFINI; break; } goto endgame; case sINFINI: if (c == 't' || c == 'T') { - under = NO_NUM_WS; state = sINFINIT; break; } goto endgame; case sINFINIT: if (c == 'y' || c == 'Y') { - under = NO_NUM_WS; state = sINFINITY; break; } @@ -1203,14 +1169,12 @@ TclParseNumber( #ifdef IEEE_FLOATING_POINT case sN: if (c == 'a' || c == 'A') { - under = NO_NUM_WS; state = sNA; break; } goto endgame; case sNA: if (c == 'n' || c == 'N') { - under = NO_NUM_WS; state = sNAN; break; } @@ -1220,7 +1184,6 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == '(') { - under = NO_NUM_WS; state = sNANPAREN; break; } @@ -1231,14 +1194,12 @@ TclParseNumber( */ case sNANHEX: if (c == ')') { - under = NO_NUM_WS; state = sNANFINISH; break; } /* FALLTHROUGH */ case sNANPAREN: if (TclIsSpaceProcM(c)) { - under = NO_NUM_WS; break; } if (numSigDigs < 13) { @@ -1253,7 +1214,6 @@ TclParseNumber( } numSigDigs++; significandWide = (significandWide << 4) + d; - under = NO_NUM_WS; state = sNANHEX; break; } @@ -1267,6 +1227,7 @@ TclParseNumber( acceptLen = len; goto endgame; } + next: p++; len--; } @@ -1288,11 +1249,8 @@ TclParseNumber( * backup to that. */ - p = under == NUM_WS ? acceptPoint-1 : acceptPoint; - len = under == NUM_WS ? acceptLen-1 : acceptLen; - - // No trailing '_' allowed - status = under == NUM_WS ? TCL_ERROR : TCL_OK; + p = acceptPoint; + len = acceptLen; if (!(flags & TCL_PARSE_NO_WHITESPACE)) { /* diff --git a/tests/get.test b/tests/get.test index a742dd3..2545ac3 100644 --- a/tests/get.test +++ b/tests/get.test @@ -102,12 +102,13 @@ test get-3.3 {tcl_GetInt with iffy numbers} testgetint { set x } } {44 44 44 44 54 54 52 46} + test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj { - lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10" _1.0e+2 1_.0e+2 1._0e+2 1.0_e+2 1.0e_+2 1.0e+_2 1.0e+2_ 1_1.0e+0_2 2__2.0e+2__2} { + lmap x {0 0.0 " .0" ".0 " " 0e0 " "09" "- 0" "-0" "0o12" "0b10" "2_0.3_4e+1_5" _1.0e+2 1_.0e+2 1._0e+2 1.0_e+2 1.0e_+2 1.0e+_2 1.0e+2_ 1_1.0e+0_2 2__2.0e+2__2 54321________} { catch {testdoubleobj set 1 $x} x set x } -} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0 {expected floating-point number but got "_1.0e+2"} {expected floating-point number but got "1_.0e+2"} {expected floating-point number but got "1._0e+2"} {expected floating-point number but got "1.0_e+2"} {expected floating-point number but got "1.0e_+2"} {expected floating-point number but got "1.0e+_2"} {expected floating-point number but got "1.0e+2_"} 1100.0 2.2e+23} +} {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0 20340000000000000.0 {expected floating-point number but got "_1.0e+2"} {expected floating-point number but got "1_.0e+2"} {expected floating-point number but got "1._0e+2"} {expected floating-point number but got "1.0_e+2"} {expected floating-point number but got "1.0e_+2"} {expected floating-point number but got "1.0e+_2"} {expected floating-point number but got "1.0e+2_"} 1100.0 2.2e+23 {expected floating-point number but got "54321________"}} test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x0_a " 0b1111_1111 " 0_07 " " 0o1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 0x_b 0o_2_0} { -- cgit v0.12 From 4f5685bc2e489f3cf31579319479a3edd627d54c Mon Sep 17 00:00:00 2001 From: griffin Date: Fri, 1 Sep 2023 02:26:56 +0000 Subject: Use system isxdigit. --- generic/tclStrToD.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 31f33fc..4912441 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -371,16 +371,6 @@ static double SafeLdExp(double fraction, int exponent); #ifdef IEEE_FLOATING_POINT static Tcl_WideUInt Nokia770Twiddle(Tcl_WideUInt w); #endif - -static inline int -isHexDigit(int u) -{ - int c = UCHAR(u); - return (isdigit(c) - || (c >= 'A' && c <= 'F') - || (c >= 'a' && c <= 'f')); -} - /* *---------------------------------------------------------------------- @@ -644,8 +634,8 @@ TclParseNumber( goto endgame; case ZERO_X: case HEXADECIMAL: - if ( (!before || isHexDigit(*before)) && - (!after || isHexDigit(*after))) { + if ( (!before || isxdigit(*before)) && + (!after || isxdigit(*after))) { break; } goto endgame; -- cgit v0.12 From ebec0ccb97e3a19b3a71c77fda3dbb33f0231c59 Mon Sep 17 00:00:00 2001 From: griffin Date: Fri, 1 Sep 2023 15:17:41 +0000 Subject: Fix bug with octal. Add floating point example to doc. --- doc/expr.n | 1 + generic/tclStrToD.c | 4 ++-- tests/get.test | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/expr.n b/doc/expr.n index e9b30a5..f6d17db 100644 --- a/doc/expr.n +++ b/doc/expr.n @@ -85,6 +85,7 @@ end of a numeric value. Here are some examples: \fBexpr\fR 100_000_000 \fI100000000\fR \fBexpr\fR 0xffff_ffff \fI4294967295\fR \fBformat\fR 0x%x 0b1111_1110_1101_1011 \fI0xfedb\fR +\fBexpr\fR 3_141_592_653_589e-1_2 \fI3.141592653589 .CE .RE diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 4912441..36261c3 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -610,8 +610,8 @@ TclParseNumber( break; case ZERO_O: case OCTAL: - if (((!before || (*before < '0' || '7' < *before))) || - ((!before || (*after < '0' || '7' < *after)))) { + if (((before && (*before < '0' || '7' < *before))) || + ((after && (*after < '0' || '7' < *after)))) { goto endgame; } break; diff --git a/tests/get.test b/tests/get.test index 2545ac3..66c9c3a 100644 --- a/tests/get.test +++ b/tests/get.test @@ -111,11 +111,11 @@ test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj { } {0.0 0.0 0.0 0.0 0.0 9.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0 20340000000000000.0 {expected floating-point number but got "_1.0e+2"} {expected floating-point number but got "1_.0e+2"} {expected floating-point number but got "1._0e+2"} {expected floating-point number but got "1.0_e+2"} {expected floating-point number but got "1.0e_+2"} {expected floating-point number but got "1.0e+_2"} {expected floating-point number but got "1.0e+2_"} 1100.0 2.2e+23 {expected floating-point number but got "54321________"}} test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { - lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x0_a " 0b1111_1111 " 0_07 " " 0o1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 0x_b 0o_2_0} { + lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x0_a " 0b1111_1111 " 0_07 " " 0o1_0 " " 0b_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 0x_b 0o_2_0 0o2__3_4} { catch {testgetint $x} x set x } -} {0 10 2 33 1423324 10 255 7 8 {expected integer but got " 0b_1_0 "} {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"} {expected integer but got "0x_b"} {expected integer but got "0o_2_0"}} +} {0 10 2 33 1423324 10 255 7 8 {expected integer but got " 0b_1_0 "} {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"} {expected integer but got "0x_b"} {expected integer but got "0o_2_0"} 156} # cleanup ::tcltest::cleanupTests -- cgit v0.12 From 12d09ed4d9bb9034f455ec89ca2d0c12a426d25c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 1 Sep 2023 17:03:36 +0000 Subject: Always use UCHAR() with isxdigit(). doc fix --- doc/expr.n | 2 +- generic/tclStrToD.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/expr.n b/doc/expr.n index f6d17db..3b8f1a8 100644 --- a/doc/expr.n +++ b/doc/expr.n @@ -85,7 +85,7 @@ end of a numeric value. Here are some examples: \fBexpr\fR 100_000_000 \fI100000000\fR \fBexpr\fR 0xffff_ffff \fI4294967295\fR \fBformat\fR 0x%x 0b1111_1110_1101_1011 \fI0xfedb\fR -\fBexpr\fR 3_141_592_653_589e-1_2 \fI3.141592653589 +\fBexpr\fR 3_141_592_653_589e-1_2 \fI3.141592653589\fR .CE .RE diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 36261c3..8f69627 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -634,8 +634,8 @@ TclParseNumber( goto endgame; case ZERO_X: case HEXADECIMAL: - if ( (!before || isxdigit(*before)) && - (!after || isxdigit(*after))) { + if ( (!before || isxdigit(UCHAR(*before))) && + (!after || isxdigit(UCHAR(*after)))) { break; } goto endgame; -- cgit v0.12