From e38608eeb31429e3684101d8bcf151ea5fe29ed4 Mon Sep 17 00:00:00 2001 From: griffin Date: Sat, 7 Dec 2019 05:52:14 +0000 Subject: Initial implementation for TIP-551 Permit underscores in numeric literals --- generic/tclStrToD.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++--- tests/get.test | 6 ++++ 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 0e35dbf..f1bf0c6 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -529,7 +529,9 @@ TclParseNumber( * to avoid a compiler warning. */ int shift = 0; /* Amount to shift when accumulating binary */ int explicitOctal = 0; - + int under = 0; /* Flag trailing '_' as error if true once + * number is accepted. */ + #define ALL_BITS ((Tcl_WideUInt)-1) #define MOST_BITS (ALL_BITS >> 1) @@ -637,6 +639,7 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == 'x' || c == 'X') { + under = 0; if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY)) { goto endgame; } @@ -650,6 +653,7 @@ TclParseNumber( goto zeroo; } if (c == 'b' || c == 'B') { + under = 0; if (flags & TCL_PARSE_OCTAL_ONLY) { goto endgame; } @@ -661,10 +665,12 @@ TclParseNumber( } if (c == 'o' || c == 'O') { explicitOctal = 1; + under = 0; state = ZERO_O; break; } if (c == 'd' || c == 'D') { + under = 0; state = ZERO_D; break; } @@ -688,9 +694,11 @@ TclParseNumber( zeroo: if (c == '0') { numTrailZeros++; + under = 0; state = OCTAL; break; } else if (c >= '1' && c <= '7') { + under = 0; if (objPtr != NULL) { shift = 3 * (numTrailZeros + 1); significandOverflow = AccumulateDecimalDigit( @@ -733,6 +741,10 @@ TclParseNumber( numTrailZeros = 0; state = OCTAL; break; + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } /* FALLTHROUGH */ @@ -761,6 +773,7 @@ TclParseNumber( if (c == '0') { numTrailZeros++; + under = 0; state = BAD_OCTAL; break; } else if (isdigit(UCHAR(c))) { @@ -776,12 +789,15 @@ TclParseNumber( numSigDigs = 1; } numTrailZeros = 0; + 
under = 0; state = BAD_OCTAL; break; } else if (c == '.') { + under = 0; state = FRACTION; break; } else if (c == 'E' || c == 'e') { + under = 0; state = EXPONENT_START; break; } @@ -804,14 +820,22 @@ TclParseNumber( zerox: if (c == '0') { numTrailZeros++; + under = 0; state = HEXADECIMAL; break; } else if (isdigit(UCHAR(c))) { + under = 0; d = (c-'0'); } else if (c >= 'A' && c <= 'F') { + under = 0; d = (c-'A'+10); } else if (c >= 'a' && c <= 'f') { + under = 0; d = (c-'a'+10); + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } else { goto endgame; } @@ -851,8 +875,13 @@ TclParseNumber( zerob: if (c == '0') { numTrailZeros++; + under = 0; state = BINARY; break; + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } else if (c != '1') { goto endgame; } @@ -886,10 +915,17 @@ TclParseNumber( case ZERO_D: if (c == '0') { + under = 0; numTrailZeros++; } else if ( ! isdigit(UCHAR(c))) { + if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; + } goto endgame; } + under = 0; state = DECIMAL; flags |= TCL_PARSE_INTEGER_ONLY; /* FALLTHROUGH */ @@ -908,6 +944,7 @@ TclParseNumber( acceptLen = len; if (c == '0') { numTrailZeros++; + under = 0; state = DECIMAL; break; } else if (isdigit(UCHAR(c))) { @@ -919,14 +956,21 @@ TclParseNumber( } numSigDigs += numTrailZeros+1; numTrailZeros = 0; + under = 0; state = DECIMAL; break; + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } else if (flags & TCL_PARSE_INTEGER_ONLY) { goto endgame; } else if (c == '.') { + under = 0; state = FRACTION; break; } else if (c == 'E' || c == 'e') { + under = 0; state = EXPONENT_START; break; } @@ -952,6 +996,7 @@ TclParseNumber( if (c == '0') { numDigitsAfterDp++; numTrailZeros++; + under = 0; state = FRACTION; break; } else if (isdigit(UCHAR(c))) { @@ -968,8 +1013,13 @@ TclParseNumber( numSigDigs = 1; } numTrailZeros = 0; + under = 0; state = FRACTION; break; + } else if (c == 
'_') { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; @@ -981,10 +1031,12 @@ TclParseNumber( */ if (c == '+') { + under = 0; state = EXPONENT_SIGNUM; break; } else if (c == '-') { exponentSignum = 1; + under = 0; state = EXPONENT_SIGNUM; break; } @@ -998,8 +1050,13 @@ TclParseNumber( if (isdigit(UCHAR(c))) { exponent = c - '0'; + under = 0; state = EXPONENT; break; + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; @@ -1018,8 +1075,13 @@ TclParseNumber( } else { exponent = LONG_MAX; } + under = 0; state = EXPONENT; break; + } else if (c == '_') { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; @@ -1030,12 +1092,14 @@ TclParseNumber( case sI: if (c == 'n' || c == 'N') { + under = 0; state = sIN; break; } goto endgame; case sIN: if (c == 'f' || c == 'F') { + under = 0; state = sINF; break; } @@ -1044,6 +1108,7 @@ TclParseNumber( acceptState = state; acceptPoint = p; acceptLen = len; + under = 0; if (c == 'i' || c == 'I') { state = sINFI; break; @@ -1051,24 +1116,28 @@ TclParseNumber( goto endgame; case sINFI: if (c == 'n' || c == 'N') { + under = 0; state = sINFIN; break; } goto endgame; case sINFIN: if (c == 'i' || c == 'I') { + under = 0; state = sINFINI; break; } goto endgame; case sINFINI: if (c == 't' || c == 'T') { + under = 0; state = sINFINIT; break; } goto endgame; case sINFINIT: if (c == 'y' || c == 'Y') { + under = 0; state = sINFINITY; break; } @@ -1080,12 +1149,14 @@ TclParseNumber( #ifdef IEEE_FLOATING_POINT case sN: if (c == 'a' || c == 'A') { + under = 0; state = sNA; break; } goto endgame; case sNA: if (c == 'n' || c == 'N') { + under = 0; state = sNAN; break; } @@ -1095,6 +1166,7 @@ TclParseNumber( acceptPoint = p; acceptLen = len; if (c == '(') { + under = 0; state = sNANPAREN; break; } @@ -1105,12 +1177,14 @@ TclParseNumber( */ case sNANHEX: if (c == ')') { + under = 0; state = sNANFINISH; break; } /* FALLTHROUGH */ case sNANPAREN: if 
(TclIsSpaceProc(c)) { + under = 0; break; } if (numSigDigs < 13) { @@ -1125,6 +1199,7 @@ TclParseNumber( } numSigDigs++; significandWide = (significandWide << 4) + d; + under = 0; state = sNANHEX; break; } @@ -1137,6 +1212,7 @@ TclParseNumber( acceptPoint = p; acceptLen = len; goto endgame; + } p++; len--; @@ -1154,11 +1230,14 @@ TclParseNumber( } } else { /* - * Back up to the last accepting state in the lexer. + * Back up to the last accepting state in the lexer. + * If the last char seen is the numeric whitespace character '_', + * back up to that. */ - p = acceptPoint; - len = acceptLen; + p = under ? acceptPoint-1 : acceptPoint; + len = under ? acceptLen-1 : acceptLen; + if (!(flags & TCL_PARSE_NO_WHITESPACE)) { /* * Accept trailing whitespace. diff --git a/tests/get.test b/tests/get.test index e35b2cc..62a074f 100644 --- a/tests/get.test +++ b/tests/get.test @@ -109,6 +109,12 @@ test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj { set x } } {0.0 0.0 0.0 0.0 0.0 7.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0} +test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { + lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x_a " " 0_07 " " 0o_1_0 " " 0_b1_0 " _33 42_} { + catch {testgetint $x} x + set x + } +} {0 10 2 33 1423324 10 7 8 2 {expected integer but got "_33"} {expected integer but got "42_"}} # cleanup ::tcltest::cleanupTests -- cgit v0.12