Merge 8.7

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-31 16:38:25 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-31 16:38:25 (GMT)
commit: 083fb819467b262e130994651f07ffc336f9660a (patch)
tree: c97fe39aebf3124fecb3082a6f1117008377d7db
parent: 0b3e25d9acd83e8b4342f390aff821538e74e6f6 (diff)
parent: ea25a2d32d5f1c090e0f6a0a02950ed621be8efe (diff)
download: tcl-083fb819467b262e130994651f07ffc336f9660a.zip
tcl-083fb819467b262e130994651f07ffc336f9660a.tar.gz
tcl-083fb819467b262e130994651f07ffc336f9660a.tar.bz2
7 files changed, 153 insertions, 27 deletions
diff --git a/doc/expr.n b/doc/expr.n
index 04f0cef..1498ba1 100644
--- a/doc/expr.n
+++ b/doc/expr.n
@@ -17,7 +17,7 @@ expr \- Evaluate an expression
 .BE
 .SH DESCRIPTION
 .PP
-Concatenates \fIarg\fRs, separated by a space, into an expression, and evaluates
+The \fBexpr\fR command concatenates \fIarg\fRs, separated by a space, into an expression, and evaluates
 that expression, returning its value.
 The operators permitted in an expression include a subset of
 the operators permitted in C expressions.  For those operators
@@ -46,22 +46,6 @@ value is the form produced by the \fB%g\fR format specifier of Tcl's
 An expression consists of a combination of operands, operators, parentheses and
 commas, possibly with whitespace between any of these elements, which is
 ignored.
-An integer operand may be specified in decimal (the normal case, the optional
-first two characters are \fB0d\fR), binary
-(the first two characters are \fB0b\fR), octal
-(the first two characters are \fB0o\fR), or hexadecimal
-(the first two characters are \fB0x\fR) form.  For
-compatibility with older Tcl releases, an operand that begins with \fB0\fR is
-interpreted as an octal integer even if the second character is not \fBo\fR.
-A floating-point number may be specified in any of several
-common decimal formats, and may use the decimal point \fB.\fR,
-\fBe\fR or \fBE\fR for scientific notation, and
-the sign characters \fB+\fR and \fB\-\fR.  The
-following are all valid floating-point numbers:  2.1, 3., 6e4, 7.91e+16.
-The strings \fBInf\fR
-and \fBNaN\fR, in any combination of case, are also recognized as floating point
-values.  An operand that doesn't have a numeric interpretation must be quoted
-with either braces or with double quotes.
 .PP
 An operand may be specified in any of the following ways:
 .IP [1]
@@ -103,6 +87,49 @@ produces the value on the right side.
 \fBexpr\fR 4*[llength "6 2"]	\fI8\fR
 \fBexpr\fR {{word one} < "word $a"}	\fI0\fR
 .CE
+.PP
+\fBInteger value\fR
+.PP
+An integer operand may be specified in decimal (the normal case, the optional
+first two characters are \fB0d\fR), binary
+(the first two characters are \fB0b\fR), octal
+(the first two characters are \fB0o\fR), or hexadecimal
+(the first two characters are \fB0x\fR) form.  For
+compatibility with older Tcl releases, an operand that begins with \fB0\fR is
+interpreted as an octal integer even if the second character is not \fBo\fR.
+.PP
+\fBFloating-point value\fR
+.PP
+A floating-point number may be specified in any of several
+common decimal formats, and may use the decimal point \fB.\fR,
+\fBe\fR or \fBE\fR for scientific notation, and
+the sign characters \fB+\fR and \fB\-\fR.  The
+following are all valid floating-point numbers:  2.1, 3., 6e4, 7.91e+16.
+The strings \fBInf\fR
+and \fBNaN\fR, in any combination of case, are also recognized as floating point
+values.  An operand that doesn't have a numeric interpretation must be quoted
+with either braces or with double quotes.
+.PP
+\fBBoolean value\fR
+.PP
+A boolean value may be represented by any of the values \fB0\fR, \fBfalse\fR, \fBno\fR,
+or \fBoff\fR (false), or by any of the values \fB1\fR, \fBtrue\fR, \fByes\fR, or \fBon\fR (true).
+.PP
+\fBDigit separator\fR
+.PP
+Digits in any numeric value may be separated with one or more underscore
+characters, "\fB_\fR", to improve readability.  These separators may only
+appear between digits.  The separator may not appear at the start of a
+numeric value, between the leading 0 and radix specifier, or at the
+end of a numeric value.  Here are some examples:
+.PP
+.CS
+.ta 9c
+\fBexpr\fR 100_000_000		\fI100000000\fR
+\fBexpr\fR 0xffff_ffff		\fI4294967295\fR
+\fBformat\fR 0x%x 0b1111_1110_1101_1011		\fI0xfedb\fR
+.CE
+.PP
 .SS OPERATORS
 .PP
 For operators having both a numeric mode and a string mode, the numeric mode is
@@ -474,7 +501,7 @@ set randNum [\fBexpr\fR { int(100 * rand()) }]
 array(n), for(n), if(n), mathfunc(n), mathop(n), namespace(n), proc(n),
 string(n), Tcl(n), while(n)
 .SH KEYWORDS
-arithmetic, boolean, compare, expression, fuzzy comparison
+arithmetic, boolean, compare, expression, fuzzy comparison, integer value
 .SH COPYRIGHT
 .nf
 Copyright \(co 1993 The Regents of the University of California.
diff --git a/doc/string.n b/doc/string.n
index 44d621d..7cd53ca 100644
--- a/doc/string.n
+++ b/doc/string.n
@@ -505,7 +505,7 @@ if {$length == 0} {
 .SH "SEE ALSO"
 expr(n), list(n)
 .SH KEYWORDS
-case conversion, compare, index, match, pattern, string, word, equal,
+case conversion, compare, index, integer value, match, pattern, string, word, equal,
 ctype, character, reverse
 .\" Local Variables:
 .\" mode: nroff
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 060ecf3..afb431c 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2713,6 +2713,8 @@ typedef struct ProcessGlobalValue {
 				/* Reject leading/trailing whitespace. */
 #define TCL_PARSE_BINARY_ONLY	64
 				/* Parse binary even without prefix. */
+#define TCL_PARSE_NO_UNDERSCORE	128
+				/* Reject underscore digit separator */
 
 /*
  *----------------------------------------------------------------------
diff --git a/generic/tclScan.c b/generic/tclScan.c
index dc98f54..4d86382 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -896,7 +896,7 @@ Tcl_ScanObjCmd(
 		width = ~0;
 	    }
 	    if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
-		    &end, TCL_PARSE_INTEGER_ONLY | parseFlag)) {
+		&end, TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_UNDERSCORE | parseFlag)) {
 		Tcl_DecrRefCount(objPtr);
 		if (width < 0) {
 		    if (*end == '\0') {
@@ -998,7 +998,7 @@ Tcl_ScanObjCmd(
 		width = ~0;
 	    }
 	    if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
-		    &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE)) {
+		    &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE | TCL_PARSE_NO_UNDERSCORE)) {
 		Tcl_DecrRefCount(objPtr);
 		if (width < 0) {
 		    if (*end == '\0') {
diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c
index 7ef2c60..85b660d 100644
--- a/generic/tclStrToD.c
+++ b/generic/tclStrToD.c
@@ -535,7 +535,9 @@ TclParseNumber(
     int shift = 0;		/* Amount to shift when accumulating binary */
     int explicitOctal = 0;
     mp_err err = MP_OKAY;
-
+    int under = 0;              /* Flag trailing '_' as error if true once
+				 * number is accepted. */
+    
 #define ALL_BITS	((Tcl_WideUInt)-1)
 #define MOST_BITS	(ALL_BITS >> 1)
 
@@ -643,7 +645,7 @@ TclParseNumber(
 	    acceptPoint = p;
 	    acceptLen = len;
 	    if (c == 'x' || c == 'X') {
-		if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY)) {
+		if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) || under) {
 		    goto endgame;
 		}
 		state = ZERO_X;
@@ -656,7 +658,7 @@ TclParseNumber(
 		goto zeroo;
 	    }
 	    if (c == 'b' || c == 'B') {
-		if (flags & TCL_PARSE_OCTAL_ONLY) {
+		if ((flags & TCL_PARSE_OCTAL_ONLY) || under) {
 		    goto endgame;
 		}
 		state = ZERO_B;
@@ -666,11 +668,17 @@ TclParseNumber(
 		goto zerob;
 	    }
 	    if (c == 'o' || c == 'O') {
+		if (under) {
+		    goto endgame;
+		}
 		explicitOctal = 1;
 		state = ZERO_O;
 		break;
 	    }
 	    if (c == 'd' || c == 'D') {
+		if (under) {
+		    goto endgame;
+		}
 		state = ZERO_D;
 		break;
 	    }
@@ -694,9 +702,11 @@ TclParseNumber(
 	zeroo:
 	    if (c == '0') {
 		numTrailZeros++;
+		under = 0;
 		state = OCTAL;
 		break;
 	    } else if (c >= '1' && c <= '7') {
+		under = 0;
 		if (objPtr != NULL) {
 		    shift = 3 * (numTrailZeros + 1);
 		    significandOverflow = AccumulateDecimalDigit(
@@ -746,6 +756,10 @@ TclParseNumber(
 		numTrailZeros = 0;
 		state = OCTAL;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    }
 	    /* FALLTHROUGH */
 
@@ -774,6 +788,7 @@ TclParseNumber(
 
 	    if (c == '0') {
 		numTrailZeros++;
+		under = 0;
 		state = BAD_OCTAL;
 		break;
 	    } else if (isdigit(UCHAR(c))) {
@@ -789,12 +804,15 @@ TclParseNumber(
 		    numSigDigs = 1;
 		}
 		numTrailZeros = 0;
+		under = 0;
 		state = BAD_OCTAL;
 		break;
 	    } else if (c == '.') {
+		under = 0;
 		state = FRACTION;
 		break;
 	    } else if (c == 'E' || c == 'e') {
+		under = 0;
 		state = EXPONENT_START;
 		break;
 	    }
@@ -817,14 +835,22 @@ TclParseNumber(
 	zerox:
 	    if (c == '0') {
 		numTrailZeros++;
+		under = 0;
 		state = HEXADECIMAL;
 		break;
 	    } else if (isdigit(UCHAR(c))) {
+		under = 0;
 		d = (c-'0');
 	    } else if (c >= 'A' && c <= 'F') {
+		under = 0;
 		d = (c-'A'+10);
 	    } else if (c >= 'a' && c <= 'f') {
+		under = 0;
 		d = (c-'a'+10);
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    } else {
 		goto endgame;
 	    }
@@ -870,8 +896,13 @@ TclParseNumber(
 	zerob:
 	    if (c == '0') {
 		numTrailZeros++;
+		under = 0;
 		state = BINARY;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    } else if (c != '1') {
 		goto endgame;
 	    }
@@ -910,10 +941,17 @@ TclParseNumber(
 
 	case ZERO_D:
 	    if (c == '0') {
+		under = 0;
 		numTrailZeros++;
 	    } else if ( ! isdigit(UCHAR(c))) {
+                if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                    /* Ignore numeric "white space" */
+                    under = 1;
+                    break;
+                }
 		goto endgame;
 	    }
+	    under = 0;
 	    state = DECIMAL;
 	    flags |= TCL_PARSE_INTEGER_ONLY;
 	    /* FALLTHROUGH */
@@ -932,6 +970,7 @@ TclParseNumber(
 	    acceptLen = len;
 	    if (c == '0') {
 		numTrailZeros++;
+		under = 0;
 		state = DECIMAL;
 		break;
 	    } else if (isdigit(UCHAR(c))) {
@@ -943,14 +982,21 @@ TclParseNumber(
 		}
 		numSigDigs += numTrailZeros+1;
 		numTrailZeros = 0;
+		under = 0;
 		state = DECIMAL;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    } else if (flags & TCL_PARSE_INTEGER_ONLY) {
 		goto endgame;
 	    } else if (c == '.') {
+		under = 0;
 		state = FRACTION;
 		break;
 	    } else if (c == 'E' || c == 'e') {
+		under = 0;
 		state = EXPONENT_START;
 		break;
 	    }
@@ -976,6 +1022,7 @@ TclParseNumber(
 	    if (c == '0') {
 		numDigitsAfterDp++;
 		numTrailZeros++;
+		under = 0;
 		state = FRACTION;
 		break;
 	    } else if (isdigit(UCHAR(c))) {
@@ -992,8 +1039,13 @@ TclParseNumber(
 		    numSigDigs = 1;
 		}
 		numTrailZeros = 0;
+		under = 0;
 		state = FRACTION;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    }
 	    goto endgame;
 
@@ -1005,10 +1057,12 @@ TclParseNumber(
 	     */
 
 	    if (c == '+') {
+		under = 0;
 		state = EXPONENT_SIGNUM;
 		break;
 	    } else if (c == '-') {
 		exponentSignum = 1;
+		under = 0;
 		state = EXPONENT_SIGNUM;
 		break;
 	    }
@@ -1022,8 +1076,13 @@ TclParseNumber(
 
 	    if (isdigit(UCHAR(c))) {
 		exponent = c - '0';
+		under = 0;
 		state = EXPONENT;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    }
 	    goto endgame;
 
@@ -1042,8 +1101,13 @@ TclParseNumber(
 		} else {
 		    exponent = LONG_MAX;
 		}
+		under = 0;
 		state = EXPONENT;
 		break;
+            } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) {
+                /* Ignore numeric "white space" */
+                under = 1;
+                break;
 	    }
 	    goto endgame;
 
@@ -1054,12 +1118,14 @@ TclParseNumber(
 
 	case sI:
 	    if (c == 'n' || c == 'N') {
+		under = 0;
 		state = sIN;
 		break;
 	    }
 	    goto endgame;
 	case sIN:
 	    if (c == 'f' || c == 'F') {
+		under = 0;
 		state = sINF;
 		break;
 	    }
@@ -1068,6 +1134,7 @@ TclParseNumber(
 	    acceptState = state;
 	    acceptPoint = p;
 	    acceptLen = len;
+            under = 0;
 	    if (c == 'i' || c == 'I') {
 		state = sINFI;
 		break;
@@ -1075,24 +1142,28 @@ TclParseNumber(
 	    goto endgame;
 	case sINFI:
 	    if (c == 'n' || c == 'N') {
+		under = 0;
 		state = sINFIN;
 		break;
 	    }
 	    goto endgame;
 	case sINFIN:
 	    if (c == 'i' || c == 'I') {
+		under = 0;
 		state = sINFINI;
 		break;
 	    }
 	    goto endgame;
 	case sINFINI:
 	    if (c == 't' || c == 'T') {
+		under = 0;
 		state = sINFINIT;
 		break;
 	    }
 	    goto endgame;
 	case sINFINIT:
 	    if (c == 'y' || c == 'Y') {
+		under = 0;
 		state = sINFINITY;
 		break;
 	    }
@@ -1104,12 +1175,14 @@ TclParseNumber(
 #ifdef IEEE_FLOATING_POINT
 	case sN:
 	    if (c == 'a' || c == 'A') {
+		under = 0;
 		state = sNA;
 		break;
 	    }
 	    goto endgame;
 	case sNA:
 	    if (c == 'n' || c == 'N') {
+		under = 0;
 		state = sNAN;
 		break;
 	    }
@@ -1119,6 +1192,7 @@ TclParseNumber(
 	    acceptPoint = p;
 	    acceptLen = len;
 	    if (c == '(') {
+		under = 0;
 		state = sNANPAREN;
 		break;
 	    }
@@ -1129,12 +1203,14 @@ TclParseNumber(
 	     */
 	case sNANHEX:
 	    if (c == ')') {
+		under = 0;
 		state = sNANFINISH;
 		break;
 	    }
 	    /* FALLTHROUGH */
 	case sNANPAREN:
 	    if (TclIsSpaceProcM(c)) {
+		under = 0;
 		break;
 	    }
 	    if (numSigDigs < 13) {
@@ -1149,6 +1225,7 @@ TclParseNumber(
 		}
 		numSigDigs++;
 		significandWide = (significandWide << 4) + d;
+		under = 0;
 		state = sNANHEX;
 		break;
 	    }
@@ -1161,6 +1238,7 @@ TclParseNumber(
 	    acceptPoint = p;
 	    acceptLen = len;
 	    goto endgame;
+
 	}
 	p++;
 	len--;
@@ -1178,11 +1256,14 @@ TclParseNumber(
 	}
     } else {
 	/*
-	 * Back up to the last accepting state in the lexer.
+	 * Back up to the last accepting state in the lexer.
+	 * If the last character seen is the digit separator '_',
+	 * back up one more character, to that separator.
 	 */
 
-	p = acceptPoint;
-	len = acceptLen;
+	p = under ? acceptPoint-1 : acceptPoint;
+	len = under ? acceptLen-1 : acceptLen;
+
 	if (!(flags & TCL_PARSE_NO_WHITESPACE)) {
 	    /*
 	     * Accept trailing whitespace.
diff --git a/tests/get.test b/tests/get.test
index e35b2cc..7ab189c 100644
--- a/tests/get.test
+++ b/tests/get.test
@@ -109,6 +109,12 @@ test get-3.4 {Tcl_GetDouble with iffy numbers} testdoubleobj {
 	set x
     }
 } {0.0 0.0 0.0 0.0 0.0 7.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0}
+test get-3.5 {Tcl_GetInt with numeric whitespace (i.e. '_')} testgetint {
+    lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x_a " " 0_07 " " 0o_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 } {
+	catch {testgetint $x} x
+	set x
+    }
+} {0 10 2 33 1423324 10 7 8 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}}
 
 # cleanup
 ::tcltest::cleanupTests
diff --git a/tests/scan.test b/tests/scan.test
index b488f68..eaeaa49 100644
--- a/tests/scan.test
+++ b/tests/scan.test
@@ -555,6 +555,11 @@ test scan-5.19 {bigint scanning invalid} -setup {
     list [scan "207698809136909011942886895" \
            %llu a] $a
 } -result {1 207698809136909011942886895}
+test scan-5.20 {ignore digit separators} -setup {
+    set a {}; set b {}; set c {};
+} -body {
+    list [scan "10_23_45" %d_%d_%d a b c] $a $b $c
+} -result {3 10 23 45}
 
 test scan-6.1 {floating-point scanning} -setup {
     set a {}; set b {}; set c {}; set d {}
@@ -600,6 +605,11 @@ test scan-6.8 {floating-point scanning} -setup {
 } -body {
     list [scan "4.6 5.2" "%f %f %f %f" a b c d] $a $b $c $d
 } -result {2 4.6 5.2 {} {}}
+test scan-6.9 {disallow digit separator in floating-point} -setup {
+    set a {}; set b {}; set c {};
+} -body {
+    list [scan "3.14_2.35_98.6" %f_%f_%f a b c ] $a $b $c
+} -result {3 3.14 2.35 98.6}
 
 test scan-7.1 {string and character scanning} -setup {
     set a {}; set b {}; set c {}; set d {}
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-31 16:38:25 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-31 16:38:25 (GMT)
commit	083fb819467b262e130994651f07ffc336f9660a (patch)
tree	c97fe39aebf3124fecb3082a6f1117008377d7db
parent	0b3e25d9acd83e8b4342f390aff821538e74e6f6 (diff)
parent	ea25a2d32d5f1c090e0f6a0a02950ed621be8efe (diff)
download	tcl-083fb819467b262e130994651f07ffc336f9660a.zip tcl-083fb819467b262e130994651f07ffc336f9660a.tar.gz tcl-083fb819467b262e130994651f07ffc336f9660a.tar.bz2