TIP #697: 32-bit truncation in format and scan (let's gain some time)

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2024-06-19 10:32:39 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2024-06-19 10:32:39 (GMT)
commit: c7e60575dde5c9b7ca92fb83bbf61319b17e506c (patch)
tree: 5e90c99c08a69c6b4b862eaa3b455014b4183147
parent: ba8a9c512f733da5f0db3055c0a24590c542b9ea (diff)
parent: 96dd069f9fc0b4e987db3a0ee7dde49edd93d3af (diff)
download: tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.zip
tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.tar.gz
tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.tar.bz2
10 files changed, 51 insertions, 92 deletions
diff --git a/doc/format.n b/doc/format.n
index 59774fc..b1e204a 100644
--- a/doc/format.n
+++ b/doc/format.n
@@ -141,8 +141,7 @@ element of the \fBtcl_platform\fR array.
 If it is \fBL\fR it specifies that an integer or double value is taken
 without truncation for conversion to a formatted substring.
 If neither of those are present, the integer value is
-truncated to the range determined by the value of the
-\fBwordSize\fR element of the \fBtcl_platform\fR array).
+truncated to a 32-bit range.
 .SS "MANDATORY CONVERSION TYPE"
 .PP
 The last thing in a conversion specifier is an alphabetic character
diff --git a/doc/scan.n b/doc/scan.n
index e6e1ad1..9178a43 100644
--- a/doc/scan.n
+++ b/doc/scan.n
@@ -73,17 +73,18 @@ The size modifier field is used only when scanning a substring into
 one of Tcl's integer values.  The size modifier field dictates the
 integer range acceptable to be stored in a variable, or, for the inline
 case, in a position in the result list.
-The syntactically valid values for the size modifier are \fBh\fR, \fBL\fR,
-\fBl\fR, \fBz\fR, \fBt\fR, and \fBll\fR.  The \fBh\fR size
-modifier value is equivalent
-to the absence of a size modifier in the the conversion specifier.
-Either one indicates the integer range to be stored is limited to the range
-determined by the value of the \fBwordSize\fR element of the \fBtcl_platform\fR
-array).  The \fBL\fR, \fBq\fR or \fBj\fR size modifiers are equivalent to the
-\fBl\fR size modifier. Either of them indicates the integer range to be stored is
-limited to the same range produced by the \fBwide()\fR function of
-the \fBexpr\fR command.  The \fBll\fR size modifier indicates that
-the integer range to be stored is unlimited.
+The syntactically valid values for the size modifier are \fBh\fR,
+\fBl\fR, \fBz\fR, \fBt\fR, \fBq\fR, \fBj\fR, \fBll\fR, and \fBL\fR.
+The \fBh\fR size modifier value is equivalent to the absence of a size
+modifier in the the conversion specifier.  Either one indicates the
+integer range to be stored is limited to the 32-bit range.  The \fBL\fR
+size modifier is equivalent to the \fBll\fR size modifier.  Either one
+indicates the integer range to be stored is unlimited.  The \fBl\fR (or
+\fBq\fR or \fBj\fR) size modifier indicates that the integer range to be
+stored is limited to the same range produced by the \fBwide()\fR function
+of the \fBexpr\fR command. The \fBz\fR and \fBt\fR modifiers indicate the
+integer range to be the same as for either \fBh\fR or \fBl\fR, depending
+on the value of the \fBpointerSize\fR element of the \fBtcl_platform\fR array.
 .SS "MANDATORY CONVERSION CHARACTER"
 .PP
 The following conversion characters are supported:
@@ -248,8 +249,6 @@ An interactive session demonstrating the truncation of integer
 values determined by size modifiers:
 .PP
 .CS
-\fI%\fR set tcl_platform(wordSize)
-4
 \fI%\fR scan 20000000000000000000 %d
 2147483647
 \fI%\fR scan 20000000000000000000 %ld
diff --git a/generic/tclArithSeries.c b/generic/tclArithSeries.c
index 7618415..7fe0f3a 100755
--- a/generic/tclArithSeries.c
+++ b/generic/tclArithSeries.c
@@ -512,7 +512,7 @@ assignNumber(
     void *clientData;
     int tcl_number_type;
 
-    if (Tcl_GetNumberFromObj(interp, numberObj, &clientData, 
+    if (Tcl_GetNumberFromObj(interp, numberObj, &clientData,
 		&tcl_number_type) != TCL_OK) {
     	return TCL_ERROR;
     }
diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c
index a9bcf0c..bdf486a 100644
--- a/generic/tclEnsemble.c
+++ b/generic/tclEnsemble.c
@@ -277,7 +277,7 @@ TclNamespaceEnsembleCmd(
  *
  * Note:
  *	Can't use SetEnsembleConfigOptions() here. Different (but overlapping)
- *	options are supported. 
+ *	options are supported.
  *
  *----------------------------------------------------------------------
  */
@@ -886,7 +886,7 @@ Tcl_CreateEnsemble(
  */
 static inline EnsembleConfig *
 GetEnsembleFromCommand(
-    Tcl_Interp *interp,		/* Where to report an error. May be NULL. */	
+    Tcl_Interp *interp,		/* Where to report an error. May be NULL. */
     Tcl_Command token)		/* What to check for ensemble-ness. */
 {
     Command *cmdPtr = (Command *) token;
diff --git a/generic/tclScan.c b/generic/tclScan.c
index c143efa..48d2bcc 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -385,6 +385,10 @@ ValidateFormat(
 	    }
 	    format += TclUtfToUniChar(format, &ch);
 	    break;
+	case 'L':
+	    flags |= SCAN_BIG;
+	    format += TclUtfToUniChar(format, &ch);
+	    break;
 	case 'l':
 	    if (*format == 'l') {
 		flags |= SCAN_BIG;
@@ -393,7 +397,6 @@ ValidateFormat(
 		break;
 	    }
 	    /* FALLTHRU */
-	case 'L':
 	case 'j':
 	case 'q':
 	    flags |= SCAN_LONGER;
@@ -601,7 +604,7 @@ Tcl_ScanObjCmd(
     const char *format;
     int numVars, nconversions, totalVars = -1;
     int objIndex, offset, i, result, code;
-    long value;
+    int value;
     const char *string, *end, *baseString;
     char op = 0;
     int underflow = 0;
@@ -994,11 +997,11 @@ Tcl_ScanObjCmd(
 		    }
 		}
 	    } else {
-		if (TclGetLongFromObj(NULL, objPtr, &value) != TCL_OK) {
+		if (TclGetIntFromObj(NULL, objPtr, &value) != TCL_OK) {
 		    if (TclGetString(objPtr)[0] == '-') {
-			value = LONG_MIN;
+			value = INT_MIN;
 		    } else {
-			value = LONG_MAX;
+			value = INT_MAX;
 		    }
 		}
 		if ((flags & SCAN_UNSIGNED) && (value < 0)) {
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 8b6b719..75b4fdd 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1875,9 +1875,7 @@ Tcl_AppendFormatToObj(
 	int gotMinus = 0, gotHash = 0, gotZero = 0, gotSpace = 0, gotPlus = 0;
 	int gotPrecision, sawFlag, useShort = 0, useBig = 0;
 	Tcl_WideInt width, precision;
-#ifndef TCL_WIDE_INT_IS_LONG
 	int useWide = 0;
-#endif
 	int newXpg, allocSegment = 0;
 	Tcl_Size numChars, segmentLimit, segmentNumBytes;
 	Tcl_Obj *segment;
@@ -2082,18 +2080,14 @@ Tcl_AppendFormatToObj(
 		useBig = 1;
 		format += step;
 		step = TclUtfToUniChar(format, &ch);
-#ifndef TCL_WIDE_INT_IS_LONG
 	    } else {
 		useWide = 1;
-#endif
 	    }
 	} else if (ch == 'I') {
 	    if ((format[1] == '6') && (format[2] == '4')) {
 		format += (step + 2);
 		step = TclUtfToUniChar(format, &ch);
-#ifndef TCL_WIDE_INT_IS_LONG
 		useWide = 1;
-#endif
 	    } else if ((format[1] == '3') && (format[2] == '2')) {
 		format += (step + 2);
 		step = TclUtfToUniChar(format, &ch);
@@ -2104,17 +2098,13 @@ Tcl_AppendFormatToObj(
 	} else if ((ch == 'q') || (ch == 'j')) {
 	    format += step;
 	    step = TclUtfToUniChar(format, &ch);
-#ifndef TCL_WIDE_INT_IS_LONG
 	    useWide = 1;
-#endif
 	} else if ((ch == 't') || (ch == 'z')) {
 	    format += step;
 	    step = TclUtfToUniChar(format, &ch);
-#ifndef TCL_WIDE_INT_IS_LONG
 	    if (sizeof(void *) > sizeof(int)) {
 		useWide = 1;
 	    }
-#endif
 	} else if (ch == 'L') {
 	    format += step;
 	    step = TclUtfToUniChar(format, &ch);
@@ -2180,17 +2170,15 @@ Tcl_AppendFormatToObj(
 	case 'b': {
 	    short s = 0;	/* Silence compiler warning; only defined and
 				 * used when useShort is true. */
-	    long l;
+	    int l;
 	    Tcl_WideInt w;
 	    mp_int big;
 	    int isNegative = 0;
 	    Tcl_Size toAppend;
 
-#ifndef TCL_WIDE_INT_IS_LONG
 	    if ((ch == 'p') && (sizeof(void *) > sizeof(int))) {
 		useWide = 1;
 	    }
-#endif
 	    if (useBig) {
 		int cmpResult;
 		if (Tcl_GetBignumFromObj(interp, segment, &big) != TCL_OK) {
@@ -2211,7 +2199,6 @@ Tcl_AppendFormatToObj(
 			ch = 'd';
 		    }
 		}
-#ifndef TCL_WIDE_INT_IS_LONG
 	    } else if (useWide) {
 		if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) {
 		    goto error;
@@ -2220,12 +2207,11 @@ Tcl_AppendFormatToObj(
 		if (w == (Tcl_WideInt) 0) {
 		    gotHash = 0;
 		}
-#endif
-	    } else if (TclGetLongFromObj(NULL, segment, &l) != TCL_OK) {
+	    } else if (TclGetIntFromObj(NULL, segment, &l) != TCL_OK) {
 		if (TclGetWideBitsFromObj(interp, segment, &w) != TCL_OK) {
 		    goto error;
 		} else {
-		    l = (long) w;
+		    l = (int) w;
 		}
 		if (useShort) {
 		    s = (short) l;
@@ -2234,8 +2220,8 @@ Tcl_AppendFormatToObj(
 			gotHash = 0;
 		    }
 		} else {
-		    isNegative = (l < (long) 0);
-		    if (l == (long) 0) {
+		    isNegative = (l < (int) 0);
+		    if (l == (int) 0) {
 			gotHash = 0;
 		    }
 		}
@@ -2246,8 +2232,8 @@ Tcl_AppendFormatToObj(
 		    gotHash = 0;
 		}
 	    } else {
-		isNegative = (l < (long) 0);
-		if (l == (long) 0) {
+		isNegative = (l < (int) 0);
+		if (l == (int) 0) {
 		    gotHash = 0;
 		}
 	    }
@@ -2294,10 +2280,8 @@ Tcl_AppendFormatToObj(
 
 		if (useShort) {
 		    TclNewIntObj(pure, s);
-#ifndef TCL_WIDE_INT_IS_LONG
 		} else if (useWide) {
 		    TclNewIntObj(pure, w);
-#endif
 		} else if (useBig) {
 		    pure = Tcl_NewBignumObj(&big);
 		} else {
@@ -2382,7 +2366,6 @@ Tcl_AppendFormatToObj(
 			numDigits++;
 			us /= base;
 		    }
-#ifndef TCL_WIDE_INT_IS_LONG
 		} else if (useWide) {
 		    Tcl_WideUInt uw = (Tcl_WideUInt) w;
 
@@ -2391,7 +2374,6 @@ Tcl_AppendFormatToObj(
 			numDigits++;
 			uw /= base;
 		    }
-#endif
 		} else if (useBig && !mp_iszero(&big)) {
 		    int leftover = (big.used * MP_DIGIT_BIT) % numBits;
 		    mp_digit mask = (~(mp_digit)0) << (MP_DIGIT_BIT-leftover);
@@ -2408,7 +2390,7 @@ Tcl_AppendFormatToObj(
 			goto errorMsg;
 		    }
 		} else if (!useBig) {
-		    unsigned long ul = (unsigned long) l;
+		    unsigned ul = (unsigned) l;
 
 		    bits = (Tcl_WideUInt) ul;
 		    while (ul) {
diff --git a/tests/expr.test b/tests/expr.test
index f2c7ae6..bfefde3 100644
--- a/tests/expr.test
+++ b/tests/expr.test
@@ -5842,7 +5842,7 @@ test expr-33.1 {parse largest long value} {
         [expr {int(2147483647 + 1) > 0}] \
 
 } {2147483647 2147483647 2147483647 2147483647 1 1}
-test expr-33.2 {parse smallest long value} longIs32bit {
+test expr-33.2 {parse smallest long value} {
     set min_long_str -2147483648
     set min_long_hex "-0x80000000 "
 
diff --git a/tests/format.test b/tests/format.test
index 5af6c19..39de907 100644
--- a/tests/format.test
+++ b/tests/format.test
@@ -15,10 +15,7 @@ if {"::tcltest" ni [namespace children]} {
     namespace import -force ::tcltest::*
 }
 
-# %u output depends on word length, so this test is not portable.
-testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}]
-testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}]
-testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}]
+# %z/%t/%p output depends on pointerSize, so some tests are not portable.
 testConstraint pointerIs64bit [expr {$tcl_platform(pointerSize) >= 8}]
 # MSVC uses a broken libc that gets sprintf("%g") wrong. This is a pain
 # particularly in Continuous Integration, and there isn't anything much we can
@@ -31,12 +28,9 @@ test format-1.1 {integer formatting} {
 test format-1.2 {integer formatting} {
     format "%4d %4d %4d %4d %d %#x %#X" 6 34 16923 -12 -1 14 12
 } {   6   34 16923  -12 -1 0xe 0xC}
-test format-1.3 {integer formatting} longIs32bit {
+test format-1.3 {integer formatting} {
     format "%4u %4u %4u %4u %d %#o" 6 34 16923 -12 -1 0
 } {   6   34 16923 4294967284 -1 0}
-test format-1.3.1 {integer formatting} longIs64bit {
-    format "%4u %4u %4u %4u %d %#o" 6 34 16923 -12 -1 0
-} {   6   34 16923 18446744073709551604 -1 0}
 test format-1.4 {integer formatting} {
     format "%-4d %-4i %-4d %-4ld" 6 34 16923 -12 -1
 } {6    34   16923 -12 }
@@ -48,36 +42,21 @@ test format-1.6 {integer formatting} {
 } {000034}
 # Printing negative numbers in hex or octal format depends on word
 # length, so these tests are not portable.
-test format-1.7 {integer formatting} longIs32bit {
+test format-1.7 {integer formatting} {
     format "%4x %4x %4x %4x" 6 34 16923 -12 -1
 } {   6   22 421b fffffff4}
-test format-1.7.1 {integer formatting} longIs64bit {
-    format "%4x %4x %4x %4x" 6 34 16923 -12 -1
-} {   6   22 421b fffffffffffffff4}
-test format-1.8 {integer formatting} longIs32bit {
+test format-1.8 {integer formatting} {
     format "%#x %#x %#X %#X %#x" 0 6 34 16923 -12 -1
 } {0 0x6 0x22 0x421B 0xfffffff4}
-test format-1.8.1 {integer formatting} longIs64bit {
-    format "%#x %#x %#X %#X %#x" 0 6 34 16923 -12 -1
-} {0 0x6 0x22 0x421B 0xfffffffffffffff4}
-test format-1.9 {integer formatting} longIs32bit {
+test format-1.9 {integer formatting} {
     format "%#5x %#20x %#20x %#20x %#20x" 0 6 34 16923 -12 -1
 } {    0                  0x6                 0x22               0x421b           0xfffffff4}
-test format-1.9.1 {integer formatting} longIs64bit {
-    format "%#5x %#20x %#20x %#20x %#20x" 0 6 34 16923 -12 -1
-} {    0                  0x6                 0x22               0x421b   0xfffffffffffffff4}
-test format-1.10 {integer formatting} longIs32bit {
+test format-1.10 {integer formatting} {
     format "%-#5x %-#20x %-#20x %-#20x %-#20x" 0 6 34 16923 -12 -1
 } {0     0x6                  0x22                 0x421b               0xfffffff4          }
-test format-1.10.1 {integer formatting} longIs64bit {
-    format "%-#5x %-#20x %-#20x %-#20x %-#20x" 0 6 34 16923 -12 -1
-} {0     0x6                  0x22                 0x421b               0xfffffffffffffff4  }
-test format-1.11 {integer formatting} longIs32bit {
+test format-1.11 {integer formatting} {
     format "%-#5o %-#20o %#-20o %#-20o %#-20o" 0 6 34 16923 -12 -1
 } {0     0o6                  0o42                 0o41033              0o37777777764       }
-test format-1.11.1 {integer formatting} longIs64bit {
-    format "%-#5o %-#20o %#-20o %#-20o %#-20o" 0 6 34 16923 -12 -1
-} {0     0o6                  0o42                 0o41033              0o1777777777777777777764}
 test format-1.12 {integer formatting} {
     format "%b %#b %#b %llb" 5 0 5 [expr {2**100}]
 } {101 0 0b101 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000}
@@ -556,13 +535,13 @@ for {set i 290} {$i < 400} {incr i} {
     append b "x"
 }
 
-test format-17.1 {testing %d with wide} {longIs32bit wideIs64bit} {
+test format-17.1 {testing %d with wide} {
     format %d 7810179016327718216
 } 1819043144
-test format-17.2 {testing %ld with wide} {wideIs64bit} {
+test format-17.2 {testing %ld with wide} {
     format %ld 7810179016327718216
 } 7810179016327718216
-test format-17.3 {testing %ld with non-wide} {wideIs64bit} {
+test format-17.3 {testing %ld with non-wide} {
     format %ld 42
 } 42
 test format-17.4 {testing %l with non-integer} {
@@ -589,7 +568,7 @@ test format-18.1 {do not demote existing numeric values} {
     format %08x $b
     lappend result [expr {$a == $b}]
 } {1 1 1 1}
-test format-18.2 {do not demote existing numeric values} {longIs32bit wideIs64bit} {
+test format-18.2 {do not demote existing numeric values} {
     set a [expr {0xaaaaaaaaaa + 1}]
     set b 0xaaaaaaaaab
     list [format %08x $a] [expr {$a == $b}]
@@ -606,8 +585,7 @@ test format-19.3 {Bug 2830354} {
     string length [format %340f 0]
 } 340
 
-test format-19.4.1 {Bug d498578df4: width overflow should cause limit exceeded} \
--constraints {longIs32bit} -body {
+test format-19.4.1 {Bug d498578df4: width overflow should cause limit exceeded} -body {
     # in case of overflow into negative, it produces width -2 (and limit exceeded),
     # in case of width will be unsigned, it will be outside limit (2GB for 32bit)...
     # and it don't throw an error in case the bug is not fixed (and probably no segfault).
diff --git a/tests/obj.test b/tests/obj.test
index eb85c84..fcd8d89 100644
--- a/tests/obj.test
+++ b/tests/obj.test
@@ -20,7 +20,6 @@ if {"::tcltest" ni [namespace children]} {
 catch [list package require -exact tcl::test [info patchlevel]]
 
 testConstraint testobj [llength [info commands testobj]]
-testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}]
 testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}]
 
 test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} testobj {
@@ -547,11 +546,11 @@ test obj-32.1 {freeing very large object trees} {
     unset x
 } {}
 
-test obj-33.1 {integer overflow on input} {longIs32bit wideIs64bit} {
+test obj-33.1 {integer overflow on input} {wideIs64bit} {
     set x 0x8000; append x 0000
     list [string is integer $x] [expr { wide($x) }]
 } {1 2147483648}
-test obj-33.2 {integer overflow on input} {longIs32bit wideIs64bit} {
+test obj-33.2 {integer overflow on input} {wideIs64bit} {
     set x 0xffff; append x ffff
     list [string is integer $x] [expr { wide($x) }]
 } {1 4294967295}
@@ -559,15 +558,15 @@ test obj-33.3 {integer overflow on input} {
     set x 0x10000; append x 0000
     list [string is integer $x] [expr { wide($x) }]
 } {1 4294967296}
-test obj-33.4 {integer overflow on input} {longIs32bit wideIs64bit} {
+test obj-33.4 {integer overflow on input} {wideIs64bit} {
     set x -0x8000; append x 0000
     list [string is integer $x] [expr { wide($x) }]
 } {1 -2147483648}
-test obj-33.5 {integer overflow on input} {longIs32bit wideIs64bit} {
+test obj-33.5 {integer overflow on input} {wideIs64bit} {
     set x -0x8000; append x 0001
     list [string is integer $x] [expr { wide($x) }]
 } {1 -2147483649}
-test obj-33.6 {integer overflow on input} {longIs32bit wideIs64bit} {
+test obj-33.6 {integer overflow on input} {wideIs64bit} {
     set x -0xffff; append x ffff
     list [string is integer $x] [expr { wide($x) }]
 } {1 -4294967295}
diff --git a/tests/scan.test b/tests/scan.test
index 6d7a9fb..6d91c8d 100644
--- a/tests/scan.test
+++ b/tests/scan.test
@@ -82,7 +82,6 @@ proc testIEEE {} {
 }
 
 testConstraint ieeeFloatingPoint [testIEEE]
-testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}]
 
 test scan-1.1 {BuildCharSet, CharInSet} {
     list [scan foo {%[^o]} x] $x
@@ -517,7 +516,7 @@ test scan-5.11 {integer scanning} -constraints {nonPortable} -setup {
     list [scan "4294967280 4294967280" "%u %d" a b] $a \
 	    [expr {$b == -16 || $b == 0x7fffffff}]
 } -result {2 4294967280 1}
-test scan-5.12 {integer scanning} -constraints {wideIs64bit} -setup {
+test scan-5.12 {integer scanning} -setup {
     set a {}; set b {}; set c {}
 } -body {
     list [scan "7810179016327718216,6c63546f6c6c6548,661432506755433062510" \
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2024-06-19 10:32:39 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2024-06-19 10:32:39 (GMT)
commit	c7e60575dde5c9b7ca92fb83bbf61319b17e506c (patch)
tree	5e90c99c08a69c6b4b862eaa3b455014b4183147
parent	ba8a9c512f733da5f0db3055c0a24590c542b9ea (diff)
parent	96dd069f9fc0b4e987db3a0ee7dde49edd93d3af (diff)
download	tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.zip tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.tar.gz tcl-c7e60575dde5c9b7ca92fb83bbf61319b17e506c.tar.bz2