TIP #502 implementation: Index value reform.

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2018-10-12 18:14:51 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2018-10-12 18:14:51 (GMT)
commit: 1d465c4012d7a83025402787ff824e934a840b3c (patch)
tree: c299962b18ca43166dca3b1e4f1c0deefcb13e39
parent: cc945fabb73c3733169f7441140a7b2f2c57572c (diff)
parent: 084fba1d17eed8d0e2cf704bb963f42c13ef0aed (diff)
download: tcl-1d465c4012d7a83025402787ff824e934a840b3c.zip
tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.gz
tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.bz2
6 files changed, 269 insertions, 100 deletions
diff --git a/ChangeLog.2007 b/ChangeLog.2007
index 5995956..34725e3 100644
--- a/ChangeLog.2007
+++ b/ChangeLog.2007
@@ -1426,7 +1426,7 @@
 	initialization assumptions of the TIP 280 code in CompileWord().
 
 	* generic/tclCompExpr.c:	Suppress the attempt to convert to
-	numeric when pre-compiling a constant expresion indicates an error.
+	numeric when pre-compiling a constant expression indicates an error.
 
 2007-08-22  Miguel Sofer  <msofer@users.sf.net>
 
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index e6e2a2e..f94c094 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -258,7 +258,7 @@ Tcl_RegexpObjCmd(
     stringLength = Tcl_GetCharLength(objPtr);
 
     if (startIndex) {
-	TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
+	TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
 	Tcl_DecrRefCount(startIndex);
 	if (offset < 0) {
 	    offset = 0;
@@ -582,7 +582,7 @@ Tcl_RegsubObjCmd(
     if (startIndex) {
 	int stringLength = Tcl_GetCharLength(objv[1]);
 
-	TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
+	TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
 	Tcl_DecrRefCount(startIndex);
 	if (offset < 0) {
 	    offset = 0;
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index b854b0f..f8835b9 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1759,7 +1759,7 @@ ConvertTreeToTokens(
 
 		/*
 		 * All the Tcl_Tokens allocated and filled belong to
-		 * this subexpresion. The first token is the leading
+		 * this subexpression. The first token is the leading
 		 * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer)
 		 * are its components.
 		 */
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index ef5086d..7355bc1 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -4761,7 +4761,7 @@ TEBCresume(
 
 	if ((TclListObjGetElements(interp, valuePtr, &objc, &objv) == TCL_OK)
 		&& (value2Ptr->typePtr != &tclListType)
-		&& (TclGetIntForIndexM(NULL , value2Ptr, objc-1,
+		&& (TclGetIntForIndexM(NULL, value2Ptr, objc-1,
 			&index) == TCL_OK)) {
 	    TclDecrRefCount(value2Ptr);
 	    tosPtr--;
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index c488a42..97879c7 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -15,6 +15,7 @@
 #include "tclInt.h"
 #include "tclParse.h"
 #include "tclStringTrim.h"
+#include "tommath.h"
 #include <math.h>
 
 /*
@@ -107,9 +108,11 @@ static Tcl_ThreadDataKey precisionKey;
 static void		ClearHash(Tcl_HashTable *tablePtr);
 static void		FreeProcessGlobalValue(ClientData clientData);
 static void		FreeThreadHash(ClientData clientData);
-static int		GetEndOffsetFromObj(Tcl_Obj *objPtr, int endValue,
-			    int *indexPtr);
+static int		GetEndOffsetFromObj(Tcl_Obj *objPtr,
+			    Tcl_WideInt endValue, Tcl_WideInt *indexPtr);
 static Tcl_HashTable *	GetThreadHash(Tcl_ThreadDataKey *keyPtr);
+static int		GetWideForIndex(Tcl_Interp *interp, Tcl_Obj *objPtr,
+			    Tcl_WideInt endValue, Tcl_WideInt *widePtr);
 static int		SetEndOffsetFromAny(Tcl_Interp *interp,
 			    Tcl_Obj *objPtr);
 static int		FindElement(Tcl_Interp *interp, const char *string,
@@ -3654,6 +3657,204 @@ TclFormatInt(
 /*
  *----------------------------------------------------------------------
  *
+ * GetWideForIndex --
+ *
+ *	This function produces a wide integer value corresponding to the
+ *	list index held in *objPtr. The parsing supports all values 
+ *	recognized as any size of integer, and the syntaxes end[-+]$integer
+ *	and $integer[-+]$integer. The argument endValue is used to give
+ *	the meaning of the literal index value "end".
+ *
+ * Results:
+ *	When parsing of *objPtr successfully recognizes an index value,
+ *	TCL_OK is returned, and the wide integer value corresponding to
+ *	the recognized index value is written to *widePtr. When parsing
+ *	fails, TCL_ERROR is returned and error information is written to
+ *	interp, if non-NULL.
+ *
+ * Side effects:
+ *	The type of *objPtr may change.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+GetWideForIndex(
+    Tcl_Interp *interp,         /* Interpreter to use for error reporting. If
+                                 * NULL, then no error message is left after
+                                 * errors. */
+    Tcl_Obj *objPtr,            /* Points to the value to be parsed */
+    Tcl_WideInt endValue,       /* The value to be stored at *widePtr if
+                                 * objPtr holds "end".
+				 * NOTE: this value may be negative. */
+    Tcl_WideInt *widePtr)       /* Location filled in with a wide integer
+                                 * representing an index. */
+{
+    ClientData cd;
+    const char *opPtr;
+    int numType, length, t1 = 0, t2 = 0;
+    int code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType);
+
+    if (code == TCL_OK) {
+	if (numType == TCL_NUMBER_WIDE) {
+	    /* objPtr holds an integer in the signed wide range */
+	    *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd);
+	    return TCL_OK;
+	}
+	if (numType == TCL_NUMBER_BIG) {
+	    /* objPtr holds an integer outside the signed wide range */
+	    /* Truncate to the signed wide range. */
+	    if (mp_isneg((mp_int *)cd)) {
+		*widePtr = LLONG_MIN;
+	    } else {
+		*widePtr = LLONG_MAX;
+	    }
+	    return TCL_OK;
+	}
+	/* Must be a double -> not a valid index */
+	goto parseError;
+    }
+
+    /* objPtr does not hold a number, check the end+/- format... */
+    if (GetEndOffsetFromObj(objPtr, endValue, widePtr) == TCL_OK) {
+	return TCL_OK;
+    }
+
+    /* If we reach here, the string rep of objPtr exists. */
+
+    /*
+     * The valid index syntax does not include any value that is
+     * a list of more than one element. This is necessary so that
+     * lists of index values can be reliably distinguished from any
+     * single index value.
+     */
+
+    /*
+     * Quick scan to see if multi-value list is even possible.
+     * This relies on TclGetString() returning a NUL-terminated string.
+     */
+    if ((TclMaxListLength(TclGetString(objPtr), -1, NULL) > 1)
+
+	    /* If it's possible, do the full list parse. */
+            && (TCL_OK == Tcl_ListObjLength(NULL, objPtr, &length))
+            && (length > 1)) {
+        goto parseError;
+    }
+
+    /* Passed the list screen, so parse for index arithmetic expression */
+    if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, NULL, -1, &opPtr,
+            TCL_PARSE_INTEGER_ONLY)) {
+	Tcl_WideInt w1=0, w2=0;
+
+	/* value starts with valid integer... */
+
+        if ((*opPtr == '-') || (*opPtr == '+')) {
+	    /* ... value continues with [-+] ... */
+
+	    /* Save first integer as wide if possible */
+	    TclGetNumberFromObj(NULL, objPtr, &cd, &t1);
+	    if (t1 == TCL_NUMBER_WIDE) {
+		w1 = (*(Tcl_WideInt *)cd);
+	    }
+
+	    if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, opPtr + 1,
+		    -1, NULL, TCL_PARSE_INTEGER_ONLY)) {
+		/* ... value concludes with second valid integer */
+
+		/* Save second integer as wide if possible */
+		TclGetNumberFromObj(NULL, objPtr, &cd, &t2);
+		if (t2 == TCL_NUMBER_WIDE) {
+		    w2 = (*(Tcl_WideInt *)cd);
+		}
+	    }
+        }
+	/* Clear invalid intreps left by TclParseNumber */
+	TclFreeIntRep(objPtr);
+
+	if (t1 && t2) {
+	    /* We have both integer values */
+	    if ((t1 == TCL_NUMBER_WIDE) && (t2 == TCL_NUMBER_WIDE)) {
+		/* Both are wide, do wide-integer math */
+		if (*opPtr == '-') {
+		    if ((w2 == LLONG_MIN) && (interp != NULL)) {
+			goto extreme;
+		    }
+		    w2 = -w2;
+		}
+
+		if ((w1 ^ w2) < 0) {
+		    /* Different signs, sum cannot overflow */
+		    *widePtr = w1 + w2;
+		} else if (w1 >= 0) {
+		    if (w1 < LLONG_MAX - w2) {
+			*widePtr = w1 + w2;
+		    } else {
+			*widePtr = LLONG_MAX;
+		    }
+		} else {
+		    if (w1 > LLONG_MIN - w2) {
+			*widePtr = w1 + w2;
+		    } else {
+			*widePtr = LLONG_MIN;
+		    }
+		}
+	    } else if (interp == NULL) {
+		/*
+		 * We use an interp to do bignum index calculations.
+		 * If we don't get one, call all indices with bignums errors,
+		 * and rely on callers to handle it.
+		 */
+		return TCL_ERROR;
+	    } else {
+		/*
+		 * At least one is big, do bignum math. Little reason to
+		 * value performance here. Re-use code.  Parse has verified
+		 * objPtr is an expression. Compute it.
+		 */
+
+		Tcl_Obj *sum;
+
+	    extreme:
+		Tcl_ExprObj(interp, objPtr, &sum);
+		TclGetNumberFromObj(NULL, sum, &cd, &numType);
+
+		if (numType == TCL_NUMBER_WIDE) {
+		    /* sum holds an integer in the signed wide range */
+		    *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd);
+		} else {
+		    /* sum holds an integer outside the signed wide range */
+		    /* Truncate to the signed wide range. */
+		    if (mp_isneg((mp_int *)cd)) {
+			*widePtr = LLONG_MIN;
+		    } else {
+			*widePtr = LLONG_MAX;
+		    }
+		}
+		Tcl_DecrRefCount(sum);
+	    }
+	    return TCL_OK;
+	}
+    }
+
+    /* Report a parse error. */
+  parseError:
+    if (interp != NULL) {
+        char * bytes = TclGetString(objPtr);
+        Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+                "bad index \"%s\": must be integer?[+-]integer? or"
+                " end?[+-]integer?", bytes));
+        if (!strncmp(bytes, "end-", 4)) {
+            bytes += 4;
+        }
+        TclCheckBadOctal(interp, bytes);
+        Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
+    }
+    return TCL_ERROR;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * TclGetIntForIndex --
  *
  *	This function returns an integer corresponding to the list index held
@@ -3687,76 +3888,19 @@ TclGetIntForIndex(
     int *indexPtr)		/* Location filled in with an integer
 				 * representing an index. */
 {
-    size_t length;
-    char *opPtr;
-    const char *bytes;
-
-    if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) {
-	return TCL_OK;
-    }
-
-    if (GetEndOffsetFromObj(objPtr, endValue, indexPtr) == TCL_OK) {
-	return TCL_OK;
-    }
-
-    bytes = TclGetString(objPtr);
-    length = objPtr->length;
-
-    /*
-     * Leading whitespace is acceptable in an index.
-     */
-
-    while (length && TclIsSpaceProc(*bytes)) {
-	bytes++;
-	length--;
-    }
-
-    if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr,
-	    TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) {
-	int code, first, second;
-	char savedOp = *opPtr;
+    Tcl_WideInt wide;
 
-	if ((savedOp != '+') && (savedOp != '-')) {
-	    goto parseError;
-	}
-	if (TclIsSpaceProc(opPtr[1])) {
-	    goto parseError;
-	}
-	*opPtr = '\0';
-	code = Tcl_GetInt(interp, bytes, &first);
-	*opPtr = savedOp;
-	if (code == TCL_ERROR) {
-	    goto parseError;
-	}
-	if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) {
-	    goto parseError;
-	}
-	if (savedOp == '+') {
-	    *indexPtr = first + second;
-	} else {
-	    *indexPtr = first - second;
-	}
-	return TCL_OK;
+    if (GetWideForIndex(interp, objPtr, endValue, &wide) == TCL_ERROR) {
+	return TCL_ERROR;
     }
-
-    /*
-     * Report a parse error.
-     */
-
-  parseError:
-    if (interp != NULL) {
-	bytes = TclGetString(objPtr);
-	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
-		"bad index \"%s\": must be integer?[+-]integer? or"
-		" end?[+-]integer?", bytes));
-	if (!strncmp(bytes, "end-", 4)) {
-	    bytes += 4;
-	}
-	TclCheckBadOctal(interp, bytes);
-	Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL);
+    if (wide < INT_MIN) {
+	*indexPtr = INT_MIN;
+    } else if (wide > INT_MAX) {
+	*indexPtr = INT_MAX;
+    } else {
+	*indexPtr = (int) wide;
     }
-
-    return TCL_ERROR;
+    return TCL_OK;
 }
 
 /*
@@ -3779,21 +3923,35 @@ TclGetIntForIndex(
 static int
 GetEndOffsetFromObj(
     Tcl_Obj *objPtr,            /* Pointer to the object to parse */
-    int endValue,               /* The value to be stored at "indexPtr" if
+    Tcl_WideInt endValue,       /* The value to be stored at "indexPtr" if
                                  * "objPtr" holds "end". */
-    int *indexPtr)              /* Location filled in with an integer
+    Tcl_WideInt *widePtr)       /* Location filled in with an integer
                                  * representing an index. */
 {
-    if (SetEndOffsetFromAny(NULL, objPtr) != TCL_OK) {
-	return TCL_ERROR;
+    if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) {
+        Tcl_WideInt offset = objPtr->internalRep.wideValue;
+
+        if ((endValue ^ offset) < 0) {
+            /* Different signs, sum cannot overflow */
+            *widePtr = endValue + offset;
+        } else if (endValue >= 0) {
+            if (endValue < LLONG_MAX - offset) {
+                *widePtr = endValue + offset;
+            } else {
+                *widePtr = LLONG_MAX;
+            }
+	} else {
+            if (endValue > LLONG_MIN - offset) {
+                *widePtr = endValue + offset;
+            } else {
+                *widePtr = LLONG_MIN;
+            }
+	}
+	return TCL_OK;
     }
-
-    /* TODO: Handle overflow cases sensibly */
-    *indexPtr = endValue + (int)objPtr->internalRep.wideValue;
-    return TCL_OK;
+    return TCL_ERROR;
 }
 
-
 /*
  *----------------------------------------------------------------------
  *
@@ -3961,43 +4119,48 @@ TclIndexEncode(
     int after,		/* Value to return for index after end */
     int *indexPtr)	/* Where to write the encoded answer, not NULL */
 {
-    int idx;
+    ClientData cd;
+    Tcl_WideInt wide;
+    int idx, numType, code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType);
 
-    if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &idx)) {
-        /* We parsed a value in the range INT_MIN...INT_MAX */
+    if ((code == TCL_OK) && (numType == TCL_NUMBER_WIDE)) {
+        /* We parsed a value in the range LLONG_MIN...LLONG_MAX */
+	wide = (*(Tcl_WideInt *)cd);
     integerEncode:
-        if (idx < TCL_INDEX_START) {
+        if (wide < TCL_INDEX_START) {
             /* All negative absolute indices are "before the beginning" */
             idx = before;
-        } else if (idx == INT_MAX) {
+        } else if (wide >= INT_MAX) {
             /* This index value is always "after the end" */
             idx = after;
-        }
+        } else {
+	    idx = (int) wide;
+	}
         /* usual case, the absolute index value encodes itself */
-    } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &idx)) {
+    } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &wide)) {
         /*
          * We parsed an end+offset index value.
-         * idx holds the offset value in the range INT_MIN...INT_MAX.
+         * wide holds the offset value in the range LLONG_MIN...LLONG_MAX.
          */
-        if (idx > 0) {
+        if (wide > 0) {
             /*
              * All end+postive or end-negative expressions
              * always indicate "after the end".
              */
             idx = after;
-        } else if (idx < INT_MIN - TCL_INDEX_END) {
+        } else if (wide < INT_MIN - TCL_INDEX_END) {
             /* These indices always indicate "before the beginning */
             idx = before;
         } else {
             /* Encoded end-positive (or end+negative) are offset */
-            idx += TCL_INDEX_END;
+            idx = (int)wide + TCL_INDEX_END;
         }
 
     /* TODO: Consider flag to suppress repeated end-offset parse. */
-    } else if (TCL_OK == TclGetIntForIndexM(interp, objPtr, 0, &idx)) {
+    } else if (TCL_OK == GetWideForIndex(interp, objPtr, 0, &wide)) {
         /*
          * Only reach this case when the index value is a
-         * constant index arithmetic expression, and idx
+         * constant index arithmetic expression, and wide
          * holds the result. Treat it the same as if it were
          * parsed as an absolute integer value.
          */
diff --git a/tests/util.test b/tests/util.test
index 35fc642..34113c0 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -689,13 +689,13 @@ test util-9.31.1 {TclGetIntForIndex} -body {
 } -returnCodes error -match glob -result *
 test util-9.32 {TclGetIntForIndex} -body {
     string index a 0x1FFFFFFFF+0
-} -returnCodes error -match glob -result *
+} -result {}
 test util-9.33 {TclGetIntForIndex} -body {
     string index a 100000000000+0
-} -returnCodes error -match glob -result *
+} -result {}
 test util-9.33.1 {TclGetIntForIndex} -body {
     string index a 0d100000000000+0
-} -returnCodes error -match glob -result *
+} -result {}
 test util-9.34 {TclGetIntForIndex} -body {
     string index a 1.0
 } -returnCodes error -match glob -result *
@@ -728,7 +728,13 @@ test util-9.43 {TclGetIntForIndex} -body {
 } -returnCodes error -match glob -result *
 test util-9.44 {TclGetIntForIndex} -body {
     string index a 0+1000000000000
-} -returnCodes error -match glob -result *
+} -result {}
+test util-9.45 {TclGetIntForIndex} {
+    string index abcd end+2305843009213693950
+} {}
+test util-9.46 {TclGetIntForIndex} {
+    string index abcd end+4294967294
+} {}
 
 test util-10.1 {Tcl_PrintDouble - rounding} {ieeeFloatingPoint} {
     convertDouble 0x0000000000000000
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2018-10-12 18:14:51 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2018-10-12 18:14:51 (GMT)
commit	1d465c4012d7a83025402787ff824e934a840b3c (patch)
tree	c299962b18ca43166dca3b1e4f1c0deefcb13e39
parent	cc945fabb73c3733169f7441140a7b2f2c57572c (diff)
parent	084fba1d17eed8d0e2cf704bb963f42c13ef0aed (diff)
download	tcl-1d465c4012d7a83025402787ff824e934a840b3c.zip tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.gz tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.bz2