diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-10-12 18:14:51 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-10-12 18:14:51 (GMT) |
commit | 1d465c4012d7a83025402787ff824e934a840b3c (patch) | |
tree | c299962b18ca43166dca3b1e4f1c0deefcb13e39 | |
parent | cc945fabb73c3733169f7441140a7b2f2c57572c (diff) | |
parent | 084fba1d17eed8d0e2cf704bb963f42c13ef0aed (diff) | |
download | tcl-1d465c4012d7a83025402787ff824e934a840b3c.zip tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.gz tcl-1d465c4012d7a83025402787ff824e934a840b3c.tar.bz2 |
TIP #502 implementation: Index value reform.
-rw-r--r-- | ChangeLog.2007 | 2 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 4 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 2 | ||||
-rw-r--r-- | generic/tclExecute.c | 2 | ||||
-rw-r--r-- | generic/tclUtil.c | 345 | ||||
-rw-r--r-- | tests/util.test | 14 |
6 files changed, 269 insertions, 100 deletions
diff --git a/ChangeLog.2007 b/ChangeLog.2007 index 5995956..34725e3 100644 --- a/ChangeLog.2007 +++ b/ChangeLog.2007 @@ -1426,7 +1426,7 @@ initialization assumptions of the TIP 280 code in CompileWord(). * generic/tclCompExpr.c: Suppress the attempt to convert to - numeric when pre-compiling a constant expresion indicates an error. + numeric when pre-compiling a constant expression indicates an error. 2007-08-22 Miguel Sofer <msofer@users.sf.net> diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index e6e2a2e..f94c094 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -258,7 +258,7 @@ Tcl_RegexpObjCmd( stringLength = Tcl_GetCharLength(objPtr); if (startIndex) { - TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { offset = 0; @@ -582,7 +582,7 @@ Tcl_RegsubObjCmd( if (startIndex) { int stringLength = Tcl_GetCharLength(objv[1]); - TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { offset = 0; diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index b854b0f..f8835b9 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1759,7 +1759,7 @@ ConvertTreeToTokens( /* * All the Tcl_Tokens allocated and filled belong to - * this subexpresion. The first token is the leading + * this subexpression. The first token is the leading * TCL_TOKEN_SUB_EXPR token, and all the rest (one fewer) * are its components. */ diff --git a/generic/tclExecute.c b/generic/tclExecute.c index ef5086d..7355bc1 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -4761,7 +4761,7 @@ TEBCresume( if ((TclListObjGetElements(interp, valuePtr, &objc, &objv) == TCL_OK) && (value2Ptr->typePtr != &tclListType) - && (TclGetIntForIndexM(NULL , value2Ptr, objc-1, + && (TclGetIntForIndexM(NULL, value2Ptr, objc-1, &index) == TCL_OK)) { TclDecrRefCount(value2Ptr); tosPtr--; diff --git a/generic/tclUtil.c b/generic/tclUtil.c index c488a42..97879c7 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -15,6 +15,7 @@ #include "tclInt.h" #include "tclParse.h" #include "tclStringTrim.h" +#include "tommath.h" #include <math.h> /* @@ -107,9 +108,11 @@ static Tcl_ThreadDataKey precisionKey; static void ClearHash(Tcl_HashTable *tablePtr); static void FreeProcessGlobalValue(ClientData clientData); static void FreeThreadHash(ClientData clientData); -static int GetEndOffsetFromObj(Tcl_Obj *objPtr, int endValue, - int *indexPtr); +static int GetEndOffsetFromObj(Tcl_Obj *objPtr, + Tcl_WideInt endValue, Tcl_WideInt *indexPtr); static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr); +static int GetWideForIndex(Tcl_Interp *interp, Tcl_Obj *objPtr, + Tcl_WideInt endValue, Tcl_WideInt *widePtr); static int SetEndOffsetFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static int FindElement(Tcl_Interp *interp, const char *string, @@ -3654,6 +3657,204 @@ TclFormatInt( /* *---------------------------------------------------------------------- * + * GetWideForIndex -- + * + * This function produces a wide integer value corresponding to the + * list index held in *objPtr. The parsing supports all values + * recognized as any size of integer, and the syntaxes end[-+]$integer + * and $integer[-+]$integer. The argument endValue is used to give + * the meaning of the literal index value "end". + * + * Results: + * When parsing of *objPtr successfully recognizes an index value, + * TCL_OK is returned, and the wide integer value corresponding to + * the recognized index value is written to *widePtr. When parsing + * fails, TCL_ERROR is returned and error information is written to + * interp, if non-NULL. + * + * Side effects: + * The type of *objPtr may change. + * + *---------------------------------------------------------------------- + */ + +static int +GetWideForIndex( + Tcl_Interp *interp, /* Interpreter to use for error reporting. If + * NULL, then no error message is left after + * errors. */ + Tcl_Obj *objPtr, /* Points to the value to be parsed */ + Tcl_WideInt endValue, /* The value to be stored at *widePtr if + * objPtr holds "end". + * NOTE: this value may be negative. */ + Tcl_WideInt *widePtr) /* Location filled in with a wide integer + * representing an index. */ +{ + ClientData cd; + const char *opPtr; + int numType, length, t1 = 0, t2 = 0; + int code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType); + + if (code == TCL_OK) { + if (numType == TCL_NUMBER_WIDE) { + /* objPtr holds an integer in the signed wide range */ + *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd); + return TCL_OK; + } + if (numType == TCL_NUMBER_BIG) { + /* objPtr holds an integer outside the signed wide range */ + /* Truncate to the signed wide range. */ + if (mp_isneg((mp_int *)cd)) { + *widePtr = LLONG_MIN; + } else { + *widePtr = LLONG_MAX; + } + return TCL_OK; + } + /* Must be a double -> not a valid index */ + goto parseError; + } + + /* objPtr does not hold a number, check the end+/- format... */ + if (GetEndOffsetFromObj(objPtr, endValue, widePtr) == TCL_OK) { + return TCL_OK; + } + + /* If we reach here, the string rep of objPtr exists. */ + + /* + * The valid index syntax does not include any value that is + * a list of more than one element. This is necessary so that + * lists of index values can be reliably distinguished from any + * single index value. + */ + + /* + * Quick scan to see if multi-value list is even possible. + * This relies on TclGetString() returning a NUL-terminated string. + */ + if ((TclMaxListLength(TclGetString(objPtr), -1, NULL) > 1) + + /* If it's possible, do the full list parse. */ + && (TCL_OK == Tcl_ListObjLength(NULL, objPtr, &length)) + && (length > 1)) { + goto parseError; + } + + /* Passed the list screen, so parse for index arithmetic expression */ + if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, NULL, -1, &opPtr, + TCL_PARSE_INTEGER_ONLY)) { + Tcl_WideInt w1=0, w2=0; + + /* value starts with valid integer... */ + + if ((*opPtr == '-') || (*opPtr == '+')) { + /* ... value continues with [-+] ... */ + + /* Save first integer as wide if possible */ + TclGetNumberFromObj(NULL, objPtr, &cd, &t1); + if (t1 == TCL_NUMBER_WIDE) { + w1 = (*(Tcl_WideInt *)cd); + } + + if (TCL_OK == TclParseNumber(NULL, objPtr, NULL, opPtr + 1, + -1, NULL, TCL_PARSE_INTEGER_ONLY)) { + /* ... value concludes with second valid integer */ + + /* Save second integer as wide if possible */ + TclGetNumberFromObj(NULL, objPtr, &cd, &t2); + if (t2 == TCL_NUMBER_WIDE) { + w2 = (*(Tcl_WideInt *)cd); + } + } + } + /* Clear invalid intreps left by TclParseNumber */ + TclFreeIntRep(objPtr); + + if (t1 && t2) { + /* We have both integer values */ + if ((t1 == TCL_NUMBER_WIDE) && (t2 == TCL_NUMBER_WIDE)) { + /* Both are wide, do wide-integer math */ + if (*opPtr == '-') { + if ((w2 == LLONG_MIN) && (interp != NULL)) { + goto extreme; + } + w2 = -w2; + } + + if ((w1 ^ w2) < 0) { + /* Different signs, sum cannot overflow */ + *widePtr = w1 + w2; + } else if (w1 >= 0) { + if (w1 < LLONG_MAX - w2) { + *widePtr = w1 + w2; + } else { + *widePtr = LLONG_MAX; + } + } else { + if (w1 > LLONG_MIN - w2) { + *widePtr = w1 + w2; + } else { + *widePtr = LLONG_MIN; + } + } + } else if (interp == NULL) { + /* + * We use an interp to do bignum index calculations. + * If we don't get one, call all indices with bignums errors, + * and rely on callers to handle it. + */ + return TCL_ERROR; + } else { + /* + * At least one is big, do bignum math. Little reason to + * value performance here. Re-use code. Parse has verified + * objPtr is an expression. Compute it. + */ + + Tcl_Obj *sum; + + extreme: + Tcl_ExprObj(interp, objPtr, &sum); + TclGetNumberFromObj(NULL, sum, &cd, &numType); + + if (numType == TCL_NUMBER_WIDE) { + /* sum holds an integer in the signed wide range */ + *widePtr = (Tcl_WideInt)(*(Tcl_WideInt *)cd); + } else { + /* sum holds an integer outside the signed wide range */ + /* Truncate to the signed wide range. */ + if (mp_isneg((mp_int *)cd)) { + *widePtr = LLONG_MIN; + } else { + *widePtr = LLONG_MAX; + } + } + Tcl_DecrRefCount(sum); + } + return TCL_OK; + } + } + + /* Report a parse error. */ + parseError: + if (interp != NULL) { + char * bytes = TclGetString(objPtr); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad index \"%s\": must be integer?[+-]integer? or" + " end?[+-]integer?", bytes)); + if (!strncmp(bytes, "end-", 4)) { + bytes += 4; + } + TclCheckBadOctal(interp, bytes); + Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL); + } + return TCL_ERROR; +} + +/* + *---------------------------------------------------------------------- + * * TclGetIntForIndex -- * * This function returns an integer corresponding to the list index held @@ -3687,76 +3888,19 @@ TclGetIntForIndex( int *indexPtr) /* Location filled in with an integer * representing an index. */ { - size_t length; - char *opPtr; - const char *bytes; - - if (TclGetIntFromObj(NULL, objPtr, indexPtr) == TCL_OK) { - return TCL_OK; - } - - if (GetEndOffsetFromObj(objPtr, endValue, indexPtr) == TCL_OK) { - return TCL_OK; - } - - bytes = TclGetString(objPtr); - length = objPtr->length; - - /* - * Leading whitespace is acceptable in an index. - */ - - while (length && TclIsSpaceProc(*bytes)) { - bytes++; - length--; - } - - if (TclParseNumber(NULL, NULL, NULL, bytes, length, (const char **)&opPtr, - TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_WHITESPACE) == TCL_OK) { - int code, first, second; - char savedOp = *opPtr; + Tcl_WideInt wide; - if ((savedOp != '+') && (savedOp != '-')) { - goto parseError; - } - if (TclIsSpaceProc(opPtr[1])) { - goto parseError; - } - *opPtr = '\0'; - code = Tcl_GetInt(interp, bytes, &first); - *opPtr = savedOp; - if (code == TCL_ERROR) { - goto parseError; - } - if (TCL_ERROR == Tcl_GetInt(interp, opPtr+1, &second)) { - goto parseError; - } - if (savedOp == '+') { - *indexPtr = first + second; - } else { - *indexPtr = first - second; - } - return TCL_OK; + if (GetWideForIndex(interp, objPtr, endValue, &wide) == TCL_ERROR) { + return TCL_ERROR; } - - /* - * Report a parse error. - */ - - parseError: - if (interp != NULL) { - bytes = TclGetString(objPtr); - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "bad index \"%s\": must be integer?[+-]integer? or" - " end?[+-]integer?", bytes)); - if (!strncmp(bytes, "end-", 4)) { - bytes += 4; - } - TclCheckBadOctal(interp, bytes); - Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX", NULL); + if (wide < INT_MIN) { + *indexPtr = INT_MIN; + } else if (wide > INT_MAX) { + *indexPtr = INT_MAX; + } else { + *indexPtr = (int) wide; } - - return TCL_ERROR; + return TCL_OK; } /* @@ -3779,21 +3923,35 @@ TclGetIntForIndex( static int GetEndOffsetFromObj( Tcl_Obj *objPtr, /* Pointer to the object to parse */ - int endValue, /* The value to be stored at "indexPtr" if + Tcl_WideInt endValue, /* The value to be stored at "indexPtr" if * "objPtr" holds "end". */ - int *indexPtr) /* Location filled in with an integer + Tcl_WideInt *widePtr) /* Location filled in with an integer * representing an index. */ { - if (SetEndOffsetFromAny(NULL, objPtr) != TCL_OK) { - return TCL_ERROR; + if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) { + Tcl_WideInt offset = objPtr->internalRep.wideValue; + + if ((endValue ^ offset) < 0) { + /* Different signs, sum cannot overflow */ + *widePtr = endValue + offset; + } else if (endValue >= 0) { + if (endValue < LLONG_MAX - offset) { + *widePtr = endValue + offset; + } else { + *widePtr = LLONG_MAX; + } + } else { + if (endValue > LLONG_MIN - offset) { + *widePtr = endValue + offset; + } else { + *widePtr = LLONG_MIN; + } + } + return TCL_OK; } - - /* TODO: Handle overflow cases sensibly */ - *indexPtr = endValue + (int)objPtr->internalRep.wideValue; - return TCL_OK; + return TCL_ERROR; } - /* *---------------------------------------------------------------------- * @@ -3961,43 +4119,48 @@ TclIndexEncode( int after, /* Value to return for index after end */ int *indexPtr) /* Where to write the encoded answer, not NULL */ { - int idx; + ClientData cd; + Tcl_WideInt wide; + int idx, numType, code = TclGetNumberFromObj(NULL, objPtr, &cd, &numType); - if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &idx)) { - /* We parsed a value in the range INT_MIN...INT_MAX */ + if ((code == TCL_OK) && (numType == TCL_NUMBER_WIDE)) { + /* We parsed a value in the range LLONG_MIN...LLONG_MAX */ + wide = (*(Tcl_WideInt *)cd); integerEncode: - if (idx < TCL_INDEX_START) { + if (wide < TCL_INDEX_START) { /* All negative absolute indices are "before the beginning" */ idx = before; - } else if (idx == INT_MAX) { + } else if (wide >= INT_MAX) { /* This index value is always "after the end" */ idx = after; - } + } else { + idx = (int) wide; + } /* usual case, the absolute index value encodes itself */ - } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &idx)) { + } else if (TCL_OK == GetEndOffsetFromObj(objPtr, 0, &wide)) { /* * We parsed an end+offset index value. - * idx holds the offset value in the range INT_MIN...INT_MAX. + * wide holds the offset value in the range LLONG_MIN...LLONG_MAX. */ - if (idx > 0) { + if (wide > 0) { /* * All end+postive or end-negative expressions * always indicate "after the end". */ idx = after; - } else if (idx < INT_MIN - TCL_INDEX_END) { + } else if (wide < INT_MIN - TCL_INDEX_END) { /* These indices always indicate "before the beginning */ idx = before; } else { /* Encoded end-positive (or end+negative) are offset */ - idx += TCL_INDEX_END; + idx = (int)wide + TCL_INDEX_END; } /* TODO: Consider flag to suppress repeated end-offset parse. */ - } else if (TCL_OK == TclGetIntForIndexM(interp, objPtr, 0, &idx)) { + } else if (TCL_OK == GetWideForIndex(interp, objPtr, 0, &wide)) { /* * Only reach this case when the index value is a - * constant index arithmetic expression, and idx + * constant index arithmetic expression, and wide * holds the result. Treat it the same as if it were * parsed as an absolute integer value. */ diff --git a/tests/util.test b/tests/util.test index 35fc642..34113c0 100644 --- a/tests/util.test +++ b/tests/util.test @@ -689,13 +689,13 @@ test util-9.31.1 {TclGetIntForIndex} -body { } -returnCodes error -match glob -result * test util-9.32 {TclGetIntForIndex} -body { string index a 0x1FFFFFFFF+0 -} -returnCodes error -match glob -result * +} -result {} test util-9.33 {TclGetIntForIndex} -body { string index a 100000000000+0 -} -returnCodes error -match glob -result * +} -result {} test util-9.33.1 {TclGetIntForIndex} -body { string index a 0d100000000000+0 -} -returnCodes error -match glob -result * +} -result {} test util-9.34 {TclGetIntForIndex} -body { string index a 1.0 } -returnCodes error -match glob -result * @@ -728,7 +728,13 @@ test util-9.43 {TclGetIntForIndex} -body { } -returnCodes error -match glob -result * test util-9.44 {TclGetIntForIndex} -body { string index a 0+1000000000000 -} -returnCodes error -match glob -result * +} -result {} +test util-9.45 {TclGetIntForIndex} { + string index abcd end+2305843009213693950 +} {} +test util-9.46 {TclGetIntForIndex} { + string index abcd end+4294967294 +} {} test util-10.1 {Tcl_PrintDouble - rounding} {ieeeFloatingPoint} { convertDouble 0x0000000000000000 |