From 6798b2404f883074422c40ca7d196ad174a2d1ef Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 19:23:42 +0000 Subject: Refactor the index value encode/decode machinery for broader use. Make use of it to fix index value flaws in [lsearch]. --- generic/tclCmdIL.c | 43 +++++++++------- generic/tclCompile.h | 9 ---- generic/tclInt.h | 15 ++++++ generic/tclUtil.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 27 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index afb5b62..7a13b71 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -64,8 +64,9 @@ typedef struct SortInfo { * SORTMODE_COMMAND. Pre-initialized to hold * base of command. */ int *indexv; /* If the -index option was specified, this - * holds the indexes contained in the list - * supplied as an argument to that option. + * holds an encoding of the indexes contained + * in the list supplied as an argument to + * that option. * NULL if no indexes supplied, and points to * singleIndex field when only one * supplied. */ @@ -2913,7 +2914,7 @@ Tcl_LsearchObjCmd( Tcl_Obj *const objv[]) /* Argument values. */ { const char *bytes, *patternBytes; - int i, match, index, result, listc, length, elemLen, bisect; + int i, match, index, result=TCL_OK, listc, length, elemLen, bisect; int dataType, isIncreasing, lower, upper, offset; Tcl_WideInt patWide, objWide; int allMatches, inlineReturn, negatedMatch, returnSubindices, noCase; @@ -3113,13 +3114,26 @@ Tcl_LsearchObjCmd( */ for (j=0 ; jresultCode = TCL_ERROR; return NULL; } - index = infoPtr->indexv[i]; - /* - * Adjust for end-based indexing. - */ - - if (index < SORTIDX_NONE) { - index += listLen + 1; - } + index = TclIndexDecode(infoPtr->indexv[i], listLen - 1); if (Tcl_ListObjIndex(infoPtr->interp, objPtr, index, ¤tObj) != TCL_OK) { diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 9501d93..d842fdd 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1686,15 +1686,6 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, #define TCL_NO_ELEMENT 2 /* Do not push the array element. */ /* - * Special value used by TclGetIndexFromToken to encoding the "end" index. - */ - -#define TCL_INDEX_END (-2) -#define TCL_INDEX_BEFORE (-1) -#define TCL_INDEX_START (0) -#define TCL_INDEX_AFTER (INT_MAX) - -/* * DTrace probe macros (NOPs if DTrace support is not enabled). */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 1b1b078..3821e42 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4013,6 +4013,21 @@ MODULE_SCOPE unsigned TclHashObjKey(Tcl_HashTable *tablePtr, void *keyPtr); MODULE_SCOPE int TclFullFinalizationRequested(void); /* + * Utility routines for encoding index values as integers. Used by both + * some of the command compilers and by [lsort] and [lsearch]. + */ + +MODULE_SCOPE int TclIndexEncode(Tcl_Interp *interp, Tcl_Obj *objPtr, + int before, int after, int *indexPtr); +MODULE_SCOPE int TclIndexDecode(int encoded, int endValue); + +/* Constants used in index value encoding routines. */ +#define TCL_INDEX_END (-2) +#define TCL_INDEX_BEFORE (-1) +#define TCL_INDEX_START (0) +#define TCL_INDEX_AFTER (INT_MAX) + +/* *---------------------------------------------------------------- * Macros used by the Tcl core to create and release Tcl objects. * TclNewObj(objPtr) creates a new object denoting an empty string. diff --git a/generic/tclUtil.c b/generic/tclUtil.c index beeaae1..f6a92fc 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -3813,6 +3813,143 @@ SetEndOffsetFromAny( /* *---------------------------------------------------------------------- * + * TclIndexEncode -- + * + * Parse objPtr to determine if it is an index value. Two cases + * are possible. The value objPtr might be parsed as an absolute + * index value in the C signed int range. Note that this includes + * index values that are integers as presented and it includes index + * arithmetic expressions. The absolute index values that can be + * directly meaningful as an index into either a list or a string are + * those integer values >= TCL_INDEX_START (0) + * and < TCL_INDEX_AFTER (INT_MAX). + * The largest string supported in Tcl 8 has bytelength INT_MAX. + * This means the largest supported character length is also INT_MAX, + * and the index of the last character in a string of length INT_MAX + * is INT_MAX-1. + * + * Any absolute index value parsed outside that range is encoded + * using the before and after values passed in by the + * caller as the encoding to use for indices that are either + * less than or greater than the usable index range. TCL_INDEX_AFTER + * is available as a good choice for most callers to use for + * after. Likewise, the value TCL_INDEX_BEFORE is good for + * most callers to use for before. Other values are possible + * when the caller knows it is helpful in producing its own behavior + * for indices before and after the indexed item. + * + * A token can also be parsed as an end-relative index expression. + * All end-relative expressions that indicate an index larger + * than end (end+2, end--5) point beyond the end of the indexed + * collection, and can be encoded as after. The end-relative + * expressions that indicate an index less than or equal to end + * are encoded relative to the value TCL_INDEX_END (-2). The + * index "end" is encoded as -2, down to the index "end-0x7ffffffe" + * which is encoded as INT_MIN. Since the largest index into a + * string possible in Tcl 8 is 0x7ffffffe, the interpretation of + * "end-0x7ffffffe" for that largest string would be 0. Thus, + * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed, + * they can be encoded with the before value. + * + * These details will require re-examination whenever string and + * list length limits are increased, but that will likely also + * mean a revised routine capable of returning Tcl_WideInt values. + * + * Returns: + * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. + * + * Side effects: + * When TCL_OK is returned, the encoded index value is written + * to *indexPtr. + * + *---------------------------------------------------------------------- + */ + +int +TclIndexEncode( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* Index value to parse */ + int before, /* Value to return for index before beginning */ + int after, /* Value to return for index after end */ + int *indexPtr) /* Where to write the encoded answer, not NULL */ +{ + int idx; + + if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &idx)) { + /* We parsed a value in the range INT_MIN...INT_MAX */ + integerEncode: + if (idx < TCL_INDEX_START) { + /* All negative absolute indices are "before the beginning" */ + idx = before; + } else if (idx == INT_MAX) { + /* This index value is always "after the end" */ + idx = after; + } + /* usual case, the absolute index value encodes itself */ + } else if (TCL_OK == TclGetEndOffsetFromObj(NULL, objPtr, 0, &idx)) { + /* + * We parsed an end+offset index value. + * idx holds the offset value in the range INT_MIN...INT_MAX. + */ + if (idx > 0) { + /* + * All end+postive or end-negative expressions + * always indicate "after the end". + */ + idx = after; + } else if (idx < INT_MIN - TCL_INDEX_END) { + /* These indices always indicate "before the beginning */ + idx = before; + } else { + /* Encoded end-positive (or end+negative) are offset */ + idx += TCL_INDEX_END; + } + + /* TODO: Consider flag to suppress repeated end-offset parse. */ + } else if (TCL_OK == TclGetIntForIndexM(interp, objPtr, 0, &idx)) { + /* + * Only reach this case when the index value is a + * constant index arithmetic expression, and idx + * holds the result. Treat it the same as if it were + * parsed as an absolute integer value. + */ + goto integerEncode; + } else { + return TCL_ERROR; + } + *indexPtr = idx; + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * TclIndexDecode -- + * + * Decodes a value previously encoded by TclIndexEncode. The argument + * endValue indicates what value of "end" should be used in the + * decoding. + * + * Results: + * The decoded index value. + * + *---------------------------------------------------------------------- + */ + +int +TclIndexDecode( + int encoded, /* Value to decode */ + int endValue) /* Meaning of "end" to use, > TCL_INDEX_END */ +{ + if (encoded <= TCL_INDEX_END) { + return (encoded - TCL_INDEX_END) + endValue; + } + return encoded; +} + +/* + *---------------------------------------------------------------------- + * * TclCheckBadOctal -- * * This function checks for a bad octal value and appends a meaningful -- cgit v0.12