From 847a048c18e68af9dd1140e12e922544b4fb25c6 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 6 Mar 2018 20:09:00 +0000 Subject: New internal routine TclGetEndOffsetFromObj. --- generic/tclInt.h | 2 ++ generic/tclUtil.c | 46 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 567a28c..1b1b078 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2978,6 +2978,8 @@ MODULE_SCOPE int TclGetChannelFromObj(Tcl_Interp *interp, MODULE_SCOPE CmdFrame * TclGetCmdFrameForProcedure(Proc *procPtr); MODULE_SCOPE int TclGetCompletionCodeFromObj(Tcl_Interp *interp, Tcl_Obj *value, int *code); +MODULE_SCOPE int TclGetEndOffsetFromObj(Tcl_Interp *interp, + Tcl_Obj *objPtr, int endValue, int *indexPtr); MODULE_SCOPE int TclGetNumberFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, ClientData *clientDataPtr, int *typePtr); diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3424295..beeaae1 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -3575,13 +3575,7 @@ TclGetIntForIndex( return TCL_OK; } - if (SetEndOffsetFromAny(NULL, objPtr) == TCL_OK) { - /* - * If the object is already an offset from the end of the list, or can - * be converted to one, use it. - */ - - *indexPtr = endValue + objPtr->internalRep.longValue; + if (TclGetEndOffsetFromObj(NULL, objPtr, endValue, indexPtr) == TCL_OK) { return TCL_OK; } @@ -3684,6 +3678,42 @@ UpdateStringOfEndOffset( /* *---------------------------------------------------------------------- * + * TclGetEndOffsetFromObj -- + * + * Look for a string of the form "end[+-]offset" and convert it to an + * internal representation holding the offset. + * + * Results: + * Tcl return code. + * + * Side effects: + * May store a Tcl_ObjType. + * + *---------------------------------------------------------------------- + */ + +int +TclGetEndOffsetFromObj( + Tcl_Interp *interp, /* For error reporting, may be NULL. */ + Tcl_Obj *objPtr, /* Pointer to the object to parse */ + int endValue, /* The value to be stored at "indexPtr" if + * "objPtr" holds "end". */ + int *indexPtr) /* Location filled in with an integer + * representing an index. */ +{ + if (SetEndOffsetFromAny(interp, objPtr) != TCL_OK) { + return TCL_ERROR; + } + + /* TODO: Handle overflow cases sensibly */ + *indexPtr = endValue + (int)objPtr->internalRep.longValue; + return TCL_OK; +} + + +/* + *---------------------------------------------------------------------- + * * SetEndOffsetFromAny -- * * Look for a string of the form "end[+-]offset" and convert it to an @@ -3750,6 +3780,8 @@ SetEndOffsetFromAny( return TCL_ERROR; } if (bytes[3] == '-') { + + /* TODO: Review overflow concerns here! */ offset = -offset; } } else { -- cgit v0.12 From 596e89b17d16efdca4d428cf91667142cc1336f3 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 6 Mar 2018 20:27:39 +0000 Subject: Have assembler use same index value parser as the bytecode compiler. --- generic/tclAssembly.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 120fd9a..273a012 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -2248,24 +2248,17 @@ GetListIndexOperand( Tcl_Token* tokenPtr = *tokenPtrPtr; /* INOUT: Pointer to the next token in the * source code */ - Tcl_Obj* intObj; /* Integer from the source code */ - int status; /* Tcl status return */ - - /* - * Extract the next token as a string. - */ - - if (GetNextOperand(assemEnvPtr, tokenPtrPtr, &intObj) != TCL_OK) { - return TCL_ERROR; - } /* * Convert to an integer, advance to the next token and return. */ - - status = TclGetIntForIndex(interp, intObj, -2, result); - Tcl_DecrRefCount(intObj); + int status = TclGetIndexFromToken(tokenPtr, result); *tokenPtrPtr = TokenAfter(tokenPtr); + if (status == TCL_ERROR && interp) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "unsupported index value: \"%.*s\"", tokenPtr->size, + tokenPtr->start)); + } return status; } -- cgit v0.12 From 720c8de45208caf2ba463bac1ad09ea5766fd34c Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 6 Mar 2018 20:59:44 +0000 Subject: rework error handling to keep test suite happy. --- generic/tclAssembly.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 273a012..f40d662 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -2248,16 +2248,22 @@ GetListIndexOperand( Tcl_Token* tokenPtr = *tokenPtrPtr; /* INOUT: Pointer to the next token in the * source code */ + Tcl_Obj *value; + int status; - /* - * Convert to an integer, advance to the next token and return. - */ - int status = TclGetIndexFromToken(tokenPtr, result); + /* General operand validity check */ + if (GetNextOperand(assemEnvPtr, tokenPtrPtr, &value) != TCL_OK) { + return TCL_ERROR; + } + Tcl_DecrRefCount(value); + + /* Convert to an integer, advance to the next token and return. */ + status = TclGetIndexFromToken(tokenPtr, result); *tokenPtrPtr = TokenAfter(tokenPtr); if (status == TCL_ERROR && interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "unsupported index value: \"%.*s\"", tokenPtr->size, - tokenPtr->start)); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%.*s\"", + tokenPtr->size, tokenPtr->start)); + Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADINDEX", NULL); } return status; } -- cgit v0.12 From c68d043691c94408564f252d18ffce9db7afcdad Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 6 Mar 2018 21:59:53 +0000 Subject: try to fix [db36fa5122]: better compiled variants of several indices-related commands, test-cases extended --- generic/tclCompCmdsGR.c | 57 +++++++++++++++++++++++++++++++++++++++---------- generic/tclCompCmdsSZ.c | 15 +++++++++---- generic/tclCompile.h | 6 ++++-- generic/tclExecute.c | 6 ++++-- tests/lindex.test | 9 ++++++++ tests/lrange.test | 18 ++++++++++++++++ 6 files changed, 92 insertions(+), 19 deletions(-) diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index 375653b..a5f2ee4 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -40,6 +40,11 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, * * Returns: * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. + * The return value of *index is: + * >= 0 -- constant index from start, + * == minBoundary -- (TCL_INDEX_OUT_OF_RANGE, 0) if out of range (before start) + * == maxBoundary -- (INT_MAX, TCL_INDEX_OUT_OF_RANGE) if out of range (after end) + * <= -2 -- (<= TCL_INDEX_END) negative index from end. * * Side effects: * Sets *index to the index value if successful. @@ -50,7 +55,9 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, int TclGetIndexFromToken( Tcl_Token *tokenPtr, - int *index) + int *index, + int minBoundary, + int maxBoundary) { Tcl_Obj *tmpObj = Tcl_NewObj(); int result, idx; @@ -62,13 +69,26 @@ TclGetIndexFromToken( result = TclGetIntFromObj(NULL, tmpObj, &idx); if (result == TCL_OK) { - if (idx < 0) { - result = TCL_ERROR; + /* out of range (..., -2] */ + if (idx <= TCL_INDEX_END) { + idx = minBoundary; + /* before start */ } } else { result = TclGetIntForIndexM(NULL, tmpObj, TCL_INDEX_END, &idx); - if (result == TCL_OK && idx > TCL_INDEX_END) { - result = TCL_ERROR; + if (result == TCL_OK) { + int endSyntax = (tmpObj->length >= 3 && *tmpObj->bytes == 'e'); + /* + * Check computed index results to out of range (after end or negative constant), + * set it to -1 in order to avoid ambiguity with "end[+-integer] syntax" + */ + if (idx > TCL_INDEX_END && endSyntax) { + /* after end [end+1, ...) */ + idx = maxBoundary; /* may be TCL_INDEX_OUT_OF_RANGE or INT_MAX */ + } else if (idx < 0 && !endSyntax) { + /* before start, negative constant (..., -1-1] */ + idx = minBoundary; + } } } Tcl_DecrRefCount(tmpObj); @@ -1103,7 +1123,8 @@ TclCompileLindexCmd( } idxTokenPtr = TokenAfter(valTokenPtr); - if (TclGetIndexFromToken(idxTokenPtr, &idx) == TCL_OK) { + if (TclGetIndexFromToken(idxTokenPtr, &idx, + TCL_INDEX_OUT_OF_RANGE, TCL_INDEX_OUT_OF_RANGE) == TCL_OK) { /* * All checks have been completed, and we have exactly one of these * constructs: @@ -1338,14 +1359,17 @@ TclCompileLrangeCmd( */ tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { return TCL_ERROR; } tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { return TCL_ERROR; } + if (idx1 == INT_MAX && idx2 == INT_MAX) { + idx2 = TCL_INDEX_OUT_OF_RANGE; + } /* * Issue instructions. It's not safe to skip doing the LIST_RANGE, as @@ -1395,7 +1419,7 @@ TclCompileLinsertCmd( */ tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx, 0, INT_MAX) != TCL_OK) { return TCL_ERROR; } @@ -1490,12 +1514,12 @@ TclCompileLreplaceCmd( */ tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { return TCL_ERROR; } tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx2, -1, TCL_INDEX_END) != TCL_OK) { return TCL_ERROR; } @@ -1514,10 +1538,21 @@ TclCompileLreplaceCmd( * now. [Bug 47ac84309b] */ + if (idx1 == INT_MAX) { + /* consider special handling for too large first index + * "list doesn't contain element ...", so still not compiled */ + return TCL_ERROR; + } + if ((idx1 <= TCL_INDEX_END) != (idx2 <= TCL_INDEX_END)) { return TCL_ERROR; } + if (idx1 == -1) { + /* linsert before start or replace from start */ + idx1 = 0; + } + if (idx2 != TCL_INDEX_END && idx2 >= 0 && idx2 < idx1) { idx2 = idx1 - 1; } diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index fb0981d..ef1a4b0 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -934,12 +934,15 @@ TclCompileStringRangeCmd( * Parse the two indices. */ - if (TclGetIndexFromToken(fromTokenPtr, &idx1) != TCL_OK) { + if (TclGetIndexFromToken(fromTokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { goto nonConstantIndices; } - if (TclGetIndexFromToken(toTokenPtr, &idx2) != TCL_OK) { + if (TclGetIndexFromToken(toTokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { goto nonConstantIndices; } + if (idx1 == INT_MAX && idx2 == INT_MAX) { + idx2 = TCL_INDEX_OUT_OF_RANGE; + } /* * Push the operand onto the stack and then the substring operation. @@ -992,12 +995,16 @@ TclCompileStringReplaceCmd( */ tokenPtr = TokenAfter(valueTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { goto genericReplace; } tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { + goto genericReplace; + } + if (idx1 == INT_MAX && idx2 == INT_MAX) { + /* avoid replacement of last char in large string (just don't compile). */ goto genericReplace; } diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 1c64a21..2f23b90 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1121,7 +1121,8 @@ MODULE_SCOPE int TclFixupForwardJump(CompileEnv *envPtr, int distThreshold); MODULE_SCOPE void TclFreeCompileEnv(CompileEnv *envPtr); MODULE_SCOPE void TclFreeJumpFixupArray(JumpFixupArray *fixupArrayPtr); -MODULE_SCOPE int TclGetIndexFromToken(Tcl_Token *tokenPtr, int *index); +MODULE_SCOPE int TclGetIndexFromToken(Tcl_Token *tokenPtr, int *index, + int minBoundary, int maxBoundary); MODULE_SCOPE void TclInitByteCodeObj(Tcl_Obj *objPtr, CompileEnv *envPtr); MODULE_SCOPE void TclInitCompileEnv(Tcl_Interp *interp, @@ -1688,7 +1689,8 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, * Special value used by TclGetIndexFromToken to encoding the "end" index. */ -#define TCL_INDEX_END (-2) +#define TCL_INDEX_END (-2) +#define TCL_INDEX_OUT_OF_RANGE (-1) /* * DTrace probe macros (NOPs if DTrace support is not enabled). diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 93ed50b..d345899 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5091,7 +5091,7 @@ TEBCresume( /* * Select the list item based on the index. Negative operand means - * end-based indexing. + * end-based indexing (-2, ...), and -1 means out of range. */ if (opnd < -1) { @@ -5649,7 +5649,9 @@ TEBCresume( * Adjust indices for end-based indexing. */ - if (fromIdx < -1) { + if (fromIdx == -1) { + fromIdx = 0; + } else if (fromIdx < -1) { fromIdx += 1 + length; if (fromIdx < 0) { fromIdx = 0; diff --git a/tests/lindex.test b/tests/lindex.test index b86e2e0..e513b62 100644 --- a/tests/lindex.test +++ b/tests/lindex.test @@ -79,6 +79,15 @@ test lindex-3.7 {indexes don't shimmer wide ints} { set x [expr {(wide(1)<<31) - 2}] list $x [lindex {1 2 3} $x] [incr x] [incr x] } {2147483646 {} 2147483647 2147483648} +test lindex-3.8 {compiled with static indices out of range, negative} { + list [lindex {a b c} -1] [lindex {a b c} -2] [lindex {a b c} -3] +} [lrepeat 3 {}] +test lindex-3.9 {compiled with calculated indices out of range, negative constant} { + list [lindex {a b c} -1-1] [lindex {a b c} -2+0] [lindex {a b c} -2+1] +} [lrepeat 3 {}] +test lindex-3.10 {compiled with calculated indices out of range, after end} { + list [lindex {a b c} end+1] [lindex {a b c} end+2] [lindex {a b c} end+3] +} [lrepeat 3 {}] # Indices relative to end diff --git a/tests/lrange.test b/tests/lrange.test index 17a757e..ba10354 100644 --- a/tests/lrange.test +++ b/tests/lrange.test @@ -90,6 +90,24 @@ test lrange-3.1 {Bug 3588366: end-offsets before start} { lrange $l 0 end-5 }} {1 2 3 4 5} } {} + +test lrange-3.2 {compiled with static indices out of range, negative} { + list [lrange {a b c} -1 -2] [lrange {a b c} -2 -1] [lrange {a b c} -3 -2] [lrange {a b c} -2 -3] +} [lrepeat 4 {}] +test lrange-3.3 {compiled with calculated indices out of range, negative constant} { + list [lrange {a b c} 0-1 -1-1] [lrange {a b c} -2+0 0-1] [lrange {a b c} -2-1 -2+1] [lrange {a b c} -2+1 -2-1] +} [lrepeat 4 {}] +test lrange-3.4 {compiled with calculated indices out of range, after end} { + list [lrange {a b c} end+1 end+2] [lrange {a b c} end+2 end+1] [lrange {a b c} end+2 end+3] [lrange {a b c} end+3 end+2] +} [lrepeat 4 {}] + +test lrange-3.5 {compiled with calculated indices, start out of range (negative)} { + list [lrange {a b c} -1 1] [lrange {a b c} -1+0 end-1] [lrange {a b c} -2 1] [lrange {a b c} -2+0 0+1] +} [lrepeat 4 {a b}] +test lrange-3.6 {compiled with calculated indices, end out of range (after end)} { + list [lrange {a b c} 1 end+1] [lrange {a b c} 1+0 2+1] [lrange {a b c} 1 end+1] [lrange {a b c} end-1 3+1] +} [lrepeat 4 {b c}] + # cleanup ::tcltest::cleanupTests -- cgit v0.12 From 8c2cbf32116e7f5f5952e0d063a90125fcd4147d Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 6 Mar 2018 22:27:47 +0000 Subject: test cases for "lsort": coverage for "missing from sublist" error case with negative index (-1-1, -2) --- tests/cmdIL.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/cmdIL.test b/tests/cmdIL.test index 23a5f96..7636adc 100644 --- a/tests/cmdIL.test +++ b/tests/cmdIL.test @@ -203,6 +203,18 @@ test cmdIL-3.4.1 {SortCompare procedure, -index option} -body { test cmdIL-3.5 {SortCompare procedure, -index option} -body { lsort -integer -index 2 {{20 10 13} {15}} } -returnCodes error -result {element 2 missing from sublist "15"} +test cmdIL-3.5.1 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index 1+3 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {element 4 missing from sublist "1 . c"} +test cmdIL-3.5.2 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index -1-1 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {element -2 missing from sublist "1 . c"} +test cmdIL-3.5.3 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index -2 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {element -2 missing from sublist "1 . c"} +test cmdIL-3.5.4 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index end-4 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {element -2 missing from sublist "1 . c"} test cmdIL-3.6 {SortCompare procedure, -index option} { lsort -integer -index 2 {{1 15 30} {2 5 25} {3 25 20}} } {{3 25 20} {2 5 25} {1 15 30}} -- cgit v0.12 From f003df5624ef84697f69a9b130bf014e90fb0d31 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 13:40:54 +0000 Subject: Tests of [assemble] use of compiled index values. --- tests/assemble.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/assemble.test b/tests/assemble.test index 5231048..40c132d 100644 --- a/tests/assemble.test +++ b/tests/assemble.test @@ -1584,6 +1584,12 @@ test assemble-15.7 {listIndexImm} { } -result c } +test assemble-15.8 {listIndexImm} { + assemble {push {a b c}; listIndexImm end+2} +} {} +test assemble-15.9 {listIndexImm} { + assemble {push {a b c}; listIndexImm -1-1} +} {} # assemble-16 - invokeStk -- cgit v0.12 From 612199cbe93e3fb4c39cd92afd245616115ad442 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 17:02:50 +0000 Subject: Rework TclGetIndexFromToken to make use of TclGetEndOffsetFromObj, and to lay out the index value encoding cases. --- generic/tclCompCmdsGR.c | 95 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index 2386b6d..8585bce 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -34,20 +34,50 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, * * TclGetIndexFromToken -- * - * Parse a token and get the encoded version of the index (as understood - * by TEBC), assuming it is at all knowable at compile time. Only handles - * indices that are integers or 'end' or 'end-integer'. + * Parse a token to determine if an index value is known at + * compile time. Two cases are possible. The compile time value + * of the token might be parsed as an absolute index value + * in the C signed int range. Note that this includes index + * values that are integers as presented as well as index + * arithmetic expressions that can be fully computed at compile + * time. The absolute index values that can be directly meaningful + * as an index into either a list or a string are those integer + * values >= 0 and < INT_MAX. The largest string supported in Tcl 8 + * has bytelength INT_MAX. This means the largest character supported + * length is also INT_MAX, and the index of the last character in a + * string of length INT_MAX is INT_MAX-1. + * + * Any absolute index value parsed outside that range is encoded + * using the minBoundary and maxBounday values passed in by the + * caller as the encoding to use for indices that are either + * less than or greater than the usable index range. INT_MAX + * is available as a good choice for most callers to use for + * maxBoundary. Likewise, the value -1 is good for most callers + * to use for minBoundary. + * + * A token can also be parsed as an end-relative index expression. + * All end-relative expressions that indicate an index larger + * than end (end+2, end--5) point beyond the end of the indexed + * collection, and can be encoded as maxBoundary. The end-relative + * expressions that indicate an index less than or equal to end + * are encoded relative to the value TCL_INDEX_END (-2). The + * index "end" is encoded as -2, down to the index "end-0x7ffffffe" + * which is encoded as INT_MIN. Since the largest index into a + * string possible in Tcl 8 is 0x7ffffffe, the interpretation of + * "end-0x7ffffffe" for that largest string would be 0. Thus, + * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed, + * they can be encoded with the minBoundary value. + * + * These details will require re-examination whenever string and + * list length limits are increased, but that will likely also + * mean a revised routine capable of returning Tcl_WideInt values. * * Returns: * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. - * The return value of *index is: - * >= 0 -- constant index from start, - * == minBoundary -- (TCL_INDEX_OUT_OF_RANGE, 0) if out of range (before start) - * == maxBoundary -- (INT_MAX, TCL_INDEX_OUT_OF_RANGE) if out of range (after end) - * <= -2 -- (<= TCL_INDEX_END) negative index from end. * * Side effects: - * Sets *index to the index value if successful. + * When TCL_OK is returned, the encoded index value is written + * to *index. * *---------------------------------------------------------------------- */ @@ -69,25 +99,46 @@ TclGetIndexFromToken( result = TclGetIntFromObj(NULL, tmpObj, &idx); if (result == TCL_OK) { - /* out of range (..., -2] */ - if (idx <= TCL_INDEX_END) { + /* We parsed a value in the range INT_MIN...INT_MAX */ + integerEncode: + if (idx < 0) { + /* All negative absolute indices are "before the beginning" */ idx = minBoundary; - /* before start */ + } else if (idx == INT_MAX) { + /* This index value is always "after the end" */ + idx = maxBoundary; } + /* usual case, the absolute index value encodes itself */ } else { - result = TclGetIntForIndexM(NULL, tmpObj, TCL_INDEX_END, &idx); + result = TclGetEndOffsetFromObj(NULL, tmpObj, 0, &idx); if (result == TCL_OK) { - int endSyntax = (tmpObj->length >= 3 && *tmpObj->bytes == 'e'); - /* - * Check computed index results to out of range (after end or negative constant), - * set it to -1 in order to avoid ambiguity with "end[+-integer] syntax" + /* + * We parsed an end+offset index value. + * idx holds the offset value in the range INT_MIN...INT_MAX. */ - if (idx > TCL_INDEX_END && endSyntax) { - /* after end [end+1, ...) */ - idx = maxBoundary; /* may be TCL_INDEX_OUT_OF_RANGE or INT_MAX */ - } else if (idx < 0 && !endSyntax) { - /* before start, negative constant (..., -1-1] */ + if (idx > 0) { + /* + * All end+postive or end-negative expressions + * always indicate "after the end". + */ + idx = maxBoundary; + } else if (idx < INT_MIN - TCL_INDEX_END) { + /* These indices alwasy indicate "before the beginning */ idx = minBoundary; + } else { + /* Encoded end-positive (or end+negative) are offset */ + idx += TCL_INDEX_END; + } + } else { + result = TclGetIntForIndexM(NULL, tmpObj, 0, &idx); + if (result == TCL_OK) { + /* + * Only reach this case when the index value is a + * constant index arithmetic expression, and idx + * holds the result. Treat it the same as if it were + * parsed as an absolute integer value. + */ + goto integerEncode; } } } -- cgit v0.12 From 2c44354d13f15b788a4213ab50441eed6ad54f75 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 18:39:44 +0000 Subject: Establish 4 symbols for categories of parsed index values: TCL_INDEX_START = 0 The start index. TCL_INDEX_END = -2 The "end" index. TCL_INDEX_BEFORE = -1 All indices less than start. TCL_INDEX_AFTER = INT_MAX All indices greater than "end". Then use these symbols among callers of TclGetIndexFromToken() so that index value parsing can directly implement the callers sense of when out of range indices ought to be treated the same as start or end positions. --- generic/tclAssembly.c | 9 +++- generic/tclCompCmdsGR.c | 116 ++++++++++++++++++++++++++---------------------- generic/tclCompCmdsSZ.c | 43 ++++++++++-------- generic/tclCompile.h | 4 +- 4 files changed, 99 insertions(+), 73 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 02c64bd..a3fac8f 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -2258,8 +2258,13 @@ GetListIndexOperand( Tcl_DecrRefCount(value); /* Convert to an integer, advance to the next token and return. */ - status = TclGetIndexFromToken(tokenPtr, result, TCL_INDEX_OUT_OF_RANGE, - TCL_INDEX_OUT_OF_RANGE); + /* + * NOTE: Indexing a list with an index before it yields the + * same result as indexing after it, and might be more easily portable + * when list size limits grow. + */ + status = TclGetIndexFromToken(tokenPtr, result, TCL_INDEX_BEFORE, + TCL_INDEX_BEFORE); *tokenPtrPtr = TokenAfter(tokenPtr); if (status == TCL_ERROR && interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%.*s\"", diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index 8585bce..501c7a4 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -42,18 +42,21 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, * arithmetic expressions that can be fully computed at compile * time. The absolute index values that can be directly meaningful * as an index into either a list or a string are those integer - * values >= 0 and < INT_MAX. The largest string supported in Tcl 8 - * has bytelength INT_MAX. This means the largest character supported - * length is also INT_MAX, and the index of the last character in a - * string of length INT_MAX is INT_MAX-1. + * values >= TCL_INDEX_START (0) and < TCL_INDEX_AFTER (INT_MAX). + * The largest string supported in Tcl 8 has bytelength INT_MAX. + * This means the largest character supported length is also INT_MAX, + * and the index of the last character in a string of length INT_MAX + * is INT_MAX-1. * * Any absolute index value parsed outside that range is encoded * using the minBoundary and maxBounday values passed in by the * caller as the encoding to use for indices that are either - * less than or greater than the usable index range. INT_MAX + * less than or greater than the usable index range. TCL_INDEX_AFTER * is available as a good choice for most callers to use for - * maxBoundary. Likewise, the value -1 is good for most callers - * to use for minBoundary. + * maxBoundary. Likewise, the value TCL_INDEX_BEFORE is good for + * most callers to use for minBoundary. Other values are possible + * when the caller knows it is helpful in producing its own behavior + * for indices before and after the indexed item. * * A token can also be parsed as an end-relative index expression. * All end-relative expressions that indicate an index larger @@ -101,7 +104,7 @@ TclGetIndexFromToken( if (result == TCL_OK) { /* We parsed a value in the range INT_MIN...INT_MAX */ integerEncode: - if (idx < 0) { + if (idx < TCL_INDEX_START) { /* All negative absolute indices are "before the beginning" */ idx = minBoundary; } else if (idx == INT_MAX) { @@ -123,7 +126,7 @@ TclGetIndexFromToken( */ idx = maxBoundary; } else if (idx < INT_MIN - TCL_INDEX_END) { - /* These indices alwasy indicate "before the beginning */ + /* These indices always indicate "before the beginning */ idx = minBoundary; } else { /* Encoded end-positive (or end+negative) are offset */ @@ -214,7 +217,7 @@ TclCompileGlobalCmd( return TCL_ERROR; } - /* TODO: Consider what value can pass throug the + /* TODO: Consider what value can pass through the * IndexTailVarIfKnown() screen. Full CompileWord() * likely does not apply here. Push known value instead. */ CompileWord(envPtr, varTokenPtr, interp, i); @@ -1174,15 +1177,14 @@ TclCompileLindexCmd( } idxTokenPtr = TokenAfter(valTokenPtr); - if (TclGetIndexFromToken(idxTokenPtr, &idx, - TCL_INDEX_OUT_OF_RANGE, TCL_INDEX_OUT_OF_RANGE) == TCL_OK) { + if (TclGetIndexFromToken(idxTokenPtr, &idx, TCL_INDEX_BEFORE, + TCL_INDEX_BEFORE) == TCL_OK) { /* - * All checks have been completed, and we have exactly one of these - * constructs: - * lindex - * lindex end- - * This is best compiled as a push of the arbitrary value followed by - * an "immediate lindex" which is the most efficient variety. + * The idxTokenPtr parsed as a valid index value and was + * encoded as expected by INST_LIST_INDEX_IMM. + * + * NOTE: that we rely on indexing before a list producing the + * same result as indexing after a list. */ CompileWord(envPtr, valTokenPtr, interp, 1); @@ -1403,24 +1405,25 @@ TclCompileLrangeCmd( } listTokenPtr = TokenAfter(parsePtr->tokenPtr); - /* - * Parse the indices. Will only compile if both are constants and not an - * _integer_ less than zero (since we reserve negative indices here for - * end-relative indexing) or an end-based index greater than 'end' itself. - */ - tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, + TCL_INDEX_AFTER) != TCL_OK) { return TCL_ERROR; } + /* + * Token was an index value, and we treat all "first" indices + * before the list same as the start of the list. + */ tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, + TCL_INDEX_END) != TCL_OK) { return TCL_ERROR; } - if (idx1 == INT_MAX && idx2 == INT_MAX) { - idx2 = TCL_INDEX_OUT_OF_RANGE; - } + /* + * Token was an index value, and we treat all "last" indices + * after the list same as the end of the list. + */ /* * Issue instructions. It's not safe to skip doing the LIST_RANGE, as @@ -1470,7 +1473,16 @@ TclCompileLinsertCmd( */ tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx, 0, INT_MAX) != TCL_OK) { + + /* + * NOTE: This command treats all inserts at indices before the list + * the same as inserts at the start of the list, and all inserts + * after the list the same as inserts at the end of the list. We + * make that transformation here so we can use the optimized bytecode + * as much as possible. + */ + if (TclGetIndexFromToken(tokenPtr, &idx, TCL_INDEX_START, + TCL_INDEX_END) != TCL_OK) { return TCL_ERROR; } @@ -1494,10 +1506,10 @@ TclCompileLinsertCmd( } TclEmitInstInt4( INST_LIST, i-3, envPtr); - if (idx == 0 /*start*/) { + if (idx == TCL_INDEX_START) { TclEmitInstInt4( INST_REVERSE, 2, envPtr); TclEmitOpcode( INST_LIST_CONCAT, envPtr); - } else if (idx == TCL_INDEX_END /*end*/) { + } else if (idx == TCL_INDEX_END) { TclEmitOpcode( INST_LIST_CONCAT, envPtr); } else { /* @@ -1558,42 +1570,42 @@ TclCompileLreplaceCmd( } listTokenPtr = TokenAfter(parsePtr->tokenPtr); - /* - * Parse the indices. Will only compile if both are constants and not an - * _integer_ less than zero (since we reserve negative indices here for - * end-relative indexing) or an end-based index greater than 'end' itself. - */ - tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, + TCL_INDEX_AFTER) != TCL_OK) { return TCL_ERROR; } tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, -1, TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, + TCL_INDEX_END) != TCL_OK) { return TCL_ERROR; } /* - * idx1, idx2 are now in canonical form: - * - * - integer: [0,len+1] - * - end index: TCL_INDEX_END - * - -ive offset: TCL_INDEX_END-[len-1,0] + * idx1, idx2 are the conventional encoded forms of the tokens parsed + * as all forms of index values. Values of idx1 that come before the + * list are treated the same as if they were the start of the list. + * Values of idx2 that come after the list are treated the same as if + * they were the end of the list. */ + if (idx1 == TCL_INDEX_AFTER) { + /* + * [lreplace] treats idx1 value end+1 differently from end+2, etc. + * The operand encoding cannot distinguish them, so we must bail + * out to direct evaluation. + */ + return TCL_ERROR; + } + +/* TODO: ...... */ /* * Compilation fails when one index is end-based but the other isn't. * Fixing this will require more bytecodes, but this is a workaround for * now. [Bug 47ac84309b] */ - if (idx1 == INT_MAX) { - /* consider special handling for too large first index - * "list doesn't contain element ...", so still not compiled */ - return TCL_ERROR; - } - if ((idx1 <= TCL_INDEX_END) != (idx2 <= TCL_INDEX_END)) { /* @@ -3049,7 +3061,7 @@ TclCompileVariableCmd( return TCL_ERROR; } - /* TODO: Consider what value can pass throug the + /* TODO: Consider what value can pass through the * IndexTailVarIfKnown() screen. Full CompileWord() * likely does not apply here. Push known value instead. */ CompileWord(envPtr, varTokenPtr, interp, i); diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index bf8c482..d10d1c1 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -934,15 +934,22 @@ TclCompileStringRangeCmd( * Parse the two indices. */ - if (TclGetIndexFromToken(fromTokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { + if (TclGetIndexFromToken(fromTokenPtr, &idx1, TCL_INDEX_START, + TCL_INDEX_AFTER) != TCL_OK) { goto nonConstantIndices; } - if (TclGetIndexFromToken(toTokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { + /* + * Token parsed as an index expression. We treat all indices before + * the string the same as the start of the string. + */ + if (TclGetIndexFromToken(toTokenPtr, &idx2, TCL_INDEX_BEFORE, + TCL_INDEX_END) != TCL_OK) { goto nonConstantIndices; } - if (idx1 == INT_MAX && idx2 == INT_MAX) { - idx2 = TCL_INDEX_OUT_OF_RANGE; - } + /* + * Token parsed as an index expression. We treat all indices after + * the string the same as the end of the string. + */ /* * Push the operand onto the stack and then the substring operation. @@ -987,27 +994,27 @@ TclCompileStringReplaceCmd( replacementTokenPtr = TokenAfter(tokenPtr); } - /* - * Parse the indices. Will only compile special cases if both are - * constants and not an _integer_ less than zero (since we reserve - * negative indices here for end-relative indexing) or an end-based index - * greater than 'end' itself. - */ - tokenPtr = TokenAfter(valueTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, + TCL_INDEX_AFTER) != TCL_OK) { goto genericReplace; } + /* + * Token parsed as an index value. Indices before the string are + * treated as index of start of string. + */ tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) { - goto genericReplace; - } - if (idx1 == INT_MAX && idx2 == INT_MAX) { - /* avoid replacement of last char in large string (just don't compile). */ + if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, + TCL_INDEX_END) != TCL_OK) { goto genericReplace; } + /* + * Token parsed as an index value. Indices after the string are + * treated as index of end of string. + */ +/* TODO...... */ /* * We handle these replacements specially: first character (where * idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 2f23b90..9501d93 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1690,7 +1690,9 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, */ #define TCL_INDEX_END (-2) -#define TCL_INDEX_OUT_OF_RANGE (-1) +#define TCL_INDEX_BEFORE (-1) +#define TCL_INDEX_START (0) +#define TCL_INDEX_AFTER (INT_MAX) /* * DTrace probe macros (NOPs if DTrace support is not enabled). -- cgit v0.12 From d7f41b448c046d176a3b6a931a12d09d7e75b626 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 19:55:23 +0000 Subject: Express INST_LIST_INDEX_IMM index processinig in terms of TCL_INDEX_END so that consistencies are maintained, and hardcoded values are a bit demystified. --- generic/tclExecute.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index d345899..6bc5485 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5090,15 +5090,10 @@ TEBCresume( } /* - * Select the list item based on the index. Negative operand means - * end-based indexing (-2, ...), and -1 means out of range. + * Decode end-offset index values. */ - if (opnd < -1) { - index = opnd+1 + objc; - } else { - index = opnd; - } + index = opnd + (opnd <= TCL_INDEX_END)*(objc - 1 - TCL_INDEX_END); pcAdjustment = 5; lindexFastPath: -- cgit v0.12 From ce6aa3ea082e901341852037de6a866850f28351 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 21:02:44 +0000 Subject: When index parsing alone tells you a [string range] is empty, just push it. --- generic/tclCompCmdsSZ.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index d10d1c1..f98d375 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -930,6 +930,9 @@ TclCompileStringRangeCmd( fromTokenPtr = TokenAfter(stringTokenPtr); toTokenPtr = TokenAfter(fromTokenPtr); + /* Every path must push the string argument */ + CompileWord(envPtr, stringTokenPtr, interp, 1); + /* * Parse the two indices. */ @@ -942,6 +945,14 @@ TclCompileStringRangeCmd( * Token parsed as an index expression. We treat all indices before * the string the same as the start of the string. */ + + if (idx1 == TCL_INDEX_AFTER) { + /* [string range $s end+1 $last] must be empty string */ + OP( POP); + PUSH( ""); + return TCL_OK; + } + if (TclGetIndexFromToken(toTokenPtr, &idx2, TCL_INDEX_BEFORE, TCL_INDEX_END) != TCL_OK) { goto nonConstantIndices; @@ -950,12 +961,17 @@ TclCompileStringRangeCmd( * Token parsed as an index expression. We treat all indices after * the string the same as the end of the string. */ + if (idx2 == TCL_INDEX_BEFORE) { + /* [string range $s $first -1] must be empty string */ + OP( POP); + PUSH( ""); + return TCL_OK; + } /* * Push the operand onto the stack and then the substring operation. */ - CompileWord(envPtr, stringTokenPtr, interp, 1); OP44( STR_RANGE_IMM, idx1, idx2); return TCL_OK; @@ -964,7 +980,6 @@ TclCompileStringRangeCmd( */ nonConstantIndices: - CompileWord(envPtr, stringTokenPtr, interp, 1); CompileWord(envPtr, fromTokenPtr, interp, 2); CompileWord(envPtr, toTokenPtr, interp, 3); OP( STR_RANGE); -- cgit v0.12 From 30e7e05895de1b9534837717365babc905955f7a Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 7 Mar 2018 21:45:12 +0000 Subject: Streamline index decoding in INST_STR_RANGE_IMM execution. --- generic/tclExecute.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 6bc5485..2e3fcb9 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5640,33 +5640,44 @@ TEBCresume( length = Tcl_GetCharLength(valuePtr); TRACE(("\"%.20s\" %d %d => ", O2S(valuePtr), fromIdx, toIdx)); - /* - * Adjust indices for end-based indexing. - */ + /* Every range of an empty value is an empty value */ + if (length == 0) { + TRACE_APPEND(("\n")); + NEXT_INST_F(9, 0, 0); + } - if (fromIdx == -1) { - fromIdx = 0; - } else if (fromIdx < -1) { - fromIdx += 1 + length; - if (fromIdx < 0) { - fromIdx = 0; + /* Decode index operands. */ + + assert ( toIdx != TCL_INDEX_BEFORE ); + assert ( toIdx != TCL_INDEX_AFTER); + + if (toIdx <= TCL_INDEX_END) { + toIdx += (length - 1 - TCL_INDEX_END); + if (toIdx < 0) { + goto emptyRange; } - } else if (fromIdx >= length) { - fromIdx = length; - } - if (toIdx < -1) { - toIdx += 1 + length; } else if (toIdx >= length) { toIdx = length - 1; } - /* - * Check if we can do a sane substring. - */ + assert ( toIdx >= 0 && toIdx < length ); + + assert ( fromIdx != TCL_INDEX_BEFORE ); + assert ( fromIdx != TCL_INDEX_AFTER); + + if (fromIdx <= TCL_INDEX_END) { + fromIdx += (length - 1 - TCL_INDEX_END); + if (fromIdx < 0) { + fromIdx = 0; + } + } + + assert ( fromIdx >= 0 ); if (fromIdx <= toIdx) { objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx); } else { + emptyRange: TclNewObj(objResultPtr); } TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); -- cgit v0.12 From 101ac8bca755a22b853816ef11db876e71d0ee29 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 02:28:49 +0000 Subject: Reduce the "clever" factor. (Fine line between clever and stupid.) --- generic/tclExecute.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 2e3fcb9..a6042bb 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5076,8 +5076,8 @@ TEBCresume( */ valuePtr = OBJ_AT_TOS; - opnd = TclGetInt4AtPtr(pc+1); - TRACE(("\"%.30s\" %d => ", O2S(valuePtr), opnd)); + index = TclGetInt4AtPtr(pc+1); + TRACE(("\"%.30s\" %d => ", O2S(valuePtr), index)); /* * Get the contents of the list, making sure that it really is a list @@ -5093,7 +5093,9 @@ TEBCresume( * Decode end-offset index values. */ - index = opnd + (opnd <= TCL_INDEX_END)*(objc - 1 - TCL_INDEX_END); + if (index <= TCL_INDEX_END) { + index += (objc - 1 - TCL_INDEX_END); + } pcAdjustment = 5; lindexFastPath: -- cgit v0.12 From 64038c5656f9d288cb8a710c5b0b397244911b88 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 17:26:49 +0000 Subject: Rollback the stealth change to [lreplace a 1 1] in Tcl 8.6.6. [409ea17e37]. Scratch rewrite of the [lreplace] compiler. --- generic/tclCmdIL.c | 2 +- generic/tclCompCmdsGR.c | 280 +++++++++++++----------------------------------- tests/lreplace.test | 8 +- 3 files changed, 82 insertions(+), 208 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index e3c5f10..0716afe 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -2762,7 +2762,7 @@ Tcl_LreplaceObjCmd( * (to allow for replacing the last elem). */ - if ((first > listLen) && (listLen > 0)) { + if ((first >= listLen) && (listLen > 0)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "list doesn't contain element %s", TclGetString(objv[2]))); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LREPLACE", "BADIDX", diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index 501c7a4..de02ee7 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -1562,8 +1562,8 @@ TclCompileLreplaceCmd( { Tcl_Token *tokenPtr, *listTokenPtr; DefineLineInformation; /* TIP #280 */ - Tcl_Obj *tmpObj; int idx1, idx2, i, offset, offset2; + int emptyPrefix, suffixStart = 0; if (parsePtr->numWords < 4) { return TCL_ERROR; @@ -1599,243 +1599,117 @@ TclCompileLreplaceCmd( return TCL_ERROR; } -/* TODO: ...... */ /* - * Compilation fails when one index is end-based but the other isn't. - * Fixing this will require more bytecodes, but this is a workaround for - * now. [Bug 47ac84309b] - */ - - if ((idx1 <= TCL_INDEX_END) != (idx2 <= TCL_INDEX_END)) { - - /* - * NOTE: when idx1 == 0 and idx2 == TCL_INDEX_END, - * we bail out here! Yet, down below - */ - return TCL_ERROR; - } - - if (idx1 == -1) { - /* linsert before start or replace from start */ - idx1 = 0; - } - - if (idx2 != TCL_INDEX_END && idx2 >= 0 && idx2 < idx1) { - idx2 = idx1 - 1; - } - - /* - * Work out what this [lreplace] is actually doing. + * General structure of the [lreplace] result is + * prefix replacement suffix + * In a few cases we can predict various parts will be empty and + * take advantage. + * + * The proper suffix begins with the greater of indices idx1 or + * idx2 + 1. If we cannot tell at compile time which is greater, + * we must defer to direct evaluation. */ - tmpObj = NULL; - CompileWord(envPtr, listTokenPtr, interp, 1); - if (parsePtr->numWords == 4) { - if (idx1 == 0) { - if (idx2 == TCL_INDEX_END) { - - /* Here we are down below! Now look somewhere else! */ - goto dropAll; - } - idx1 = idx2 + 1; /* TODO: Overflow? */ - idx2 = TCL_INDEX_END; - goto dropEnd; - } else if (idx2 == TCL_INDEX_END) { - idx2 = idx1 - 1; - idx1 = 0; - goto dropEnd; - } else { - if (idx2 < idx1) { - idx2 = idx1 - 1; - } - if (idx1 > 0) { - tmpObj = Tcl_NewIntObj(idx1); - Tcl_IncrRefCount(tmpObj); - } - goto dropRange; - } - } - - tokenPtr = TokenAfter(tokenPtr); - for (i=4 ; inumWords ; i++) { - CompileWord(envPtr, tokenPtr, interp, i); - tokenPtr = TokenAfter(tokenPtr); - } - TclEmitInstInt4( INST_LIST, i - 4, envPtr); - TclEmitInstInt4( INST_REVERSE, 2, envPtr); - if (idx1 == 0) { - if (idx2 == TCL_INDEX_END) { - /* Another Can't Happen. */ - goto replaceAll; - } - idx1 = idx2 + 1; /* TODO: Overflow? */ - idx2 = TCL_INDEX_END; - goto replaceHead; + if (idx2 == TCL_INDEX_BEFORE) { + suffixStart = idx1; } else if (idx2 == TCL_INDEX_END) { - idx2 = idx1 - 1; - idx1 = 0; - goto replaceTail; + suffixStart = TCL_INDEX_AFTER; + } else if (((idx2 < TCL_INDEX_END) && (idx1 <= TCL_INDEX_END)) + || ((idx2 >= TCL_INDEX_START) && (idx1 >= TCL_INDEX_START))) { + suffixStart = (idx1 > idx2 + 1) ? idx1 : idx2 + 1; } else { - if (idx2 < idx1) { - idx2 = idx1 - 1; - } - if (idx1 > 0) { - tmpObj = Tcl_NewIntObj(idx1); - Tcl_IncrRefCount(tmpObj); - } - goto replaceRange; + return TCL_ERROR; } - /* - * Issue instructions to perform the operations relating to configurations - * that just drop. The only argument pushed on the stack is the list to - * operate on. - */ + /* All paths start with computing/pushing the original value. */ + CompileWord(envPtr, listTokenPtr, interp, 1); - dropAll: /* This just ensures the arg is a list. */ /* - * And now we're here down below the down below where flow can never go. - * CONCLUSION: This code has no purpose. + * [lreplace] raises an error when idx1 points after the list, but + * only when the list is not empty. This is maximum stupidity. + * + * TODO: TIP this nonsense away! */ -Tcl_Panic("Can not get here."); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); - TclEmitOpcode( INST_POP, envPtr); - PushStringLiteral(envPtr, ""); - goto done; - - dropEnd: - TclEmitInstInt4( INST_LIST_RANGE_IMM, idx1, envPtr); - TclEmitInt4( idx2, envPtr); - goto done; - - dropRange: - if (tmpObj != NULL) { - /* - * Emit bytecode to check the list length. - */ - + if (idx1 >= TCL_INDEX_START) { TclEmitOpcode( INST_DUP, envPtr); TclEmitOpcode( INST_LIST_LENGTH, envPtr); - TclEmitPush(TclAddLiteralObj(envPtr, tmpObj, NULL), envPtr); - TclEmitOpcode( INST_GE, envPtr); + TclEmitOpcode( INST_DUP, envPtr); offset = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_TRUE1, 0, envPtr); - - /* - * Emit an error if we've been given an empty list. - */ + TclEmitInstInt1( INST_JUMP_FALSE1, 0, envPtr); -/* If we're generating bytecode to report an error, we've gone wrong. - * Just fallback to direct invocation. - */ - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); + /* List is not empty */ + TclEmitPush(TclAddLiteralObj(envPtr, Tcl_NewIntObj(idx1), + NULL), envPtr); + TclEmitOpcode( INST_GT, envPtr); offset2 = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_FALSE1, 0, envPtr); + TclEmitInstInt1( INST_JUMP_TRUE1, 0, envPtr); + + /* Idx1 >= list length ===> raise an error */ TclEmitPush(TclAddLiteralObj(envPtr, Tcl_ObjPrintf( "list doesn't contain element %d", idx1), NULL), envPtr); CompileReturnInternal(envPtr, INST_RETURN_IMM, TCL_ERROR, 0, Tcl_ObjPrintf("-errorcode {TCL OPERATION LREPLACE BADIDX}")); TclStoreInt1AtPtr(CurrentOffset(envPtr) - offset, envPtr->codeStart + offset + 1); + TclEmitOpcode( INST_POP, envPtr); TclStoreInt1AtPtr(CurrentOffset(envPtr) - offset2, envPtr->codeStart + offset2 + 1); - TclAdjustStackDepth(-1, envPtr); } - TclEmitOpcode( INST_DUP, envPtr); - TclEmitInstInt4( INST_LIST_RANGE_IMM, 0, envPtr); - TclEmitInt4( idx1 - 1, envPtr); - TclEmitInstInt4( INST_REVERSE, 2, envPtr); - TclEmitInstInt4( INST_LIST_RANGE_IMM, idx2 + 1, envPtr); - TclEmitInt4( TCL_INDEX_END, envPtr); - TclEmitOpcode( INST_LIST_CONCAT, envPtr); - goto done; - - /* - * Issue instructions to perform the operations relating to configurations - * that do real replacement. All arguments are pushed and assembled into a - * pair: the list of values to replace with, and the list to do the - * surgery on. - */ - - replaceAll: -Tcl_Panic("Can not get here."); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); - TclEmitOpcode( INST_POP, envPtr); - goto done; - replaceHead: - TclEmitInstInt4( INST_LIST_RANGE_IMM, idx1, envPtr); - TclEmitInt4( idx2, envPtr); - TclEmitOpcode( INST_LIST_CONCAT, envPtr); - goto done; - - replaceTail: - TclEmitInstInt4( INST_LIST_RANGE_IMM, idx1, envPtr); - TclEmitInt4( idx2, envPtr); - TclEmitInstInt4( INST_REVERSE, 2, envPtr); - TclEmitOpcode( INST_LIST_CONCAT, envPtr); - goto done; - - replaceRange: - if (tmpObj != NULL) { + if ((idx1 == suffixStart) && (parsePtr->numWords == 4)) { /* - * Emit bytecode to check the list length. + * This is a "no-op". Example: [lreplace {a b c} 2 0] + * We still do a list operation to get list-verification + * and canonicalization side effects. */ + TclEmitInstInt4( INST_LIST_RANGE_IMM, 0, envPtr); + TclEmitInt4( TCL_INDEX_END, envPtr); + return TCL_OK; + } + emptyPrefix = (idx1 == TCL_INDEX_START); + if (!emptyPrefix) { + /* Prefix may not be empty; generate bytecode to push it */ TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); - - /* - * Check the list length vs idx1. - */ - - TclEmitPush(TclAddLiteralObj(envPtr, tmpObj, NULL), envPtr); - TclEmitOpcode( INST_GE, envPtr); - offset = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_TRUE1, 0, envPtr); + TclEmitInstInt4( INST_LIST_RANGE_IMM, 0, envPtr); + TclEmitInt4( idx1 - 1, envPtr); + } - /* - * Emit an error if we've been given an empty list. - */ + if (parsePtr->numWords > 4) { + /* Push the replacement arguments */ + tokenPtr = TokenAfter(tokenPtr); + for (i=4 ; inumWords ; i++) { + CompileWord(envPtr, tokenPtr, interp, i); + tokenPtr = TokenAfter(tokenPtr); + } -/* If we're generating bytecode to report an error, we've gone wrong. - * Just fallback to direct invocation. - */ - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); - offset2 = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_FALSE1, 0, envPtr); - TclEmitPush(TclAddLiteralObj(envPtr, Tcl_ObjPrintf( - "list doesn't contain element %d", idx1), NULL), envPtr); - CompileReturnInternal(envPtr, INST_RETURN_IMM, TCL_ERROR, 0, - Tcl_ObjPrintf("-errorcode {TCL OPERATION LREPLACE BADIDX}")); - TclStoreInt1AtPtr(CurrentOffset(envPtr) - offset, - envPtr->codeStart + offset + 1); - TclStoreInt1AtPtr(CurrentOffset(envPtr) - offset2, - envPtr->codeStart + offset2 + 1); - TclAdjustStackDepth(-1, envPtr); + /* Make a list of them... */ + TclEmitInstInt4( INST_LIST, i - 4, envPtr); + if (!emptyPrefix) { + /* ...and join to the prefix, if any. */ + TclEmitOpcode( INST_LIST_CONCAT, envPtr); + } + emptyPrefix = 0; } - TclEmitOpcode( INST_DUP, envPtr); - TclEmitInstInt4( INST_LIST_RANGE_IMM, 0, envPtr); - TclEmitInt4( idx1 - 1, envPtr); - TclEmitInstInt4( INST_REVERSE, 2, envPtr); - TclEmitInstInt4( INST_LIST_RANGE_IMM, idx2 + 1, envPtr); - TclEmitInt4( TCL_INDEX_END, envPtr); - TclEmitInstInt4( INST_REVERSE, 3, envPtr); - TclEmitOpcode( INST_LIST_CONCAT, envPtr); - TclEmitInstInt4( INST_REVERSE, 2, envPtr); - TclEmitOpcode( INST_LIST_CONCAT, envPtr); - goto done; - /* - * Clean up the allocated memory. - */ + if (!emptyPrefix) { + TclEmitInstInt4( INST_REVERSE, 2, envPtr); + } - done: - if (tmpObj != NULL) { - Tcl_DecrRefCount(tmpObj); + if (suffixStart == TCL_INDEX_AFTER) { + TclEmitOpcode( INST_POP, envPtr); + if (emptyPrefix) { + PushStringLiteral(envPtr, ""); + } + } else { + /* Suffix may not be empty; generate bytecode to push it */ + TclEmitInstInt4( INST_LIST_RANGE_IMM, suffixStart, envPtr); + TclEmitInt4( TCL_INDEX_END, envPtr); + if (!emptyPrefix) { + TclEmitOpcode( INST_LIST_CONCAT, envPtr); + } } + return TCL_OK; } diff --git a/tests/lreplace.test b/tests/lreplace.test index d7f8226..392e84d 100644 --- a/tests/lreplace.test +++ b/tests/lreplace.test @@ -98,12 +98,12 @@ test lreplace-1.26 {lreplace command} { [set foo [lreplace $foo end end]] \ [set foo [lreplace $foo end end]] } {a {} {}} -test lreplace-1.27 {lreplace command} { +test lreplace-1.27 {lreplace command} -body { lreplace x 1 1 -} x -test lreplace-1.28 {lreplace command} { +} -returnCodes 1 -result {list doesn't contain element 1} +test lreplace-1.28 {lreplace command} -body { lreplace x 1 1 y -} {x y} +} -returnCodes 1 -result {list doesn't contain element 1} test lreplace-2.1 {lreplace errors} { list [catch lreplace msg] $msg -- cgit v0.12 From 0c5bd7480ec59acdb0b66d61e8efbec87ff38f0a Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 18:57:16 +0000 Subject: Streamline index decoding in INST_LIST_RANGE_IMM execution. Eliminte the internals intrusion in the unshared in-place operations case. Tcl_ListObjReplace() really ought to be good enough. --- generic/tclExecute.c | 72 ++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index a6042bb..7043179 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5243,60 +5243,50 @@ TEBCresume( } #endif - /* - * Adjust the indices for end-based handling. - */ + /* Decode index value operands. */ - if (fromIdx < -1) { - fromIdx += 1+objc; - if (fromIdx < -1) { - fromIdx = -1; - } - } else if (fromIdx > objc) { - fromIdx = objc; + assert ( toIdx != TCL_INDEX_AFTER); + + if ((toIdx == TCL_INDEX_BEFORE) || (fromIdx == TCL_INDEX_AFTER)) { + goto emptyList; } - if (toIdx < -1) { - toIdx += 1 + objc; - if (toIdx < -1) { - toIdx = -1; + if (toIdx <= TCL_INDEX_END) { + toIdx += (objc - 1 - TCL_INDEX_END); + if (toIdx < 0) { + goto emptyList; } - } else if (toIdx > objc) { - toIdx = objc; + } else if (toIdx >= objc) { + toIdx = objc - 1; } - /* - * Check if we are referring to a valid, non-empty list range, and if - * so, build the list of elements in that range. - */ + assert ( toIdx >= 0 && toIdx < objc); + assert ( fromIdx != TCL_INDEX_BEFORE ); + assert ( fromIdx != TCL_INDEX_AFTER); - if (fromIdx<=toIdx && fromIdx=0) { + if (fromIdx <= TCL_INDEX_END) { + fromIdx += (objc - 1 - TCL_INDEX_END); if (fromIdx < 0) { fromIdx = 0; } - if (toIdx >= objc) { - toIdx = objc-1; - } - if (fromIdx == 0 && toIdx != objc-1 && !Tcl_IsShared(valuePtr)) { - /* - * BEWARE! This is looking inside the implementation of the - * list type. - */ - - List *listPtr = valuePtr->internalRep.twoPtrValue.ptr1; + } + assert ( fromIdx >= 0 ); - if (listPtr->refCount == 1) { - for (index=toIdx+1; indexelemCount = toIdx+1; - listPtr->canonicalFlag = 1; - TclInvalidateStringRep(valuePtr); - TRACE_APPEND(("%.30s\n", O2S(valuePtr))); - NEXT_INST_F(9, 0, 0); + if (fromIdx <= toIdx) { + /* Construct the subsquence list */ + /* unshared optimization */ + if (Tcl_IsShared(valuePtr)) { + objResultPtr = Tcl_NewListObj(toIdx-fromIdx+1, objv+fromIdx); + } else { + if (toIdx != objc - 1) { + Tcl_ListObjReplace(NULL, valuePtr, toIdx + 1, + objc - 1 - toIdx, 0, NULL); } + Tcl_ListObjReplace(NULL, valuePtr, 0, fromIdx, 0, NULL); + TRACE_APPEND(("%.30s\n", O2S(valuePtr))); + NEXT_INST_F(9, 0, 0); } - objResultPtr = Tcl_NewListObj(toIdx-fromIdx+1, objv+fromIdx); } else { + emptyList: TclNewObj(objResultPtr); } -- cgit v0.12 From b8b87e52802d476aa5e7d6796ff233f89cf0156b Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 19:28:58 +0000 Subject: New test expose flaw in error ordering. --- tests/lreplace.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/lreplace.test b/tests/lreplace.test index 392e84d..7377869 100644 --- a/tests/lreplace.test +++ b/tests/lreplace.test @@ -104,6 +104,9 @@ test lreplace-1.27 {lreplace command} -body { test lreplace-1.28 {lreplace command} -body { lreplace x 1 1 y } -returnCodes 1 -result {list doesn't contain element 1} +test lreplace-1.29 {lreplace command} -body { + lreplace x 1 1 [error foo] +} -returnCodes 1 -result {foo} test lreplace-2.1 {lreplace errors} { list [catch lreplace msg] $msg -- cgit v0.12 From ed53b715acb5d9a060c898f5ad4cfa79d6c9db1d Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 19:43:57 +0000 Subject: Another error ordering test. --- tests/lreplace.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/lreplace.test b/tests/lreplace.test index 7377869..4a6b853 100644 --- a/tests/lreplace.test +++ b/tests/lreplace.test @@ -107,6 +107,9 @@ test lreplace-1.28 {lreplace command} -body { test lreplace-1.29 {lreplace command} -body { lreplace x 1 1 [error foo] } -returnCodes 1 -result {foo} +test lreplace-1.30 {lreplace command} -body { + lreplace {not {}alist} 0 0 [error foo] +} -returnCodes 1 -result {foo} test lreplace-2.1 {lreplace errors} { list [catch lreplace msg] $msg -- cgit v0.12 From 947fb98db5ca258e0f170c51a9392816a6f48a8f Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 8 Mar 2018 21:58:30 +0000 Subject: Stop failing error ordering tests in compiled [lreplace]. --- generic/tclCompCmdsGR.c | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index de02ee7..e2ddb11 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -1563,7 +1563,7 @@ TclCompileLreplaceCmd( Tcl_Token *tokenPtr, *listTokenPtr; DefineLineInformation; /* TIP #280 */ int idx1, idx2, i, offset, offset2; - int emptyPrefix, suffixStart = 0; + int emptyPrefix=1, suffixStart = 0; if (parsePtr->numWords < 4) { return TCL_ERROR; @@ -1625,13 +1625,35 @@ TclCompileLreplaceCmd( CompileWord(envPtr, listTokenPtr, interp, 1); /* + * Push all the replacement values next so any errors raised in + * creating them get raised first. + */ + if (parsePtr->numWords > 4) { + /* Push the replacement arguments */ + tokenPtr = TokenAfter(tokenPtr); + for (i=4 ; inumWords ; i++) { + CompileWord(envPtr, tokenPtr, interp, i); + tokenPtr = TokenAfter(tokenPtr); + } + + /* Make a list of them... */ + TclEmitInstInt4( INST_LIST, i - 4, envPtr); + + emptyPrefix = 0; + } + + /* * [lreplace] raises an error when idx1 points after the list, but * only when the list is not empty. This is maximum stupidity. * * TODO: TIP this nonsense away! */ if (idx1 >= TCL_INDEX_START) { - TclEmitOpcode( INST_DUP, envPtr); + if (emptyPrefix) { + TclEmitOpcode( INST_DUP, envPtr); + } else { + TclEmitInstInt4( INST_OVER, 1, envPtr); + } TclEmitOpcode( INST_LIST_LENGTH, envPtr); TclEmitOpcode( INST_DUP, envPtr); offset = CurrentOffset(envPtr); @@ -1667,26 +1689,17 @@ TclCompileLreplaceCmd( return TCL_OK; } - emptyPrefix = (idx1 == TCL_INDEX_START); - if (!emptyPrefix) { + if (idx1 != TCL_INDEX_START) { /* Prefix may not be empty; generate bytecode to push it */ - TclEmitOpcode( INST_DUP, envPtr); + if (emptyPrefix) { + TclEmitOpcode( INST_DUP, envPtr); + } else { + TclEmitInstInt4( INST_OVER, 1, envPtr); + } TclEmitInstInt4( INST_LIST_RANGE_IMM, 0, envPtr); TclEmitInt4( idx1 - 1, envPtr); - } - - if (parsePtr->numWords > 4) { - /* Push the replacement arguments */ - tokenPtr = TokenAfter(tokenPtr); - for (i=4 ; inumWords ; i++) { - CompileWord(envPtr, tokenPtr, interp, i); - tokenPtr = TokenAfter(tokenPtr); - } - - /* Make a list of them... */ - TclEmitInstInt4( INST_LIST, i - 4, envPtr); if (!emptyPrefix) { - /* ...and join to the prefix, if any. */ + TclEmitInstInt4( INST_REVERSE, 2, envPtr); TclEmitOpcode( INST_LIST_CONCAT, envPtr); } emptyPrefix = 0; -- cgit v0.12 From 88812125b1c2634264238a902c01c4ed85c9900c Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 15:28:35 +0000 Subject: New tests demonstrating index value encoding flaws in [lsearch]. --- tests/lsearch.test | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tests/lsearch.test b/tests/lsearch.test index f36e987..8d03e96 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -418,6 +418,31 @@ test lsearch-17.6 {lsearch -index option, basic functionality} { test lsearch-17.7 {lsearch -index option, basic functionality} { lsearch -all -index 1 -regexp {{ab cb} {ab bb} {ab ab}} {[cb]b} } {0 1} +test lsearch-17.8 {lsearch -index option, empty argument} { + lsearch -index {} a a +} 0 +test lsearch-17.9 {lsearch -index option, empty argument} { + lsearch -index {} a a +} [lsearch a a] +test lsearch-17.10 {lsearch -index option, empty argument} { + lsearch -index {} [list \{] \{ +} 0 +test lsearch-17.11 {lsearch -index option, empty argument} { + lsearch -index {} [list \{] \{ +} [lsearch [list \{] \{] +test lsearch-17.12 {lsearch -index option, encoding aliasing} -body { + lsearch -index -2 a a +} -returnCodes error -result {index "-2" cannot select an element from any list} +test lsearch-17.13 {lsearch -index option, encoding aliasing} -body { + lsearch -index -1-1 a a +} -returnCodes error -result {index "-1-1" cannot select an element from any list} +test lsearch-17.14 {lsearch -index option, encoding aliasing} -body { + lsearch -index end--1 a a +} -returnCodes error -result {index "end--1" cannot select an element from any list} +test lsearch-17.15 {lsearch -index option, encoding aliasing} -body { + lsearch -index end+1 a a +} -returnCodes error -result {index "end+1" cannot select an element from any list} + test lsearch-18.1 {lsearch -index option, list as index basic functionality} { lsearch -index {0 0} {{{x x} {x b} {a d}} {{a c} {a b} {a a}}} a @@ -435,21 +460,27 @@ test lsearch-18.5 {lsearch -index option, list as index basic functionality} { lsearch -all -index {0 0} -exact {{{a c} {a b} {d a}} {{a c} {a b} {d a}}} a } {0 1} -test lsearch-19.1 {lsearch -sunindices option} { +test lsearch-19.1 {lsearch -subindices option} { lsearch -subindices -index {0 0} {{{x x} {x b} {a d}} {{a c} {a b} {a a}}} a } {1 0 0} -test lsearch-19.2 {lsearch -sunindices option} { +test lsearch-19.2 {lsearch -subindices option} { lsearch -subindices -index {2 0} -exact {{{x x} {x b} {a d}} {{a c} {a b} {a a}}} a } {0 2 0} -test lsearch-19.3 {lsearch -sunindices option} { +test lsearch-19.3 {lsearch -subindices option} { lsearch -subindices -index {1 1} -glob {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} b* } {0 1 1} -test lsearch-19.4 {lsearch -sunindices option} { +test lsearch-19.4 {lsearch -subindices option} { lsearch -subindices -index {0 1} -regexp {{{ab cb} {ab bb} {ab ab}} {{ab cb} {ab bb} {ab ab}}} {[cb]b} } {0 0 1} -test lsearch-19.5 {lsearch -sunindices option} { +test lsearch-19.5 {lsearch -subindices option} { lsearch -subindices -all -index {0 0} -exact {{{a c} {a b} {d a}} {{a c} {a b} {d a}}} a } {{0 0 0} {1 0 0}} +test lsearch-19.6 {lsearch -subindices option} { + lsearch -subindices -index end {{1 a}} a +} {0 1} +test lsearch-19.7 {lsearch -subindices option} { + lsearch -subindices -all -index end {{1 a}} a +} {{0 1}} test lsearch-20.1 {lsearch -index option, index larger than sublists} -body { lsearch -index 2 {{a c} {a b} {a a}} a -- cgit v0.12 From 5fd318bc35bbaa27e006360af375ca2c078b763a Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 17:30:30 +0000 Subject: More demonstration tests of index value encoding flaws. --- generic/tclCmdIL.c | 3 +++ tests/cmdIL.test | 25 +++++++++++++++++++++++-- tests/lsearch.test | 3 +++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 0716afe..afb5b62 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -3933,6 +3933,9 @@ Tcl_LsortObjCmd( /* * Do not shrink the actual memory block used; that doesn't * work with TclStackAlloc-allocated memory. [Bug 2918962] + * + * TODO: Consider a pointer increment to replace this + * array shift. */ for (i = 0; i < sortInfo.indexc; i++) { diff --git a/tests/cmdIL.test b/tests/cmdIL.test index 7636adc..c83bd98 100644 --- a/tests/cmdIL.test +++ b/tests/cmdIL.test @@ -147,6 +147,12 @@ test cmdIL-1.36 {lsort -stride and -index: Bug 2918962} { {{b i g} 12345} {{d e m o} 34512} } } {{{b i g} 12345} {{d e m o} 34512} {{c o d e} 54321} {{b l a h} 94729}} +test cmdIL-1.37 {lsort -stride and -index} -body { + lsort -stride 2 -index -2 {a 2 b 1} +} -returnCodes error -result {index "-2" cannot select an element from any list} +test cmdIL-1.38 {lsort -stride and-index} -body { + lsort -stride 2 -index -1-1 {a 2 b 1} +} -returnCodes error -result {index "-1-1" cannot select an element from any list} # Can't think of any good tests for the MergeSort and MergeLists procedures, # except a bunch of random lists to sort. @@ -208,13 +214,28 @@ test cmdIL-3.5.1 {SortCompare procedure, -index option (out of range, calculated } -returnCodes error -result {element 4 missing from sublist "1 . c"} test cmdIL-3.5.2 {SortCompare procedure, -index option (out of range, calculated index)} -body { lsort -index -1-1 {{1 . c} {2 . b} {3 . a}} -} -returnCodes error -result {element -2 missing from sublist "1 . c"} +} -returnCodes error -result {index "-1-1" cannot select an element from any list} test cmdIL-3.5.3 {SortCompare procedure, -index option (out of range, calculated index)} -body { lsort -index -2 {{1 . c} {2 . b} {3 . a}} -} -returnCodes error -result {element -2 missing from sublist "1 . c"} +} -returnCodes error -result {index "-2" cannot select an element from any list} test cmdIL-3.5.4 {SortCompare procedure, -index option (out of range, calculated index)} -body { lsort -index end-4 {{1 . c} {2 . b} {3 . a}} } -returnCodes error -result {element -2 missing from sublist "1 . c"} +test cmdIL-3.5.5 {SortCompare procedure, -index option} { + lsort -index {} {a b} +} {a b} +test cmdIL-3.5.6 {SortCompare procedure, -index option} { + lsort -index {} [list a \{] +} {a \{} +test cmdIL-3.5.7 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index end--1 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {index "end--1" cannot select an element from any list} +test cmdIL-3.5.8 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index end+1 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {index "end+1" cannot select an element from any list} +test cmdIL-3.5.9 {SortCompare procedure, -index option (out of range, calculated index)} -body { + lsort -index end+2 {{1 . c} {2 . b} {3 . a}} +} -returnCodes error -result {index "end+2" cannot select an element from any list} test cmdIL-3.6 {SortCompare procedure, -index option} { lsort -integer -index 2 {{1 15 30} {2 5 25} {3 25 20}} } {{3 25 20} {2 5 25} {1 15 30}} diff --git a/tests/lsearch.test b/tests/lsearch.test index 8d03e96..d7f9414 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -442,6 +442,9 @@ test lsearch-17.14 {lsearch -index option, encoding aliasing} -body { test lsearch-17.15 {lsearch -index option, encoding aliasing} -body { lsearch -index end+1 a a } -returnCodes error -result {index "end+1" cannot select an element from any list} +test lsearch-17.16 {lsearch -index option, encoding aliasing} -body { + lsearch -index end+2 a a +} -returnCodes error -result {index "end+2" cannot select an element from any list} test lsearch-18.1 {lsearch -index option, list as index basic functionality} { -- cgit v0.12 From 6798b2404f883074422c40ca7d196ad174a2d1ef Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 19:23:42 +0000 Subject: Refactor the index value encode/decode machinery for broader use. Make use of it to fix index value flaws in [lsearch]. --- generic/tclCmdIL.c | 43 +++++++++------- generic/tclCompile.h | 9 ---- generic/tclInt.h | 15 ++++++ generic/tclUtil.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 27 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index afb5b62..7a13b71 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -64,8 +64,9 @@ typedef struct SortInfo { * SORTMODE_COMMAND. Pre-initialized to hold * base of command. */ int *indexv; /* If the -index option was specified, this - * holds the indexes contained in the list - * supplied as an argument to that option. + * holds an encoding of the indexes contained + * in the list supplied as an argument to + * that option. * NULL if no indexes supplied, and points to * singleIndex field when only one * supplied. */ @@ -2913,7 +2914,7 @@ Tcl_LsearchObjCmd( Tcl_Obj *const objv[]) /* Argument values. */ { const char *bytes, *patternBytes; - int i, match, index, result, listc, length, elemLen, bisect; + int i, match, index, result=TCL_OK, listc, length, elemLen, bisect; int dataType, isIncreasing, lower, upper, offset; Tcl_WideInt patWide, objWide; int allMatches, inlineReturn, negatedMatch, returnSubindices, noCase; @@ -3113,13 +3114,26 @@ Tcl_LsearchObjCmd( */ for (j=0 ; jresultCode = TCL_ERROR; return NULL; } - index = infoPtr->indexv[i]; - /* - * Adjust for end-based indexing. - */ - - if (index < SORTIDX_NONE) { - index += listLen + 1; - } + index = TclIndexDecode(infoPtr->indexv[i], listLen - 1); if (Tcl_ListObjIndex(infoPtr->interp, objPtr, index, ¤tObj) != TCL_OK) { diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 9501d93..d842fdd 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1686,15 +1686,6 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData, #define TCL_NO_ELEMENT 2 /* Do not push the array element. */ /* - * Special value used by TclGetIndexFromToken to encoding the "end" index. - */ - -#define TCL_INDEX_END (-2) -#define TCL_INDEX_BEFORE (-1) -#define TCL_INDEX_START (0) -#define TCL_INDEX_AFTER (INT_MAX) - -/* * DTrace probe macros (NOPs if DTrace support is not enabled). */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 1b1b078..3821e42 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4013,6 +4013,21 @@ MODULE_SCOPE unsigned TclHashObjKey(Tcl_HashTable *tablePtr, void *keyPtr); MODULE_SCOPE int TclFullFinalizationRequested(void); /* + * Utility routines for encoding index values as integers. Used by both + * some of the command compilers and by [lsort] and [lsearch]. + */ + +MODULE_SCOPE int TclIndexEncode(Tcl_Interp *interp, Tcl_Obj *objPtr, + int before, int after, int *indexPtr); +MODULE_SCOPE int TclIndexDecode(int encoded, int endValue); + +/* Constants used in index value encoding routines. */ +#define TCL_INDEX_END (-2) +#define TCL_INDEX_BEFORE (-1) +#define TCL_INDEX_START (0) +#define TCL_INDEX_AFTER (INT_MAX) + +/* *---------------------------------------------------------------- * Macros used by the Tcl core to create and release Tcl objects. * TclNewObj(objPtr) creates a new object denoting an empty string. diff --git a/generic/tclUtil.c b/generic/tclUtil.c index beeaae1..f6a92fc 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -3813,6 +3813,143 @@ SetEndOffsetFromAny( /* *---------------------------------------------------------------------- * + * TclIndexEncode -- + * + * Parse objPtr to determine if it is an index value. Two cases + * are possible. The value objPtr might be parsed as an absolute + * index value in the C signed int range. Note that this includes + * index values that are integers as presented and it includes index + * arithmetic expressions. The absolute index values that can be + * directly meaningful as an index into either a list or a string are + * those integer values >= TCL_INDEX_START (0) + * and < TCL_INDEX_AFTER (INT_MAX). + * The largest string supported in Tcl 8 has bytelength INT_MAX. + * This means the largest supported character length is also INT_MAX, + * and the index of the last character in a string of length INT_MAX + * is INT_MAX-1. + * + * Any absolute index value parsed outside that range is encoded + * using the before and after values passed in by the + * caller as the encoding to use for indices that are either + * less than or greater than the usable index range. TCL_INDEX_AFTER + * is available as a good choice for most callers to use for + * after. Likewise, the value TCL_INDEX_BEFORE is good for + * most callers to use for before. Other values are possible + * when the caller knows it is helpful in producing its own behavior + * for indices before and after the indexed item. + * + * A token can also be parsed as an end-relative index expression. + * All end-relative expressions that indicate an index larger + * than end (end+2, end--5) point beyond the end of the indexed + * collection, and can be encoded as after. The end-relative + * expressions that indicate an index less than or equal to end + * are encoded relative to the value TCL_INDEX_END (-2). The + * index "end" is encoded as -2, down to the index "end-0x7ffffffe" + * which is encoded as INT_MIN. Since the largest index into a + * string possible in Tcl 8 is 0x7ffffffe, the interpretation of + * "end-0x7ffffffe" for that largest string would be 0. Thus, + * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed, + * they can be encoded with the before value. + * + * These details will require re-examination whenever string and + * list length limits are increased, but that will likely also + * mean a revised routine capable of returning Tcl_WideInt values. + * + * Returns: + * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. + * + * Side effects: + * When TCL_OK is returned, the encoded index value is written + * to *indexPtr. + * + *---------------------------------------------------------------------- + */ + +int +TclIndexEncode( + Tcl_Interp *interp, /* For error reporting, may be NULL */ + Tcl_Obj *objPtr, /* Index value to parse */ + int before, /* Value to return for index before beginning */ + int after, /* Value to return for index after end */ + int *indexPtr) /* Where to write the encoded answer, not NULL */ +{ + int idx; + + if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &idx)) { + /* We parsed a value in the range INT_MIN...INT_MAX */ + integerEncode: + if (idx < TCL_INDEX_START) { + /* All negative absolute indices are "before the beginning" */ + idx = before; + } else if (idx == INT_MAX) { + /* This index value is always "after the end" */ + idx = after; + } + /* usual case, the absolute index value encodes itself */ + } else if (TCL_OK == TclGetEndOffsetFromObj(NULL, objPtr, 0, &idx)) { + /* + * We parsed an end+offset index value. + * idx holds the offset value in the range INT_MIN...INT_MAX. + */ + if (idx > 0) { + /* + * All end+postive or end-negative expressions + * always indicate "after the end". + */ + idx = after; + } else if (idx < INT_MIN - TCL_INDEX_END) { + /* These indices always indicate "before the beginning */ + idx = before; + } else { + /* Encoded end-positive (or end+negative) are offset */ + idx += TCL_INDEX_END; + } + + /* TODO: Consider flag to suppress repeated end-offset parse. */ + } else if (TCL_OK == TclGetIntForIndexM(interp, objPtr, 0, &idx)) { + /* + * Only reach this case when the index value is a + * constant index arithmetic expression, and idx + * holds the result. Treat it the same as if it were + * parsed as an absolute integer value. + */ + goto integerEncode; + } else { + return TCL_ERROR; + } + *indexPtr = idx; + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * TclIndexDecode -- + * + * Decodes a value previously encoded by TclIndexEncode. The argument + * endValue indicates what value of "end" should be used in the + * decoding. + * + * Results: + * The decoded index value. + * + *---------------------------------------------------------------------- + */ + +int +TclIndexDecode( + int encoded, /* Value to decode */ + int endValue) /* Meaning of "end" to use, > TCL_INDEX_END */ +{ + if (encoded <= TCL_INDEX_END) { + return (encoded - TCL_INDEX_END) + endValue; + } + return encoded; +} + +/* + *---------------------------------------------------------------------- + * * TclCheckBadOctal -- * * This function checks for a bad octal value and appends a meaningful -- cgit v0.12 From a9d546dfbc3ec1d105fe2a2a06fb202295e5301f Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 19:40:01 +0000 Subject: Use new machinery to repair index value flaws in [lsort]. --- generic/tclCmdIL.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 7a13b71..3b2cb19 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -94,14 +94,6 @@ typedef struct SortInfo { #define SORTMODE_ASCII_NC 8 /* - * Magic values for the index field of the SortInfo structure. Note that the - * index "end-1" will be translated to SORTIDX_END-1, etc. - */ - -#define SORTIDX_NONE -1 /* Not indexed; use whole value. */ -#define SORTIDX_END -2 /* Indexed from end. */ - -/* * Forward declarations for procedures defined in this file: */ @@ -3746,7 +3738,7 @@ Tcl_LsortObjCmd( sortInfo.isIncreasing = 1; break; case LSORT_INDEX: { - int indexc, dummy; + int indexc; Tcl_Obj **indexv; if (i == objc-2) { @@ -3772,8 +3764,20 @@ Tcl_LsortObjCmd( */ for (j=0 ; j= groupSize) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "when used with \"-stride\", the leading \"-index\"" -- cgit v0.12 From c13e34da3740d014ade6f5e1dadcae12e18e27ed Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 20:34:33 +0000 Subject: Update the command compilers and bytecode execution engine to use new machinery. --- generic/tclAssembly.c | 4 +- generic/tclCompCmdsGR.c | 128 +++++++----------------------------------------- generic/tclCompCmdsSZ.c | 16 +++--- generic/tclCompile.h | 4 +- generic/tclExecute.c | 39 +++++---------- 5 files changed, 44 insertions(+), 147 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index a3fac8f..608459e 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -2263,8 +2263,8 @@ GetListIndexOperand( * same result as indexing after it, and might be more easily portable * when list size limits grow. */ - status = TclGetIndexFromToken(tokenPtr, result, TCL_INDEX_BEFORE, - TCL_INDEX_BEFORE); + status = TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, + TCL_INDEX_BEFORE, result); *tokenPtrPtr = TokenAfter(tokenPtr); if (status == TCL_ERROR && interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%.*s\"", diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c index e2ddb11..396947c 100644 --- a/generic/tclCompCmdsGR.c +++ b/generic/tclCompCmdsGR.c @@ -35,45 +35,7 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, * TclGetIndexFromToken -- * * Parse a token to determine if an index value is known at - * compile time. Two cases are possible. The compile time value - * of the token might be parsed as an absolute index value - * in the C signed int range. Note that this includes index - * values that are integers as presented as well as index - * arithmetic expressions that can be fully computed at compile - * time. The absolute index values that can be directly meaningful - * as an index into either a list or a string are those integer - * values >= TCL_INDEX_START (0) and < TCL_INDEX_AFTER (INT_MAX). - * The largest string supported in Tcl 8 has bytelength INT_MAX. - * This means the largest character supported length is also INT_MAX, - * and the index of the last character in a string of length INT_MAX - * is INT_MAX-1. - * - * Any absolute index value parsed outside that range is encoded - * using the minBoundary and maxBounday values passed in by the - * caller as the encoding to use for indices that are either - * less than or greater than the usable index range. TCL_INDEX_AFTER - * is available as a good choice for most callers to use for - * maxBoundary. Likewise, the value TCL_INDEX_BEFORE is good for - * most callers to use for minBoundary. Other values are possible - * when the caller knows it is helpful in producing its own behavior - * for indices before and after the indexed item. - * - * A token can also be parsed as an end-relative index expression. - * All end-relative expressions that indicate an index larger - * than end (end+2, end--5) point beyond the end of the indexed - * collection, and can be encoded as maxBoundary. The end-relative - * expressions that indicate an index less than or equal to end - * are encoded relative to the value TCL_INDEX_END (-2). The - * index "end" is encoded as -2, down to the index "end-0x7ffffffe" - * which is encoded as INT_MIN. Since the largest index into a - * string possible in Tcl 8 is 0x7ffffffe, the interpretation of - * "end-0x7ffffffe" for that largest string would be 0. Thus, - * if the tokens "end-0x7fffffff" or "end+-0x80000000" are parsed, - * they can be encoded with the minBoundary value. - * - * These details will require re-examination whenever string and - * list length limits are increased, but that will likely also - * mean a revised routine capable of returning Tcl_WideInt values. + * compile time. * * Returns: * TCL_OK if parsing succeeded, and TCL_ERROR if it failed. @@ -88,69 +50,17 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp, int TclGetIndexFromToken( Tcl_Token *tokenPtr, - int *index, - int minBoundary, - int maxBoundary) + int before, + int after, + int *indexPtr) { Tcl_Obj *tmpObj = Tcl_NewObj(); - int result, idx; + int result = TCL_ERROR; - if (!TclWordKnownAtCompileTime(tokenPtr, tmpObj)) { - Tcl_DecrRefCount(tmpObj); - return TCL_ERROR; - } - - result = TclGetIntFromObj(NULL, tmpObj, &idx); - if (result == TCL_OK) { - /* We parsed a value in the range INT_MIN...INT_MAX */ - integerEncode: - if (idx < TCL_INDEX_START) { - /* All negative absolute indices are "before the beginning" */ - idx = minBoundary; - } else if (idx == INT_MAX) { - /* This index value is always "after the end" */ - idx = maxBoundary; - } - /* usual case, the absolute index value encodes itself */ - } else { - result = TclGetEndOffsetFromObj(NULL, tmpObj, 0, &idx); - if (result == TCL_OK) { - /* - * We parsed an end+offset index value. - * idx holds the offset value in the range INT_MIN...INT_MAX. - */ - if (idx > 0) { - /* - * All end+postive or end-negative expressions - * always indicate "after the end". - */ - idx = maxBoundary; - } else if (idx < INT_MIN - TCL_INDEX_END) { - /* These indices always indicate "before the beginning */ - idx = minBoundary; - } else { - /* Encoded end-positive (or end+negative) are offset */ - idx += TCL_INDEX_END; - } - } else { - result = TclGetIntForIndexM(NULL, tmpObj, 0, &idx); - if (result == TCL_OK) { - /* - * Only reach this case when the index value is a - * constant index arithmetic expression, and idx - * holds the result. Treat it the same as if it were - * parsed as an absolute integer value. - */ - goto integerEncode; - } - } + if (TclWordKnownAtCompileTime(tokenPtr, tmpObj)) { + result = TclIndexEncode(NULL, tmpObj, before, after, indexPtr); } Tcl_DecrRefCount(tmpObj); - - if (result == TCL_OK) { - *index = idx; - } - return result; } @@ -1177,8 +1087,8 @@ TclCompileLindexCmd( } idxTokenPtr = TokenAfter(valTokenPtr); - if (TclGetIndexFromToken(idxTokenPtr, &idx, TCL_INDEX_BEFORE, - TCL_INDEX_BEFORE) == TCL_OK) { + if (TclGetIndexFromToken(idxTokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_BEFORE, + &idx) == TCL_OK) { /* * The idxTokenPtr parsed as a valid index value and was * encoded as expected by INST_LIST_INDEX_IMM. @@ -1406,8 +1316,8 @@ TclCompileLrangeCmd( listTokenPtr = TokenAfter(parsePtr->tokenPtr); tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, - TCL_INDEX_AFTER) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, + &idx1) != TCL_OK) { return TCL_ERROR; } /* @@ -1416,8 +1326,8 @@ TclCompileLrangeCmd( */ tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, - TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, + &idx2) != TCL_OK) { return TCL_ERROR; } /* @@ -1481,8 +1391,8 @@ TclCompileLinsertCmd( * make that transformation here so we can use the optimized bytecode * as much as possible. */ - if (TclGetIndexFromToken(tokenPtr, &idx, TCL_INDEX_START, - TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_END, + &idx) != TCL_OK) { return TCL_ERROR; } @@ -1571,14 +1481,14 @@ TclCompileLreplaceCmd( listTokenPtr = TokenAfter(parsePtr->tokenPtr); tokenPtr = TokenAfter(listTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, - TCL_INDEX_AFTER) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, + &idx1) != TCL_OK) { return TCL_ERROR; } tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, - TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, + &idx2) != TCL_OK) { return TCL_ERROR; } diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index f98d375..5cd1c3b 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -937,8 +937,8 @@ TclCompileStringRangeCmd( * Parse the two indices. */ - if (TclGetIndexFromToken(fromTokenPtr, &idx1, TCL_INDEX_START, - TCL_INDEX_AFTER) != TCL_OK) { + if (TclGetIndexFromToken(fromTokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, + &idx1) != TCL_OK) { goto nonConstantIndices; } /* @@ -953,8 +953,8 @@ TclCompileStringRangeCmd( return TCL_OK; } - if (TclGetIndexFromToken(toTokenPtr, &idx2, TCL_INDEX_BEFORE, - TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(toTokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, + &idx2) != TCL_OK) { goto nonConstantIndices; } /* @@ -1010,8 +1010,8 @@ TclCompileStringReplaceCmd( } tokenPtr = TokenAfter(valueTokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START, - TCL_INDEX_AFTER) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER, + &idx1) != TCL_OK) { goto genericReplace; } /* @@ -1020,8 +1020,8 @@ TclCompileStringReplaceCmd( */ tokenPtr = TokenAfter(tokenPtr); - if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE, - TCL_INDEX_END) != TCL_OK) { + if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END, + &idx2) != TCL_OK) { goto genericReplace; } /* diff --git a/generic/tclCompile.h b/generic/tclCompile.h index d842fdd..6aaa855 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -1121,8 +1121,8 @@ MODULE_SCOPE int TclFixupForwardJump(CompileEnv *envPtr, int distThreshold); MODULE_SCOPE void TclFreeCompileEnv(CompileEnv *envPtr); MODULE_SCOPE void TclFreeJumpFixupArray(JumpFixupArray *fixupArrayPtr); -MODULE_SCOPE int TclGetIndexFromToken(Tcl_Token *tokenPtr, int *index, - int minBoundary, int maxBoundary); +MODULE_SCOPE int TclGetIndexFromToken(Tcl_Token *tokenPtr, + int before, int after, int *indexPtr); MODULE_SCOPE void TclInitByteCodeObj(Tcl_Obj *objPtr, CompileEnv *envPtr); MODULE_SCOPE void TclInitCompileEnv(Tcl_Interp *interp, diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 7043179..188eadc 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5093,9 +5093,7 @@ TEBCresume( * Decode end-offset index values. */ - if (index <= TCL_INDEX_END) { - index += (objc - 1 - TCL_INDEX_END); - } + index = TclIndexDecode(index, objc - 1); pcAdjustment = 5; lindexFastPath: @@ -5250,11 +5248,9 @@ TEBCresume( if ((toIdx == TCL_INDEX_BEFORE) || (fromIdx == TCL_INDEX_AFTER)) { goto emptyList; } - if (toIdx <= TCL_INDEX_END) { - toIdx += (objc - 1 - TCL_INDEX_END); - if (toIdx < 0) { - goto emptyList; - } + toIdx = TclIndexDecode(toIdx, objc - 1); + if (toIdx < 0) { + goto emptyList; } else if (toIdx >= objc) { toIdx = objc - 1; } @@ -5263,13 +5259,10 @@ TEBCresume( assert ( fromIdx != TCL_INDEX_BEFORE ); assert ( fromIdx != TCL_INDEX_AFTER); - if (fromIdx <= TCL_INDEX_END) { - fromIdx += (objc - 1 - TCL_INDEX_END); - if (fromIdx < 0) { - fromIdx = 0; - } + fromIdx = TclIndexDecode(fromIdx, objc - 1); + if (fromIdx < 0) { + fromIdx = 0; } - assert ( fromIdx >= 0 ); if (fromIdx <= toIdx) { /* Construct the subsquence list */ @@ -5643,11 +5636,9 @@ TEBCresume( assert ( toIdx != TCL_INDEX_BEFORE ); assert ( toIdx != TCL_INDEX_AFTER); - if (toIdx <= TCL_INDEX_END) { - toIdx += (length - 1 - TCL_INDEX_END); - if (toIdx < 0) { - goto emptyRange; - } + toIdx = TclIndexDecode(toIdx, length - 1); + if (toIdx < 0) { + goto emptyRange; } else if (toIdx >= length) { toIdx = length - 1; } @@ -5657,15 +5648,11 @@ TEBCresume( assert ( fromIdx != TCL_INDEX_BEFORE ); assert ( fromIdx != TCL_INDEX_AFTER); - if (fromIdx <= TCL_INDEX_END) { - fromIdx += (length - 1 - TCL_INDEX_END); - if (fromIdx < 0) { - fromIdx = 0; - } + fromIdx = TclIndexDecode(fromIdx, length - 1); + if (fromIdx < 0) { + fromIdx = 0; } - assert ( fromIdx >= 0 ); - if (fromIdx <= toIdx) { objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx); } else { -- cgit v0.12 From 06c3fd0c5be39e8d5407848be217194519845a43 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 20:46:54 +0000 Subject: Newer utility routine is more suitable. --- generic/tclAssembly.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 608459e..b6bebb6 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -2255,7 +2255,6 @@ GetListIndexOperand( if (GetNextOperand(assemEnvPtr, tokenPtrPtr, &value) != TCL_OK) { return TCL_ERROR; } - Tcl_DecrRefCount(value); /* Convert to an integer, advance to the next token and return. */ /* @@ -2263,14 +2262,11 @@ GetListIndexOperand( * same result as indexing after it, and might be more easily portable * when list size limits grow. */ - status = TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, - TCL_INDEX_BEFORE, result); + status = TclIndexEncode(interp, value, + TCL_INDEX_BEFORE,TCL_INDEX_BEFORE, result); + + Tcl_DecrRefCount(value); *tokenPtrPtr = TokenAfter(tokenPtr); - if (status == TCL_ERROR && interp) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%.*s\"", - tokenPtr->size, tokenPtr->start)); - Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADINDEX", NULL); - } return status; } -- cgit v0.12 From 954522812e2e6644f38fa1ee44ce949a6c83a9d1 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 21:07:12 +0000 Subject: Return routine to file scope. --- generic/tclInt.h | 2 -- generic/tclUtil.c | 12 +++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 3821e42..ef48126 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2978,8 +2978,6 @@ MODULE_SCOPE int TclGetChannelFromObj(Tcl_Interp *interp, MODULE_SCOPE CmdFrame * TclGetCmdFrameForProcedure(Proc *procPtr); MODULE_SCOPE int TclGetCompletionCodeFromObj(Tcl_Interp *interp, Tcl_Obj *value, int *code); -MODULE_SCOPE int TclGetEndOffsetFromObj(Tcl_Interp *interp, - Tcl_Obj *objPtr, int endValue, int *indexPtr); MODULE_SCOPE int TclGetNumberFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, ClientData *clientDataPtr, int *typePtr); diff --git a/generic/tclUtil.c b/generic/tclUtil.c index f6a92fc..768b2f5 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -107,6 +107,8 @@ static Tcl_ThreadDataKey precisionKey; static void ClearHash(Tcl_HashTable *tablePtr); static void FreeProcessGlobalValue(ClientData clientData); static void FreeThreadHash(ClientData clientData); +static int GetEndOffsetFromObj(Tcl_Interp *interp, + Tcl_Obj *objPtr, int endValue, int *indexPtr); static Tcl_HashTable * GetThreadHash(Tcl_ThreadDataKey *keyPtr); static int SetEndOffsetFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); @@ -3575,7 +3577,7 @@ TclGetIntForIndex( return TCL_OK; } - if (TclGetEndOffsetFromObj(NULL, objPtr, endValue, indexPtr) == TCL_OK) { + if (GetEndOffsetFromObj(NULL, objPtr, endValue, indexPtr) == TCL_OK) { return TCL_OK; } @@ -3678,7 +3680,7 @@ UpdateStringOfEndOffset( /* *---------------------------------------------------------------------- * - * TclGetEndOffsetFromObj -- + * GetEndOffsetFromObj -- * * Look for a string of the form "end[+-]offset" and convert it to an * internal representation holding the offset. @@ -3692,8 +3694,8 @@ UpdateStringOfEndOffset( *---------------------------------------------------------------------- */ -int -TclGetEndOffsetFromObj( +static int +GetEndOffsetFromObj( Tcl_Interp *interp, /* For error reporting, may be NULL. */ Tcl_Obj *objPtr, /* Pointer to the object to parse */ int endValue, /* The value to be stored at "indexPtr" if @@ -3886,7 +3888,7 @@ TclIndexEncode( idx = after; } /* usual case, the absolute index value encodes itself */ - } else if (TCL_OK == TclGetEndOffsetFromObj(NULL, objPtr, 0, &idx)) { + } else if (TCL_OK == GetEndOffsetFromObj(NULL, objPtr, 0, &idx)) { /* * We parsed an end+offset index value. * idx holds the offset value in the range INT_MIN...INT_MAX. -- cgit v0.12 From cd4e0ec5deffef9dbc4331768ca660fef4590501 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Mar 2018 21:18:19 +0000 Subject: Restore safety for legacy bytecode. --- generic/tclExecute.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 5d29db9..151a899 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5241,7 +5241,14 @@ TEBCresume( /* Decode index value operands. */ + /* assert ( toIdx != TCL_INDEX_AFTER); + * + * Extra safety for legacy bytecodes: + */ + if (toIdx == TCL_INDEX_AFTER) { + toIdx = TCL_INDEX_END; + } if ((toIdx == TCL_INDEX_BEFORE) || (fromIdx == TCL_INDEX_AFTER)) { goto emptyList; @@ -5254,8 +5261,14 @@ TEBCresume( } assert ( toIdx >= 0 && toIdx < objc); + /* assert ( fromIdx != TCL_INDEX_BEFORE ); - assert ( fromIdx != TCL_INDEX_AFTER); + * + * Extra safety for legacy bytecodes: + */ + if (fromIdx == TCL_INDEX_BEFORE) { + fromIdx = TCL_INDEX_START; + } fromIdx = TclIndexDecode(fromIdx, objc - 1); if (fromIdx < 0) { @@ -5631,8 +5644,18 @@ TEBCresume( /* Decode index operands. */ + /* assert ( toIdx != TCL_INDEX_BEFORE ); assert ( toIdx != TCL_INDEX_AFTER); + * + * Extra safety for legacy bytecodes: + */ + if (toIdx == TCL_INDEX_BEFORE) { + goto emptyRange; + } + if (toIdx == TCL_INDEX_AFTER) { + toIdx = TCL_INDEX_END; + } toIdx = TclIndexDecode(toIdx, length - 1); if (toIdx < 0) { @@ -5643,8 +5666,18 @@ TEBCresume( assert ( toIdx >= 0 && toIdx < length ); + /* assert ( fromIdx != TCL_INDEX_BEFORE ); assert ( fromIdx != TCL_INDEX_AFTER); + * + * Extra safety for legacy bytecodes: + */ + if (fromIdx == TCL_INDEX_BEFORE) { + fromIdx = TCL_INDEX_START; + } + if (fromIdx == TCL_INDEX_AFTER) { + goto emptyRange; + } fromIdx = TclIndexDecode(fromIdx, length - 1); if (fromIdx < 0) { -- cgit v0.12