summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclAssembly.c9
-rw-r--r--generic/tclCompCmdsGR.c116
-rw-r--r--generic/tclCompCmdsSZ.c43
-rw-r--r--generic/tclCompile.h4
4 files changed, 99 insertions, 73 deletions
diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c
index 02c64bd..a3fac8f 100644
--- a/generic/tclAssembly.c
+++ b/generic/tclAssembly.c
@@ -2258,8 +2258,13 @@ GetListIndexOperand(
Tcl_DecrRefCount(value);
/* Convert to an integer, advance to the next token and return. */
- status = TclGetIndexFromToken(tokenPtr, result, TCL_INDEX_OUT_OF_RANGE,
- TCL_INDEX_OUT_OF_RANGE);
+ /*
+ * NOTE: Indexing a list with an index before it yields the
+ * same result as indexing after it, and might be more easily portable
+ * when list size limits grow.
+ */
+ status = TclGetIndexFromToken(tokenPtr, result, TCL_INDEX_BEFORE,
+ TCL_INDEX_BEFORE);
*tokenPtrPtr = TokenAfter(tokenPtr);
if (status == TCL_ERROR && interp) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf("bad index \"%.*s\"",
diff --git a/generic/tclCompCmdsGR.c b/generic/tclCompCmdsGR.c
index 8585bce..501c7a4 100644
--- a/generic/tclCompCmdsGR.c
+++ b/generic/tclCompCmdsGR.c
@@ -42,18 +42,21 @@ static int IndexTailVarIfKnown(Tcl_Interp *interp,
* arithmetic expressions that can be fully computed at compile
* time. The absolute index values that can be directly meaningful
* as an index into either a list or a string are those integer
- * values >= 0 and < INT_MAX. The largest string supported in Tcl 8
- * has bytelength INT_MAX. This means the largest character supported
- * length is also INT_MAX, and the index of the last character in a
- * string of length INT_MAX is INT_MAX-1.
+ * values >= TCL_INDEX_START (0) and < TCL_INDEX_AFTER (INT_MAX).
+ * The largest string supported in Tcl 8 has bytelength INT_MAX.
+ * This means the largest character supported length is also INT_MAX,
+ * and the index of the last character in a string of length INT_MAX
+ * is INT_MAX-1.
*
* Any absolute index value parsed outside that range is encoded
* using the minBoundary and maxBounday values passed in by the
* caller as the encoding to use for indices that are either
- * less than or greater than the usable index range. INT_MAX
+ * less than or greater than the usable index range. TCL_INDEX_AFTER
* is available as a good choice for most callers to use for
- * maxBoundary. Likewise, the value -1 is good for most callers
- * to use for minBoundary.
+ * maxBoundary. Likewise, the value TCL_INDEX_BEFORE is good for
+ * most callers to use for minBoundary. Other values are possible
+ * when the caller knows it is helpful in producing its own behavior
+ * for indices before and after the indexed item.
*
* A token can also be parsed as an end-relative index expression.
* All end-relative expressions that indicate an index larger
@@ -101,7 +104,7 @@ TclGetIndexFromToken(
if (result == TCL_OK) {
/* We parsed a value in the range INT_MIN...INT_MAX */
integerEncode:
- if (idx < 0) {
+ if (idx < TCL_INDEX_START) {
/* All negative absolute indices are "before the beginning" */
idx = minBoundary;
} else if (idx == INT_MAX) {
@@ -123,7 +126,7 @@ TclGetIndexFromToken(
*/
idx = maxBoundary;
} else if (idx < INT_MIN - TCL_INDEX_END) {
- /* These indices alwasy indicate "before the beginning */
+ /* These indices always indicate "before the beginning" */
idx = minBoundary;
} else {
/* Encoded end-positive (or end+negative) are offset */
@@ -214,7 +217,7 @@ TclCompileGlobalCmd(
return TCL_ERROR;
}
- /* TODO: Consider what value can pass throug the
+ /* TODO: Consider what value can pass through the
* IndexTailVarIfKnown() screen. Full CompileWord()
* likely does not apply here. Push known value instead. */
CompileWord(envPtr, varTokenPtr, interp, i);
@@ -1174,15 +1177,14 @@ TclCompileLindexCmd(
}
idxTokenPtr = TokenAfter(valTokenPtr);
- if (TclGetIndexFromToken(idxTokenPtr, &idx,
- TCL_INDEX_OUT_OF_RANGE, TCL_INDEX_OUT_OF_RANGE) == TCL_OK) {
+ if (TclGetIndexFromToken(idxTokenPtr, &idx, TCL_INDEX_BEFORE,
+ TCL_INDEX_BEFORE) == TCL_OK) {
/*
- * All checks have been completed, and we have exactly one of these
- * constructs:
- * lindex <arbitraryValue> <posInt>
- * lindex <arbitraryValue> end-<posInt>
- * This is best compiled as a push of the arbitrary value followed by
- * an "immediate lindex" which is the most efficient variety.
+ * The idxTokenPtr parsed as a valid index value and was
+ * encoded as expected by INST_LIST_INDEX_IMM.
+ *
+ * NOTE: We rely on indexing before a list producing the
+ * same result as indexing after a list.
*/
CompileWord(envPtr, valTokenPtr, interp, 1);
@@ -1403,24 +1405,25 @@ TclCompileLrangeCmd(
}
listTokenPtr = TokenAfter(parsePtr->tokenPtr);
- /*
- * Parse the indices. Will only compile if both are constants and not an
- * _integer_ less than zero (since we reserve negative indices here for
- * end-relative indexing) or an end-based index greater than 'end' itself.
- */
-
tokenPtr = TokenAfter(listTokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START,
+ TCL_INDEX_AFTER) != TCL_OK) {
return TCL_ERROR;
}
+ /*
+ * Token was an index value, and we treat all "first" indices
+ * before the list the same as the start of the list.
+ */
tokenPtr = TokenAfter(tokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE,
+ TCL_INDEX_END) != TCL_OK) {
return TCL_ERROR;
}
- if (idx1 == INT_MAX && idx2 == INT_MAX) {
- idx2 = TCL_INDEX_OUT_OF_RANGE;
- }
+ /*
+ * Token was an index value, and we treat all "last" indices
+ * after the list the same as the end of the list.
+ */
/*
* Issue instructions. It's not safe to skip doing the LIST_RANGE, as
@@ -1470,7 +1473,16 @@ TclCompileLinsertCmd(
*/
tokenPtr = TokenAfter(listTokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx, 0, INT_MAX) != TCL_OK) {
+
+ /*
+ * NOTE: This command treats all inserts at indices before the list
+ * the same as inserts at the start of the list, and all inserts
+ * after the list the same as inserts at the end of the list. We
+ * make that transformation here so we can use the optimized bytecode
+ * as much as possible.
+ */
+ if (TclGetIndexFromToken(tokenPtr, &idx, TCL_INDEX_START,
+ TCL_INDEX_END) != TCL_OK) {
return TCL_ERROR;
}
@@ -1494,10 +1506,10 @@ TclCompileLinsertCmd(
}
TclEmitInstInt4( INST_LIST, i-3, envPtr);
- if (idx == 0 /*start*/) {
+ if (idx == TCL_INDEX_START) {
TclEmitInstInt4( INST_REVERSE, 2, envPtr);
TclEmitOpcode( INST_LIST_CONCAT, envPtr);
- } else if (idx == TCL_INDEX_END /*end*/) {
+ } else if (idx == TCL_INDEX_END) {
TclEmitOpcode( INST_LIST_CONCAT, envPtr);
} else {
/*
@@ -1558,42 +1570,42 @@ TclCompileLreplaceCmd(
}
listTokenPtr = TokenAfter(parsePtr->tokenPtr);
- /*
- * Parse the indices. Will only compile if both are constants and not an
- * _integer_ less than zero (since we reserve negative indices here for
- * end-relative indexing) or an end-based index greater than 'end' itself.
- */
-
tokenPtr = TokenAfter(listTokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START,
+ TCL_INDEX_AFTER) != TCL_OK) {
return TCL_ERROR;
}
tokenPtr = TokenAfter(tokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx2, -1, TCL_INDEX_END) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE,
+ TCL_INDEX_END) != TCL_OK) {
return TCL_ERROR;
}
/*
- * idx1, idx2 are now in canonical form:
- *
- * - integer: [0,len+1]
- * - end index: TCL_INDEX_END
- * - -ive offset: TCL_INDEX_END-[len-1,0]
+ * idx1, idx2 are the conventional encoded forms of the tokens parsed
+ * as all forms of index values. Values of idx1 that come before the
+ * list are treated the same as if they were the start of the list.
+ * Values of idx2 that come after the list are treated the same as if
+ * they were the end of the list.
*/
+ if (idx1 == TCL_INDEX_AFTER) {
+ /*
+ * [lreplace] treats idx1 value end+1 differently from end+2, etc.
+ * The operand encoding cannot distinguish them, so we must bail
+ * out to direct evaluation.
+ */
+ return TCL_ERROR;
+ }
+
+/* TODO: ...... */
/*
* Compilation fails when one index is end-based but the other isn't.
* Fixing this will require more bytecodes, but this is a workaround for
* now. [Bug 47ac84309b]
*/
- if (idx1 == INT_MAX) {
- /* consider special handling for too large first index
- * "list doesn't contain element ...", so still not compiled */
- return TCL_ERROR;
- }
-
if ((idx1 <= TCL_INDEX_END) != (idx2 <= TCL_INDEX_END)) {
/*
@@ -3049,7 +3061,7 @@ TclCompileVariableCmd(
return TCL_ERROR;
}
- /* TODO: Consider what value can pass throug the
+ /* TODO: Consider what value can pass through the
* IndexTailVarIfKnown() screen. Full CompileWord()
* likely does not apply here. Push known value instead. */
CompileWord(envPtr, varTokenPtr, interp, i);
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index bf8c482..d10d1c1 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -934,15 +934,22 @@ TclCompileStringRangeCmd(
* Parse the two indices.
*/
- if (TclGetIndexFromToken(fromTokenPtr, &idx1, -1, INT_MAX) != TCL_OK) {
+ if (TclGetIndexFromToken(fromTokenPtr, &idx1, TCL_INDEX_START,
+ TCL_INDEX_AFTER) != TCL_OK) {
goto nonConstantIndices;
}
- if (TclGetIndexFromToken(toTokenPtr, &idx2, -1, INT_MAX) != TCL_OK) {
+ /*
+ * Token parsed as an index expression. We treat all indices before
+ * the string the same as the start of the string.
+ */
+ if (TclGetIndexFromToken(toTokenPtr, &idx2, TCL_INDEX_BEFORE,
+ TCL_INDEX_END) != TCL_OK) {
goto nonConstantIndices;
}
- if (idx1 == INT_MAX && idx2 == INT_MAX) {
- idx2 = TCL_INDEX_OUT_OF_RANGE;
- }
+ /*
+ * Token parsed as an index expression. We treat all indices after
+ * the string the same as the end of the string.
+ */
/*
* Push the operand onto the stack and then the substring operation.
@@ -987,27 +994,27 @@ TclCompileStringReplaceCmd(
replacementTokenPtr = TokenAfter(tokenPtr);
}
- /*
- * Parse the indices. Will only compile special cases if both are
- * constants and not an _integer_ less than zero (since we reserve
- * negative indices here for end-relative indexing) or an end-based index
- * greater than 'end' itself.
- */
-
tokenPtr = TokenAfter(valueTokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx1, -1, INT_MAX) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, &idx1, TCL_INDEX_START,
+ TCL_INDEX_AFTER) != TCL_OK) {
goto genericReplace;
}
+ /*
+ * Token parsed as an index value. Indices before the string are
+ * treated as the index of the start of the string.
+ */
tokenPtr = TokenAfter(tokenPtr);
- if (TclGetIndexFromToken(tokenPtr, &idx2, -1, INT_MAX) != TCL_OK) {
- goto genericReplace;
- }
- if (idx1 == INT_MAX && idx2 == INT_MAX) {
- /* avoid replacement of last char in large string (just don't compile). */
+ if (TclGetIndexFromToken(tokenPtr, &idx2, TCL_INDEX_BEFORE,
+ TCL_INDEX_END) != TCL_OK) {
goto genericReplace;
}
+ /*
+ * Token parsed as an index value. Indices after the string are
+ * treated as the index of the end of the string.
+ */
+/* TODO...... */
/*
* We handle these replacements specially: first character (where
* idx1=idx2=0) and last character (where idx1=idx2=TCL_INDEX_END). Anything
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index 2f23b90..9501d93 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -1690,7 +1690,9 @@ MODULE_SCOPE int TclPushProcCallFrame(ClientData clientData,
*/
#define TCL_INDEX_END (-2)
-#define TCL_INDEX_OUT_OF_RANGE (-1)
+#define TCL_INDEX_BEFORE (-1)
+#define TCL_INDEX_START (0)
+#define TCL_INDEX_AFTER (INT_MAX)
/*
* DTrace probe macros (NOPs if DTrace support is not enabled).