summaryrefslogtreecommitdiffstats
path: root/generic/tclCompCmdsSZ.c
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2018-11-06 10:04:02 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2018-11-06 10:04:02 (GMT)
commit179e8f4df7568f6aca6e53525abfe0505fd2a578 (patch)
treec9328ec8e0afd8af8a96b9f3da12b152cec7c281 /generic/tclCompCmdsSZ.c
parentc5a85dbfdc7dce9328b7f5fffb0bae519f68cf9f (diff)
parent2798a075ee62ea5ab4aa80279d614a8634ba378a (diff)
downloadtcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.zip
tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.gz
tcl-179e8f4df7568f6aca6e53525abfe0505fd2a578.tar.bz2
merge core-8-branch
Diffstat (limited to 'generic/tclCompCmdsSZ.c')
-rw-r--r--generic/tclCompCmdsSZ.c343
1 files changed, 193 insertions, 150 deletions
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index b5676cb..8b54a99 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -107,58 +107,6 @@ const AuxDataType tclJumptableInfoType = {
#define INVOKE(name) \
TclEmitInvoke(envPtr,INST_##name)
-#define INDEX_END (-2)
-
-/*
- *----------------------------------------------------------------------
- *
- * GetIndexFromToken --
- *
- * Parse a token and get the encoded version of the index (as understood
- * by TEBC), assuming it is at all knowable at compile time. Only handles
- * indices that are integers or 'end' or 'end-integer'.
- *
- * Returns:
- * TCL_OK if parsing succeeded, and TCL_ERROR if it failed.
- *
- * Side effects:
- * Sets *index to the index value if successful.
- *
- *----------------------------------------------------------------------
- */
-
-static inline int
-GetIndexFromToken(
- Tcl_Token *tokenPtr,
- int *index)
-{
- Tcl_Obj *tmpObj = Tcl_NewObj();
- int result, idx;
-
- if (!TclWordKnownAtCompileTime(tokenPtr, tmpObj)) {
- Tcl_DecrRefCount(tmpObj);
- return TCL_ERROR;
- }
-
- result = TclGetIntFromObj(NULL, tmpObj, &idx);
- if (result == TCL_OK) {
- if (idx < 0) {
- result = TCL_ERROR;
- }
- } else {
- result = TclGetIntForIndexM(NULL, tmpObj, INDEX_END, &idx);
- if (result == TCL_OK && idx > INDEX_END) {
- result = TCL_ERROR;
- }
- }
- Tcl_DecrRefCount(tmpObj);
-
- if (result == TCL_OK) {
- *index = idx;
- }
-
- return result;
-}
/*
*----------------------------------------------------------------------
@@ -312,7 +260,7 @@ TclCompileStringCatCmd(
Tcl_DecrRefCount(obj);
if (folded) {
int len;
- const char *bytes = Tcl_GetStringFromObj(folded, &len);
+ const char *bytes = TclGetStringFromObj(folded, &len);
PushLiteral(envPtr, bytes, len);
Tcl_DecrRefCount(folded);
@@ -330,7 +278,7 @@ TclCompileStringCatCmd(
}
if (folded) {
int len;
- const char *bytes = Tcl_GetStringFromObj(folded, &len);
+ const char *bytes = TclGetStringFromObj(folded, &len);
PushLiteral(envPtr, bytes, len);
Tcl_DecrRefCount(folded);
@@ -743,14 +691,11 @@ TclCompileStringIsCmd(
}
switch (t) {
- case STR_IS_INT:
- PUSH( "1");
- OP( EQ);
- break;
case STR_IS_WIDE:
PUSH( "2");
OP( LE);
break;
+ case STR_IS_INT:
case STR_IS_ENTIER:
PUSH( "3");
OP( LE);
@@ -960,12 +905,12 @@ TclCompileStringMapCmd(
* correct semantics for mapping.
*/
- bytes = Tcl_GetStringFromObj(objv[0], &len);
+ bytes = TclGetStringFromObj(objv[0], &len);
if (len == 0) {
CompileWord(envPtr, stringTokenPtr, interp, 2);
} else {
PushLiteral(envPtr, bytes, len);
- bytes = Tcl_GetStringFromObj(objv[1], &len);
+ bytes = TclGetStringFromObj(objv[1], &len);
PushLiteral(envPtr, bytes, len);
CompileWord(envPtr, stringTokenPtr, interp, 2);
OP(STR_MAP);
@@ -994,22 +939,48 @@ TclCompileStringRangeCmd(
fromTokenPtr = TokenAfter(stringTokenPtr);
toTokenPtr = TokenAfter(fromTokenPtr);
+ /* Every path must push the string argument */
+ CompileWord(envPtr, stringTokenPtr, interp, 1);
+
/*
* Parse the two indices.
*/
- if (GetIndexFromToken(fromTokenPtr, &idx1) != TCL_OK) {
+ if (TclGetIndexFromToken(fromTokenPtr, TCL_INDEX_START, TCL_INDEX_AFTER,
+ &idx1) != TCL_OK) {
goto nonConstantIndices;
}
- if (GetIndexFromToken(toTokenPtr, &idx2) != TCL_OK) {
+ /*
+ * Token parsed as an index expression. We treat all indices before
+ * the string the same as the start of the string.
+ */
+
+ if (idx1 == TCL_INDEX_AFTER) {
+ /* [string range $s end+1 $last] must be empty string */
+ OP( POP);
+ PUSH( "");
+ return TCL_OK;
+ }
+
+ if (TclGetIndexFromToken(toTokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_END,
+ &idx2) != TCL_OK) {
goto nonConstantIndices;
}
+ /*
+ * Token parsed as an index expression. We treat all indices after
+ * the string the same as the end of the string.
+ */
+ if (idx2 == TCL_INDEX_BEFORE) {
+ /* [string range $s $first -1] must be empty string */
+ OP( POP);
+ PUSH( "");
+ return TCL_OK;
+ }
/*
* Push the operand onto the stack and then the substring operation.
*/
- CompileWord(envPtr, stringTokenPtr, interp, 1);
OP44( STR_RANGE_IMM, idx1, idx2);
return TCL_OK;
@@ -1018,7 +989,6 @@ TclCompileStringRangeCmd(
*/
nonConstantIndices:
- CompileWord(envPtr, stringTokenPtr, interp, 1);
CompileWord(envPtr, fromTokenPtr, interp, 2);
CompileWord(envPtr, toTokenPtr, interp, 3);
OP( STR_RANGE);
@@ -1034,124 +1004,197 @@ TclCompileStringReplaceCmd(
* compiled. */
CompileEnv *envPtr) /* Holds the resulting instructions. */
{
- Tcl_Token *tokenPtr, *valueTokenPtr, *replacementTokenPtr = NULL;
+ Tcl_Token *tokenPtr, *valueTokenPtr;
DefineLineInformation; /* TIP #280 */
- int idx1, idx2;
+ int first, last;
if (parsePtr->numWords < 4 || parsePtr->numWords > 5) {
return TCL_ERROR;
}
+
+ /* Bytecode to compute/push string argument being replaced */
valueTokenPtr = TokenAfter(parsePtr->tokenPtr);
- if (parsePtr->numWords == 5) {
- tokenPtr = TokenAfter(valueTokenPtr);
- tokenPtr = TokenAfter(tokenPtr);
- replacementTokenPtr = TokenAfter(tokenPtr);
- }
+ CompileWord(envPtr, valueTokenPtr, interp, 1);
/*
- * Parse the indices. Will only compile special cases if both are
- * constants and not an _integer_ less than zero (since we reserve
- * negative indices here for end-relative indexing) or an end-based index
- * greater than 'end' itself.
+ * Check for first index known and useful at compile time.
*/
-
tokenPtr = TokenAfter(valueTokenPtr);
- if (GetIndexFromToken(tokenPtr, &idx1) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+ &first) != TCL_OK) {
goto genericReplace;
}
+ /*
+ * Check for last index known and useful at compile time.
+ */
tokenPtr = TokenAfter(tokenPtr);
- if (GetIndexFromToken(tokenPtr, &idx2) != TCL_OK) {
+ if (TclGetIndexFromToken(tokenPtr, TCL_INDEX_BEFORE, TCL_INDEX_AFTER,
+ &last) != TCL_OK) {
goto genericReplace;
}
/*
- * We handle these replacements specially: first character (where
- * idx1=idx2=0) and last character (where idx1=idx2=INDEX_END). Anything
- * else and the semantics get rather screwy.
+ * [string replace] is an odd bird. For many arguments it is
+ * a conventional substring replacer. However it also goes out
+ * of its way to become a no-op for many cases where it would be
+ * replacing an empty substring. Precisely, it is a no-op when
+ *
+ * (last < first) OR
+ * (last < 0) OR
+ * (end < first)
+ *
+ * For some compile-time values we can detect these cases, and
+ * compile direct to bytecode implementing the no-op.
*/
- if (idx1 == 0 && idx2 == 0) {
- int notEq, end;
+ if ((last == TCL_INDEX_BEFORE) /* Know (last < 0) */
+ || (first == TCL_INDEX_AFTER) /* Know (first > end) */
/*
- * Just working with the first character.
+ * Tricky to determine when runtime (last < first) can be
+ * certainly known based on the encoded values. Consider the
+ * cases...
+ *
+ * (first <= TCL_INDEX_END) &&
+ * (last == TCL_INDEX_AFTER) => cannot tell REJECT
+ * (last <= TCL_INDEX END) && (last < first) => ACCEPT
+ * else => cannot tell REJECT
*/
-
- CompileWord(envPtr, valueTokenPtr, interp, 1);
- if (replacementTokenPtr == NULL) {
- /* Drop first */
- OP44( STR_RANGE_IMM, 1, INDEX_END);
- return TCL_OK;
+ || ((first <= TCL_INDEX_END) && (last <= TCL_INDEX_END)
+ && (last < first)) /* Know (last < first) */
+ /*
+ * (first == TCL_INDEX_BEFORE) &&
+ * (last == TCL_INDEX_AFTER) => (first < last) REJECT
+ * (last <= TCL_INDEX_END) => cannot tell REJECT
+ * else => (first < last) REJECT
+ *
+ * else [[first >= TCL_INDEX_START]] &&
+ * (last == TCL_INDEX_AFTER) => cannot tell REJECT
+ * (last <= TCL_INDEX_END) => cannot tell REJECT
+ * else [[last >= TCL_INDEX START]] && (last < first) => ACCEPT
+ */
+ || ((first >= TCL_INDEX_START) && (last >= TCL_INDEX_START)
+ && (last < first))) { /* Know (last < first) */
+ if (parsePtr->numWords == 5) {
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
+ OP( POP); /* Pop newString */
}
- /* Replace first */
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
- OP4( OVER, 1);
- PUSH( "");
- OP( STR_EQ);
- JUMP1( JUMP_FALSE, notEq);
- OP( POP);
- JUMP1( JUMP, end);
- FIXJUMP1(notEq);
- TclAdjustStackDepth(1, envPtr);
- OP4( REVERSE, 2);
- OP44( STR_RANGE_IMM, 1, INDEX_END);
- OP1( STR_CONCAT1, 2);
- FIXJUMP1(end);
+ /* Original string argument now on TOS as result */
return TCL_OK;
+ }
- } else if (idx1 == INDEX_END && idx2 == INDEX_END) {
- int notEq, end;
-
- /*
- * Just working with the last character.
- */
+ if (parsePtr->numWords == 5) {
+ /*
+ * When we have a string replacement, we have to take care about
+ * not replacing empty substrings that [string replace] promises
+ * not to replace
+ *
+ * The remaining index values might be suitable for conventional
+ * string replacement, but only if they cannot possibly meet the
+ * conditions described above at runtime. If there's a chance they
+ * might, we would have to emit bytecode to check and at that point
+ * we're paying more in bytecode execution time than would make
+ * things worthwhile. Trouble is we are very limited in
+ * how much we can detect that at compile time. After decoding,
+ * we need, first:
+ *
+ * (first <= end)
+ *
+ * The encoded indices (first <= TCL_INDEX END) and
+ * (first == TCL_INDEX_BEFORE) always meets this condition, but
+ * any other encoded first index has some list for which it fails.
+ *
+ * We also need, second:
+ *
+ * (last >= 0)
+ *
+ * The encoded indices (last >= TCL_INDEX_START) and
+ * (last == TCL_INDEX_AFTER) always meet this condition but any
+ * other encoded last index has some list for which it fails.
+ *
+ * Finally we need, third:
+ *
+ * (first <= last)
+ *
+ * Considered in combination with the constraints we already have,
+ * we see that we can proceed when (first == TCL_INDEX_BEFORE)
+ * or (last == TCL_INDEX_AFTER). These also permit simplification
+ * of the prefix|replace|suffix construction. The other constraints,
+ * though, interfere with getting a guarantee that first <= last.
+ */
- CompileWord(envPtr, valueTokenPtr, interp, 1);
- if (replacementTokenPtr == NULL) {
- /* Drop last */
- OP44( STR_RANGE_IMM, 0, INDEX_END-1);
- return TCL_OK;
- }
- /* Replace last */
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
- OP4( OVER, 1);
- PUSH( "");
- OP( STR_EQ);
- JUMP1( JUMP_FALSE, notEq);
- OP( POP);
- JUMP1( JUMP, end);
- FIXJUMP1(notEq);
- TclAdjustStackDepth(1, envPtr);
- OP4( REVERSE, 2);
- OP44( STR_RANGE_IMM, 0, INDEX_END-1);
+ if ((first == TCL_INDEX_BEFORE) && (last >= TCL_INDEX_START)) {
+ /* empty prefix */
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
OP4( REVERSE, 2);
+ if (last == TCL_INDEX_AFTER) {
+ OP( POP); /* Pop original */
+ } else {
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ OP1( STR_CONCAT1, 2);
+ }
+ return TCL_OK;
+ }
+
+ if ((last == TCL_INDEX_AFTER) && (first <= TCL_INDEX_END)) {
+ OP44( STR_RANGE_IMM, 0, first-1);
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
OP1( STR_CONCAT1, 2);
- FIXJUMP1(end);
return TCL_OK;
+ }
+
+ /* FLOW THROUGH TO genericReplace */
} else {
/*
- * Need to process indices at runtime. This could be because the
- * indices are not constants, or because we need to resolve them to
- * absolute indices to work out if a replacement is going to happen.
- * In any case, to runtime it is.
+ * When we have no replacement string to worry about, we may
+ * have more luck, because the forbidden empty string replacements
+ * are harmless when they are replaced by another empty string.
*/
+ if ((first == TCL_INDEX_BEFORE) || (first == TCL_INDEX_START)) {
+ /* empty prefix - build suffix only */
+
+ if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+ /* empty suffix too => empty result */
+ OP( POP); /* Pop original */
+ PUSH ( "");
+ return TCL_OK;
+ }
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ return TCL_OK;
+ } else {
+ if ((last == TCL_INDEX_END) || (last == TCL_INDEX_AFTER)) {
+ /* empty suffix - build prefix only */
+ OP44( STR_RANGE_IMM, 0, first-1);
+ return TCL_OK;
+ }
+ OP( DUP);
+ OP44( STR_RANGE_IMM, 0, first-1);
+ OP4( REVERSE, 2);
+ OP44( STR_RANGE_IMM, last + 1, TCL_INDEX_END);
+ OP1( STR_CONCAT1, 2);
+ return TCL_OK;
+ }
+ }
+
genericReplace:
- CompileWord(envPtr, valueTokenPtr, interp, 1);
tokenPtr = TokenAfter(valueTokenPtr);
CompileWord(envPtr, tokenPtr, interp, 2);
tokenPtr = TokenAfter(tokenPtr);
CompileWord(envPtr, tokenPtr, interp, 3);
- if (replacementTokenPtr != NULL) {
- CompileWord(envPtr, replacementTokenPtr, interp, 4);
+ if (parsePtr->numWords == 5) {
+ tokenPtr = TokenAfter(tokenPtr);
+ CompileWord(envPtr, tokenPtr, interp, 4);
} else {
PUSH( "");
}
OP( STR_REPLACE);
return TCL_OK;
- }
}
int
@@ -1319,7 +1362,7 @@ static int
UniCharIsHexDigit(
int character)
{
- return (character >= 0) && (character < 0x80) && isxdigit(character);
+ return (character >= 0) && (character < 0x80) && isxdigit(UCHAR(character));
}
StringClassDesc const tclStringClassTable[] = {
@@ -1468,8 +1511,8 @@ TclSubstCompile(
switch (tokenPtr->type) {
case TCL_TOKEN_TEXT:
- literal = TclRegisterNewLiteral(envPtr,
- tokenPtr->start, tokenPtr->size);
+ literal = TclRegisterLiteral(envPtr,
+ tokenPtr->start, tokenPtr->size, 0);
TclEmitPush(literal, envPtr);
TclAdvanceLines(&bline, tokenPtr->start,
tokenPtr->start + tokenPtr->size);
@@ -1478,7 +1521,7 @@ TclSubstCompile(
case TCL_TOKEN_BS:
length = TclParseBackslash(tokenPtr->start, tokenPtr->size,
NULL, buf);
- literal = TclRegisterNewLiteral(envPtr, buf, length);
+ literal = TclRegisterLiteral(envPtr, buf, length, 0);
TclEmitPush(literal, envPtr);
count++;
continue;
@@ -1914,10 +1957,10 @@ TclCompileSwitchCmd(
}
if (numWords % 2) {
abort:
- ckfree((char *) bodyToken);
- ckfree((char *) bodyTokenArray);
- ckfree((char *) bodyLines);
- ckfree((char *) bodyContLines);
+ ckfree(bodyToken);
+ ckfree(bodyTokenArray);
+ ckfree(bodyLines);
+ ckfree(bodyContLines);
return TCL_ERROR;
}
} else if (numWords % 2 || numWords == 0) {
@@ -2837,7 +2880,7 @@ TclCompileTryCmd(
}
if (objc > 0) {
int len;
- const char *varname = Tcl_GetStringFromObj(objv[0], &len);
+ const char *varname = TclGetStringFromObj(objv[0], &len);
resultVarIndices[i] = LocalScalar(varname, len, envPtr);
if (resultVarIndices[i] < 0) {
@@ -2849,7 +2892,7 @@ TclCompileTryCmd(
}
if (objc == 2) {
int len;
- const char *varname = Tcl_GetStringFromObj(objv[1], &len);
+ const char *varname = TclGetStringFromObj(objv[1], &len);
optionVarIndices[i] = LocalScalar(varname, len, envPtr);
if (optionVarIndices[i] < 0) {
@@ -3052,7 +3095,7 @@ IssueTryClausesInstructions(
OP4( DICT_GET, 1);
TclAdjustStackDepth(-1, envPtr);
OP44( LIST_RANGE_IMM, 0, len-1);
- p = Tcl_GetStringFromObj(matchClauses[i], &len);
+ p = TclGetStringFromObj(matchClauses[i], &len);
PushLiteral(envPtr, p, len);
OP( STR_EQ);
JUMP4( JUMP_FALSE, notECJumpSource);
@@ -3263,7 +3306,7 @@ IssueTryClausesFinallyInstructions(
OP4( DICT_GET, 1);
TclAdjustStackDepth(-1, envPtr);
OP44( LIST_RANGE_IMM, 0, len-1);
- p = Tcl_GetStringFromObj(matchClauses[i], &len);
+ p = TclGetStringFromObj(matchClauses[i], &len);
PushLiteral(envPtr, p, len);
OP( STR_EQ);
JUMP4( JUMP_FALSE, notECJumpSource);
@@ -3591,7 +3634,7 @@ TclCompileUnsetCmd(
const char *bytes;
int len;
- bytes = Tcl_GetStringFromObj(leadingWord, &len);
+ bytes = TclGetStringFromObj(leadingWord, &len);
if (i == 1 && len == 11 && !strncmp("-nocomplain", bytes, 11)) {
flags = 0;
haveFlags++;