summaryrefslogtreecommitdiffstats
path: root/generic/tclParse.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r--generic/tclParse.c304
1 files changed, 138 insertions, 166 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 96c2a10..3c984bf 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
-
+
#include "tclInt.h"
/*
@@ -182,13 +182,13 @@ static int ParseWhiteSpace(const char *src, int numBytes,
*
* TclParseInit --
*
- * Initialize the fields of a Tcl_Parse struct.
+ * Initialize the fields of a Tcl_Parse struct.
*
* Results:
- * None.
+ * None.
*
* Side effects:
- * The Tcl_Parse struct pointed to by parsePtr gets initialized.
+ * The Tcl_Parse struct pointed to by parsePtr gets initialized.
*
*----------------------------------------------------------------------
*/
@@ -251,7 +251,7 @@ Tcl_ParseCommand(
* command terminator. If zero, then close
* bracket has no special meaning. */
register Tcl_Parse *parsePtr)
- /* Structure to fill in with information about
+ /* Structure to fill in with information about
* the parsed command; any previous
* information in the structure is ignored. */
{
@@ -496,9 +496,10 @@ Tcl_ParseCommand(
* tokens representing the expanded list.
*/
- CONST char *listStart;
+ const char *listStart;
int growthNeeded = wordIndex + 2*elemCount
- parsePtr->numTokens;
+
parsePtr->numWords += elemCount - 1;
if (growthNeeded > 0) {
TclGrowParseTokenArray(parsePtr, growthNeeded);
@@ -672,7 +673,7 @@ ParseWhiteSpace(
if (p[1] != '\n') {
break;
}
- p+=2;
+ p += 2;
if (--numBytes == 0) {
*incompletePtr = 1;
break;
@@ -743,21 +744,21 @@ int
TclParseHex(
const char *src, /* First character to parse. */
int numBytes, /* Max number of byes to scan */
- Tcl_UniChar *resultPtr) /* Points to storage provided by caller where
- * the Tcl_UniChar resulting from the
+ int *resultPtr) /* Points to storage provided by caller where
+ * the character resulting from the
* conversion is to be written. */
{
- Tcl_UniChar result = 0;
+ int result = 0;
register const char *p = src;
while (numBytes--) {
unsigned char digit = UCHAR(*p);
- if (!isxdigit(digit)) {
+ if (!isxdigit(digit) || (result > 0x10fff)) {
break;
}
- ++p;
+ p++;
result <<= 4;
if (digit >= 'a') {
@@ -782,14 +783,14 @@ TclParseHex(
* sequence as defined by Tcl's parsing rules.
*
* Results:
- * Records at readPtr the number of bytes making up the backslash
- * sequence. Records at dst the UTF-8 encoded equivalent of that
- * backslash sequence. Returns the number of bytes written to dst, at
- * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
- * are not needed, but the return value is the same either way.
+ * Records at readPtr the number of bytes making up the backslash
+ * sequence. Records at dst the UTF-8 encoded equivalent of that
+ * backslash sequence. Returns the number of bytes written to dst, at
+ * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
+ * are not needed, but the return value is the same either way.
*
* Side effects:
- * None.
+ * None.
*
*----------------------------------------------------------------------
*/
@@ -807,7 +808,8 @@ TclParseBackslash(
* written there. */
{
register const char *p = src+1;
- Tcl_UniChar result;
+ Tcl_UniChar unichar;
+ int result;
int count;
char buf[TCL_UTF_MAX];
@@ -864,7 +866,7 @@ TclParseBackslash(
result = 0xb;
break;
case 'x':
- count += TclParseHex(p+1, numBytes-2, &result);
+ count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);
if (count == 2) {
/*
* No hexadigits -> This is just "x".
@@ -887,6 +889,15 @@ TclParseBackslash(
result = 'u';
}
break;
+ case 'U':
+ count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
+ if (count == 2) {
+ /*
+ * No hexadigits -> This is just "U".
+ */
+ result = 'U';
+ }
+ break;
case '\n':
count--;
do {
@@ -905,21 +916,21 @@ TclParseBackslash(
*/
if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */
- result = (unsigned char)(*p - '0');
+ result = *p - '0';
p++;
if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */
|| (UCHAR(*p) >= '8')) {
break;
}
count = 3;
- result = (unsigned char)((result << 3) + (*p - '0'));
+ result = (result << 3) + (*p - '0');
p++;
if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */
- || (UCHAR(*p) >= '8')) {
+ || (UCHAR(*p) >= '8') || (result >= 0x20)) {
break;
}
count = 4;
- result = (unsigned char)((result << 3) + (*p - '0'));
+ result = UCHAR((result << 3) + (*p - '0'));
break;
}
@@ -931,14 +942,15 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */
+ count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[TCL_UTF_MAX];
memcpy(utfBytes, p, (size_t) (numBytes - 1));
utfBytes[numBytes - 1] = '\0';
- count = Tcl_UtfToUniChar(utfBytes, &result) + 1;
+ count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;
}
+ result = unichar;
break;
}
@@ -946,7 +958,7 @@ TclParseBackslash(
if (readPtr != NULL) {
*readPtr = count;
}
- return Tcl_UniCharToUtf((int) result, dst);
+ return Tcl_UniCharToUtf(result, dst);
}
/*
@@ -958,11 +970,11 @@ TclParseBackslash(
* defined by Tcl's parsing rules.
*
* Results:
- * Records in parsePtr information about the parse. Returns the number of
- * bytes consumed.
+ * Records in parsePtr information about the parse. Returns the number of
+ * bytes consumed.
*
* Side effects:
- * None.
+ * None.
*
*----------------------------------------------------------------------
*/
@@ -1115,7 +1127,7 @@ ParseTokens(
}
/*
- * This is a variable reference. Call Tcl_ParseVarName to do all
+ * This is a variable reference. Call Tcl_ParseVarName to do all
* the dirty work of parsing the name.
*/
@@ -1139,15 +1151,14 @@ ParseTokens(
}
/*
- * Command substitution. Call Tcl_ParseCommand recursively (and
+ * Command substitution. Call Tcl_ParseCommand recursively (and
* repeatedly) to parse the nested command(s), then throw away the
* parse information.
*/
src++;
numBytes--;
- nestedPtr = (Tcl_Parse *)
- TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
+ nestedPtr = TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
while (1) {
if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1,
nestedPtr) != TCL_OK) {
@@ -1293,7 +1304,7 @@ Tcl_FreeParse(
* call to Tcl_ParseCommand. */
{
if (parsePtr->tokenPtr != parsePtr->staticTokens) {
- ckfree((char *) parsePtr->tokenPtr);
+ ckfree(parsePtr->tokenPtr);
parsePtr->tokenPtr = parsePtr->staticTokens;
}
}
@@ -1544,8 +1555,7 @@ Tcl_ParseVar(
{
register Tcl_Obj *objPtr;
int code;
- Tcl_Parse *parsePtr = (Tcl_Parse *)
- TclStackAlloc(interp, sizeof(Tcl_Parse));
+ Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse));
if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) {
TclStackFree(interp, parsePtr);
@@ -1628,7 +1638,7 @@ Tcl_ParseBraces(
* the string consists of all bytes up to the
* first null character. */
register Tcl_Parse *parsePtr,
- /* Structure to fill in with information about
+ /* Structure to fill in with information about
* the string. */
int append, /* Non-zero means append tokens to existing
* information in parsePtr; zero means ignore
@@ -1829,7 +1839,7 @@ Tcl_ParseQuotedString(
* the string consists of all bytes up to the
* first null character. */
register Tcl_Parse *parsePtr,
- /* Structure to fill in with information about
+ /* Structure to fill in with information about
* the string. */
int append, /* Non-zero means append tokens to existing
* information in parsePtr; zero means ignore
@@ -1877,33 +1887,42 @@ Tcl_ParseQuotedString(
/*
*----------------------------------------------------------------------
*
- * Tcl_SubstObj --
- *
- * This function performs the substitutions specified on the given string
- * as described in the user documentation for the "subst" Tcl command.
+ * TclSubstParse --
*
+ * Token parser used by the [subst] command. Parses the string made up of
+ * 'numBytes' bytes starting at 'bytes'. Parsing is controlled by the
+ * flags argument to provide support for the -nobackslashes, -nocommands,
+ * and -novariables options, as represented by the flag values
+ * TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES.
+ *
* Results:
- * A Tcl_Obj* containing the substituted string, or NULL to indicate that
- * an error occurred.
+ * None.
*
* Side effects:
- * See the user documentation.
+ * The Tcl_Parse struct '*parsePtr' is filled with parse results.
+ * The caller is expected to eventually call Tcl_FreeParse() to properly
+ * cleanup the value written there.
+ *
+ * If a parse error occurs, the Tcl_InterpState value '*statePtr' is
+ * filled with the state created by that error. When *statePtr is written
+ * to, the caller is expected to make the required calls to either
+ * Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the
+ * value written there.
*
*----------------------------------------------------------------------
*/
-Tcl_Obj *
-Tcl_SubstObj(
- Tcl_Interp *interp, /* Interpreter in which substitution occurs */
- Tcl_Obj *objPtr, /* The value to be substituted. */
- int flags) /* What substitutions to do. */
+void
+TclSubstParse(
+ Tcl_Interp *interp,
+ const char *bytes,
+ int numBytes,
+ int flags,
+ Tcl_Parse *parsePtr,
+ Tcl_InterpState *statePtr)
{
- int length, tokensLeft, code;
- Tcl_Token *endTokenPtr;
- Tcl_Obj *result, *errMsg = NULL;
- const char *p = TclGetStringFromObj(objPtr, &length);
- Tcl_Parse *parsePtr = (Tcl_Parse *)
- TclStackAlloc(interp, sizeof(Tcl_Parse));
+ int length = numBytes;
+ const char *p = bytes;
TclParseInit(interp, p, length, parsePtr);
@@ -1915,12 +1934,11 @@ Tcl_SubstObj(
if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {
/*
- * There was a parse error. Save the error message for possible
- * reporting later.
+ * There was a parse error. Save the interpreter state for possible
+ * error reporting later.
*/
- errMsg = Tcl_GetObjResult(interp);
- Tcl_IncrRefCount(errMsg);
+ *statePtr = Tcl_SaveInterpState(interp, TCL_ERROR);
/*
* We need to re-parse to get the portion of the string we can [subst]
@@ -1986,10 +2004,10 @@ Tcl_SubstObj(
parsePtr->tokenPtr + parsePtr->numTokens - 2;
if (varTokenPtr->type != TCL_TOKEN_VARIABLE) {
- Tcl_Panic("Tcl_SubstObj: programming error");
+ Tcl_Panic("TclSubstParse: programming error");
}
if (varTokenPtr[1].type != TCL_TOKEN_TEXT) {
- Tcl_Panic("Tcl_SubstObj: programming error");
+ Tcl_Panic("TclSubstParse: programming error");
}
parsePtr->numTokens -= 2;
}
@@ -2018,7 +2036,7 @@ Tcl_SubstObj(
Tcl_Token *tokenPtr;
const char *lastTerm = parsePtr->term;
- Tcl_Parse *nestedPtr = (Tcl_Parse *)
+ Tcl_Parse *nestedPtr =
TclStackAlloc(interp, sizeof(Tcl_Parse));
while (TCL_OK ==
@@ -2063,63 +2081,8 @@ Tcl_SubstObj(
break;
default:
- Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]);
- }
- }
-
- /*
- * Next, substitute the parsed tokens just as in normal Tcl evaluation.
- */
-
- endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens;
- tokensLeft = parsePtr->numTokens;
- code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
- &tokensLeft, 1, NULL, NULL);
- if (code == TCL_OK) {
- Tcl_FreeParse(parsePtr);
- TclStackFree(interp, parsePtr);
- if (errMsg != NULL) {
- Tcl_SetObjResult(interp, errMsg);
- Tcl_DecrRefCount(errMsg);
- return NULL;
- }
- return Tcl_GetObjResult(interp);
- }
-
- result = Tcl_NewObj();
- while (1) {
- switch (code) {
- case TCL_ERROR:
- Tcl_FreeParse(parsePtr);
- TclStackFree(interp, parsePtr);
- Tcl_DecrRefCount(result);
- if (errMsg != NULL) {
- Tcl_DecrRefCount(errMsg);
- }
- return NULL;
- case TCL_BREAK:
- tokensLeft = 0; /* Halt substitution */
- default:
- Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp));
+ Tcl_Panic("bad parse in TclSubstParse: %c", p[length]);
}
-
- if (tokensLeft == 0) {
- Tcl_FreeParse(parsePtr);
- TclStackFree(interp, parsePtr);
- if (errMsg != NULL) {
- if (code != TCL_BREAK) {
- Tcl_DecrRefCount(result);
- Tcl_SetObjResult(interp, errMsg);
- Tcl_DecrRefCount(errMsg);
- return NULL;
- }
- Tcl_DecrRefCount(errMsg);
- }
- return result;
- }
-
- code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
- &tokensLeft, 1, NULL, NULL);
}
}
@@ -2134,13 +2097,13 @@ Tcl_SubstObj(
* non-TCL_OK completion code arises.
*
* Results:
- * The return value is a standard Tcl completion code. The result in
- * interp is the substituted value, or an error message if TCL_ERROR is
- * returned. If tokensLeftPtr is not NULL, then it points to an int where
- * the number of tokens remaining to be processed is written.
+ * The return value is a standard Tcl completion code. The result in
+ * interp is the substituted value, or an error message if TCL_ERROR is
+ * returned. If tokensLeftPtr is not NULL, then it points to an int where
+ * the number of tokens remaining to be processed is written.
*
* Side effects:
- * Can be anything, depending on the types of substitution done.
+ * Can be anything, depending on the types of substitution done.
*
*----------------------------------------------------------------------
*/
@@ -2158,29 +2121,30 @@ TclSubstTokens(
* integer representing the number of tokens
* left to be substituted will be written */
int line, /* The line the script starts on. */
- int* clNextOuter, /* Information about an outer context for */
- CONST char* outerScript) /* continuation line data. This is set by
- * EvalEx() to properly handle [...]-nested
- * commands. The 'outerScript' refers to the
- * most-outer script containing the embedded
- * command, which is refered to by 'script'. The
- * 'clNextOuter' refers to the current entry in
- * the table of continuation lines in this
- * "master script", and the character offsets are
- * relative to the 'outerScript' as well.
- *
- * If outerScript == script, then this call is for
- * words in the outer-most script/command. See
- * Tcl_EvalEx() and TclEvalObjEx() for the places
- * generating arguments for which this is true.
- */
+ int *clNextOuter, /* Information about an outer context for */
+ const char *outerScript) /* continuation line data. This is set by
+ * EvalEx() to properly handle [...]-nested
+ * commands. The 'outerScript' refers to the
+ * most-outer script containing the embedded
+ * command, which is refered to by 'script'.
+ * The 'clNextOuter' refers to the current
+ * entry in the table of continuation lines in
+ * this "master script", and the character
+ * offsets are relative to the 'outerScript'
+ * as well.
+ *
+ * If outerScript == script, then this call is
+ * for words in the outer-most script or
+ * command. See Tcl_EvalEx and TclEvalObjEx
+ * for the places generating arguments for
+ * which this is true. */
{
Tcl_Obj *result;
int code = TCL_OK;
#define NUM_STATIC_POS 20
int isLiteral, maxNumCL, numCL, i, adjust;
- int* clPosition = NULL;
- Interp* iPtr = (Interp*) interp;
+ int *clPosition = NULL;
+ Interp *iPtr = (Interp *) interp;
int inFile = iPtr->evalFlags & TCL_EVAL_FILE;
/*
@@ -2197,24 +2161,24 @@ TclSubstTokens(
* For the handling of continuation lines in literals we first check if
* this is actually a literal. For if not we can forego the additional
* processing. Otherwise we pre-allocate a small table to store the
- * locations of all continuation lines we find in this literal, if
- * any. The table is extended if needed.
+ * locations of all continuation lines we find in this literal, if any.
+ * The table is extended if needed.
*/
- numCL = 0;
- maxNumCL = 0;
+ numCL = 0;
+ maxNumCL = 0;
isLiteral = 1;
for (i=0 ; i < count; i++) {
- if ((tokenPtr[i].type != TCL_TOKEN_TEXT) &&
- (tokenPtr[i].type != TCL_TOKEN_BS)) {
+ if ((tokenPtr[i].type != TCL_TOKEN_TEXT)
+ && (tokenPtr[i].type != TCL_TOKEN_BS)) {
isLiteral = 0;
break;
}
}
if (isLiteral) {
- maxNumCL = NUM_STATIC_POS;
- clPosition = (int*) ckalloc (maxNumCL*sizeof(int));
+ maxNumCL = NUM_STATIC_POS;
+ clPosition = ckalloc(maxNumCL * sizeof(int));
}
adjust = 0;
@@ -2235,6 +2199,7 @@ TclSubstTokens(
appendByteLength = TclParseBackslash(tokenPtr->start,
tokenPtr->size, NULL, utfCharBytes);
append = utfCharBytes;
+
/*
* If the backslash sequence we found is in a literal, and
* represented a continuation line, we compute and store its
@@ -2250,10 +2215,11 @@ TclSubstTokens(
* correction.
*/
- if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') &&
- (tokenPtr->start[1] == '\n')) {
+ if ((appendByteLength == 1) && (utfCharBytes[0] == ' ')
+ && (tokenPtr->start[1] == '\n')) {
if (isLiteral) {
int clPos;
+
if (result == 0) {
clPos = 0;
} else {
@@ -2262,19 +2228,18 @@ TclSubstTokens(
if (numCL >= maxNumCL) {
maxNumCL *= 2;
- clPosition = (int*) ckrealloc ((char*)clPosition,
- maxNumCL*sizeof(int));
+ clPosition = ckrealloc(clPosition,
+ maxNumCL * sizeof(int));
}
clPosition[numCL] = clPos;
- numCL ++;
+ numCL++;
}
- adjust ++;
+ adjust++;
}
break;
case TCL_TOKEN_COMMAND: {
- Interp *iPtr = (Interp *) interp;
-
+ /* TIP #280: Transfer line information to nested command */
iPtr->numLevels++;
code = TclInterpReady(interp);
if (code == TCL_OK) {
@@ -2283,21 +2248,27 @@ TclSubstTokens(
*/
int theline;
- TclAdvanceContinuations (&line, &clNextOuter,
- tokenPtr->start - outerScript);
+
+ TclAdvanceContinuations(&line, &clNextOuter,
+ tokenPtr->start - outerScript);
theline = line + adjust;
- /* TIP #280: Transfer line information to nested command */
code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2,
0, theline, clNextOuter, outerScript);
+
+ TclAdvanceLines(&line, tokenPtr->start+1,
+ tokenPtr->start + tokenPtr->size - 1);
+
/*
* Restore flag reset by nested eval for future bracketed
* commands and their cmdframe setup
*/
- if (inFile) {
+
+ if (inFile) {
iPtr->evalFlags |= TCL_EVAL_FILE;
}
}
iPtr->numLevels--;
+ TclResetCancellation(interp, 0);
appendObj = Tcl_GetObjResult(interp);
break;
}
@@ -2396,6 +2367,7 @@ TclSubstTokens(
if (code != TCL_ERROR) { /* Keep error message in result! */
if (result != NULL) {
Tcl_SetObjResult(interp, result);
+
/*
* If the code found continuation lines (which implies that this
* word is a literal), then we store the accumulated table of
@@ -2414,7 +2386,7 @@ TclSubstTokens(
*/
if (maxNumCL) {
- ckfree ((char*) clPosition);
+ ckfree(clPosition);
}
} else {
Tcl_ResetResult(interp);
@@ -2558,8 +2530,8 @@ TclIsLocalScalar(
const char *lastChar = src + (len - 1);
for (p=src ; p<=lastChar ; p++) {
- if ((CHAR_TYPE(*p) != TYPE_NORMAL) &&
- (CHAR_TYPE(*p) != TYPE_COMMAND_END)) {
+ if ((CHAR_TYPE(*p) != TYPE_NORMAL)
+ && (CHAR_TYPE(*p) != TYPE_COMMAND_END)) {
/*
* TCL_COMMAND_END is returned for the last character of the
* string. By this point we know it isn't an array or namespace