diff options
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r-- | generic/tclParse.c | 592 |
1 files changed, 326 insertions, 266 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 211a10b..ee0d4c4 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -11,11 +11,11 @@ * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclParse.c,v 1.59 2007/11/18 22:30:10 dkf Exp $ */ - + #include "tclInt.h" +#include "tclParse.h" +#include <assert.h> /* * The following table provides parsing information about each possible 8-bit @@ -43,18 +43,7 @@ * TYPE_BRACE - Character is a curly brace (either left or right). */ -#define TYPE_NORMAL 0 -#define TYPE_SPACE 0x1 -#define TYPE_COMMAND_END 0x2 -#define TYPE_SUBS 0x4 -#define TYPE_QUOTE 0x8 -#define TYPE_CLOSE_PAREN 0x10 -#define TYPE_CLOSE_BRACK 0x20 -#define TYPE_BRACE 0x40 - -#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)] - -static const char charTypeTable[] = { +const char tclCharTypeTable[] = { /* * Negative character values, from -128 to -1: */ @@ -184,13 +173,13 @@ static int ParseWhiteSpace(const char *src, int numBytes, * * TclParseInit -- * - * Initialize the fields of a Tcl_Parse struct. + * Initialize the fields of a Tcl_Parse struct. * * Results: - * None. + * None. * * Side effects: - * The Tcl_Parse struct pointed to by parsePtr gets initialized. + * The Tcl_Parse struct pointed to by parsePtr gets initialized. * *---------------------------------------------------------------------- */ @@ -198,7 +187,7 @@ static int ParseWhiteSpace(const char *src, int numBytes, void TclParseInit( Tcl_Interp *interp, /* Interpreter to use for error reporting */ - const char *string, /* String to be parsed. */ + const char *start, /* Start of string to be parsed. */ int numBytes, /* Total number of bytes in string. If < 0, * the script consists of all bytes up to the * first null character. */ @@ -208,8 +197,8 @@ TclParseInit( parsePtr->tokenPtr = parsePtr->staticTokens; parsePtr->numTokens = 0; parsePtr->tokensAvailable = NUM_STATIC_TOKENS; - parsePtr->string = string; - parsePtr->end = string + numBytes; + parsePtr->string = start; + parsePtr->end = start + numBytes; parsePtr->term = parsePtr->end; parsePtr->interp = interp; parsePtr->incomplete = 0; @@ -253,7 +242,7 @@ Tcl_ParseCommand( * command terminator. If zero, then close * bracket has no special meaning. */ register Tcl_Parse *parsePtr) - /* Structure to fill in with information about + /* Structure to fill in with information about * the parsed command; any previous * information in the structure is ignored. */ { @@ -270,7 +259,8 @@ Tcl_ParseCommand( if ((start == NULL) && (numBytes != 0)) { if (interp != NULL) { - Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC); + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "can't parse a NULL pointer", -1)); } return TCL_ERROR; } @@ -315,9 +305,7 @@ Tcl_ParseCommand( * Create the token for the word. */ - if (parsePtr->numTokens == parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 1); wordIndex = parsePtr->numTokens; tokenPtr = &parsePtr->tokenPtr[wordIndex]; tokenPtr->type = TCL_TOKEN_WORD; @@ -437,7 +425,7 @@ Tcl_ParseCommand( } if (isLiteral) { - int elemCount = 0, code = TCL_OK; + int elemCount = 0, code = TCL_OK, literal = 1; const char *nextElem, *listEnd, *elemStart; /* @@ -459,21 +447,28 @@ Tcl_ParseCommand( */ while (nextElem < listEnd) { + int size; + code = TclFindElement(NULL, nextElem, listEnd - nextElem, - &elemStart, &nextElem, NULL, NULL); - if (code != TCL_OK) break; + &elemStart, &nextElem, &size, &literal); + if ((code != TCL_OK) || !literal) { + break; + } if (elemStart < listEnd) { elemCount++; } } - if (code != TCL_OK) { + if ((code != TCL_OK) || !literal) { /* - * Some list element could not be parsed. This means the - * literal string was not in fact a valid list. Defer the - * handling of this to compile/eval time, where code is - * already in place to report the "attempt to expand a - * non-list" error. + * Some list element could not be parsed, or is not + * present as a literal substring of the script. The + * compiler cannot handle list elements that get generated + * by a call to TclCopyAndCollapse(). Defer the + * handling of this to compile/eval time, where code is + * already in place to report the "attempt to expand a + * non-list" error or expand lists that require + * substitution. */ tokenPtr->type = TCL_TOKEN_EXPAND_WORD; @@ -493,12 +488,16 @@ Tcl_ParseCommand( * tokens representing the expanded list. */ + const char *listStart; + int growthNeeded = wordIndex + 2*elemCount + - parsePtr->numTokens; + parsePtr->numWords += elemCount - 1; - parsePtr->numTokens = wordIndex + 2*elemCount; - while (parsePtr->numTokens >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); + if (growthNeeded > 0) { + TclGrowParseTokenArray(parsePtr, growthNeeded); + tokenPtr = &parsePtr->tokenPtr[wordIndex]; } - tokenPtr = &parsePtr->tokenPtr[wordIndex]; + parsePtr->numTokens = wordIndex + 2*elemCount; /* * Generate a TCL_TOKEN_SIMPLE_WORD token sequence for @@ -509,14 +508,12 @@ Tcl_ParseCommand( * word value. */ - nextElem = tokenPtr[1].start; - while (isspace(UCHAR(*nextElem))) { - nextElem++; - } + listStart = nextElem = tokenPtr[1].start; while (nextElem < listEnd) { + int quoted; + tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; tokenPtr->numComponents = 1; - tokenPtr->start = nextElem; tokenPtr++; tokenPtr->type = TCL_TOKEN_TEXT; @@ -524,14 +521,13 @@ Tcl_ParseCommand( TclFindElement(NULL, nextElem, listEnd - nextElem, &(tokenPtr->start), &nextElem, &(tokenPtr->size), NULL); - if (tokenPtr->start + tokenPtr->size == listEnd) { - tokenPtr[-1].size = listEnd - tokenPtr[-1].start; - } else { - tokenPtr[-1].size = tokenPtr->start - + tokenPtr->size - tokenPtr[-1].start; - tokenPtr[-1].size += (isspace(UCHAR( - tokenPtr->start[tokenPtr->size])) == 0); - } + + quoted = (tokenPtr->start[-1] == '{' + || tokenPtr->start[-1] == '"') + && tokenPtr->start > listStart; + tokenPtr[-1].start = tokenPtr->start - quoted; + tokenPtr[-1].size = tokenPtr->start + tokenPtr->size + - tokenPtr[-1].start + quoted; tokenPtr++; } @@ -574,14 +570,14 @@ Tcl_ParseCommand( } if (src[-1] == '"') { if (interp != NULL) { - Tcl_SetResult(interp, "extra characters after close-quote", - TCL_STATIC); + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "extra characters after close-quote", -1)); } parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; } else { if (interp != NULL) { - Tcl_SetResult(interp, "extra characters after close-brace", - TCL_STATIC); + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "extra characters after close-brace", -1)); } parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; } @@ -601,6 +597,30 @@ Tcl_ParseCommand( /* *---------------------------------------------------------------------- * + * TclIsSpaceProc -- + * + * Report whether byte is in the set of whitespace characters used by + * Tcl to separate words in scripts or elements in lists. + * + * Results: + * Returns 1, if byte is in the set, 0 otherwise. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclIsSpaceProc( + char byte) +{ + return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n'; +} + +/* + *---------------------------------------------------------------------- + * * ParseWhiteSpace -- * * Scans up to numBytes bytes starting at src, consuming white space @@ -645,7 +665,7 @@ ParseWhiteSpace( if (p[1] != '\n') { break; } - p+=2; + p += 2; if (--numBytes == 0) { *incompletePtr = 1; break; @@ -716,21 +736,21 @@ int TclParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ - Tcl_UniChar *resultPtr) /* Points to storage provided by caller where - * the Tcl_UniChar resulting from the + int *resultPtr) /* Points to storage provided by caller where + * the character resulting from the * conversion is to be written. */ { - Tcl_UniChar result = 0; + int result = 0; register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); - if (!isxdigit(digit)) { + if (!isxdigit(digit) || (result > 0x10fff)) { break; } - ++p; + p++; result <<= 4; if (digit >= 'a') { @@ -755,14 +775,14 @@ TclParseHex( * sequence as defined by Tcl's parsing rules. * * Results: - * Records at readPtr the number of bytes making up the backslash - * sequence. Records at dst the UTF-8 encoded equivalent of that - * backslash sequence. Returns the number of bytes written to dst, at - * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results - * are not needed, but the return value is the same either way. + * Records at readPtr the number of bytes making up the backslash + * sequence. Records at dst the UTF-8 encoded equivalent of that + * backslash sequence. Returns the number of bytes written to dst, at + * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + * are not needed, but the return value is the same either way. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -780,7 +800,8 @@ TclParseBackslash( * written there. */ { register const char *p = src+1; - Tcl_UniChar result; + Tcl_UniChar unichar; + int result; int count; char buf[TCL_UTF_MAX]; @@ -837,7 +858,7 @@ TclParseBackslash( result = 0xb; break; case 'x': - count += TclParseHex(p+1, numBytes-1, &result); + count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "x". @@ -852,7 +873,7 @@ TclParseBackslash( } break; case 'u': - count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result); + count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "u". @@ -860,6 +881,15 @@ TclParseBackslash( result = 'u'; } break; + case 'U': + count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); + if (count == 2) { + /* + * No hexadigits -> This is just "U". + */ + result = 'U'; + } + break; case '\n': count--; do { @@ -878,21 +908,21 @@ TclParseBackslash( */ if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ - result = (unsigned char)(*p - '0'); + result = *p - '0'; p++; if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 3; - result = (unsigned char)((result << 3) + (*p - '0')); + result = (result << 3) + (*p - '0'); p++; if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ - || (UCHAR(*p) >= '8')) { + || (UCHAR(*p) >= '8') || (result >= 0x20)) { break; } count = 4; - result = (unsigned char)((result << 3) + (*p - '0')); + result = UCHAR((result << 3) + (*p - '0')); break; } @@ -904,14 +934,15 @@ TclParseBackslash( */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ + count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; - count = Tcl_UtfToUniChar(utfBytes, &result) + 1; + count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1; } + result = unichar; break; } @@ -919,7 +950,7 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } - return Tcl_UniCharToUtf((int) result, dst); + return Tcl_UniCharToUtf(result, dst); } /* @@ -931,11 +962,11 @@ TclParseBackslash( * defined by Tcl's parsing rules. * * Results: - * Records in parsePtr information about the parse. Returns the number of - * bytes consumed. + * Records in parsePtr information about the parse. Returns the number of + * bytes consumed. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -954,9 +985,12 @@ ParseComment( char type; int scanned; - scanned = TclParseAllWhiteSpace(p, numBytes); - p += scanned; - numBytes -= scanned; + do { + scanned = ParseWhiteSpace(p, numBytes, + &parsePtr->incomplete, &type); + p += scanned; + numBytes -= scanned; + } while (numBytes && (*p == '\n') && (p++,numBytes--)); if ((numBytes == 0) || (*p != '#')) { break; @@ -1039,7 +1073,7 @@ ParseTokens( * termination information. */ { char type; - int originalTokens, varToken; + int originalTokens; int noSubstCmds = !(flags & TCL_SUBST_COMMANDS); int noSubstVars = !(flags & TCL_SUBST_VARIABLES); int noSubstBS = !(flags & TCL_SUBST_BACKSLASHES); @@ -1054,9 +1088,7 @@ ParseTokens( originalTokens = parsePtr->numTokens; while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) { - if (parsePtr->numTokens == parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 1); tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; tokenPtr->start = src; tokenPtr->numComponents = 0; @@ -1075,6 +1107,8 @@ ParseTokens( tokenPtr->size = src - tokenPtr->start; parsePtr->numTokens++; } else if (*src == '$') { + int varToken; + if (noSubstVars) { tokenPtr->type = TCL_TOKEN_TEXT; tokenPtr->size = 1; @@ -1116,8 +1150,7 @@ ParseTokens( src++; numBytes--; - nestedPtr = (Tcl_Parse *) - TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); + nestedPtr = TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); while (1) { if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, nestedPtr) != TCL_OK) { @@ -1144,8 +1177,8 @@ ParseTokens( } if (numBytes == 0) { if (parsePtr->interp != NULL) { - Tcl_SetResult(parsePtr->interp, - "missing close-bracket", TCL_STATIC); + Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( + "missing close-bracket", -1)); } parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->term = tokenPtr->start; @@ -1225,9 +1258,7 @@ ParseTokens( * empty range, so that there is always at least one token added. */ - if (parsePtr->numTokens == parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 1); tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; tokenPtr->start = src; tokenPtr->numComponents = 0; @@ -1265,7 +1296,7 @@ Tcl_FreeParse( * call to Tcl_ParseCommand. */ { if (parsePtr->tokenPtr != parsePtr->staticTokens) { - ckfree((char *) parsePtr->tokenPtr); + ckfree(parsePtr->tokenPtr); parsePtr->tokenPtr = parsePtr->staticTokens; } } @@ -1273,46 +1304,6 @@ Tcl_FreeParse( /* *---------------------------------------------------------------------- * - * TclExpandTokenArray -- - * - * This function is invoked when the current space for tokens in a - * Tcl_Parse structure fills up; it allocates memory to grow the token - * array - * - * Results: - * None. - * - * Side effects: - * Memory is allocated for a new larger token array; the memory for the - * old array is freed, if it had been dynamically allocated. - * - *---------------------------------------------------------------------- - */ - -void -TclExpandTokenArray( - Tcl_Parse *parsePtr) /* Parse structure whose token space has - * overflowed. */ -{ - int newCount = parsePtr->tokensAvailable*2; - - if (parsePtr->tokenPtr != parsePtr->staticTokens) { - parsePtr->tokenPtr = (Tcl_Token *) ckrealloc((char *) - parsePtr->tokenPtr, newCount * sizeof(Tcl_Token)); - } else { - Tcl_Token *newPtr = (Tcl_Token *) - ckalloc(newCount * sizeof(Tcl_Token)); - - memcpy(newPtr, parsePtr->tokenPtr, - (size_t) parsePtr->tokensAvailable * sizeof(Tcl_Token)); - parsePtr->tokenPtr = newPtr; - } - parsePtr->tokensAvailable = newCount; -} - -/* - *---------------------------------------------------------------------- - * * Tcl_ParseVarName -- * * Given a string starting with a $ sign, parse off a variable name and @@ -1377,9 +1368,7 @@ Tcl_ParseVarName( */ src = start; - if (parsePtr->numTokens+2 > parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 2); tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; tokenPtr->type = TCL_TOKEN_VARIABLE; tokenPtr->start = src; @@ -1424,8 +1413,8 @@ Tcl_ParseVarName( } if (numBytes == 0) { if (parsePtr->interp != NULL) { - Tcl_SetResult(parsePtr->interp, - "missing close-brace for variable name", TCL_STATIC); + Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( + "missing close-brace for variable name", -1)); } parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE; parsePtr->term = tokenPtr->start-1; @@ -1492,8 +1481,8 @@ Tcl_ParseVarName( } if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){ if (parsePtr->interp != NULL) { - Tcl_SetResult(parsePtr->interp, "missing )", - TCL_STATIC); + Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( + "missing )", -1)); } parsePtr->errorType = TCL_PARSE_MISSING_PAREN; parsePtr->term = src; @@ -1558,8 +1547,7 @@ Tcl_ParseVar( { register Tcl_Obj *objPtr; int code; - Tcl_Parse *parsePtr = (Tcl_Parse *) - TclStackAlloc(interp, sizeof(Tcl_Parse)); + Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { TclStackFree(interp, parsePtr); @@ -1579,7 +1567,8 @@ Tcl_ParseVar( } code = TclSubstTokens(interp, parsePtr->tokenPtr, parsePtr->numTokens, - NULL, 1); + NULL, 1, NULL, NULL); + Tcl_FreeParse(parsePtr); TclStackFree(interp, parsePtr); if (code != TCL_OK) { return NULL; @@ -1590,16 +1579,13 @@ Tcl_ParseVar( * At this point we should have an object containing the value of a * variable. Just return the string from that object. * - * This should have returned the object for the user to manage, but - * instead we have some weak reference to the string value in the object, - * which is why we make sure the object exists after resetting the result. - * This isn't ideal, but it's the best we can do with the current - * documented interface. -- hobbs + * Since TclSubstTokens above returned TCL_OK, we know that objPtr + * is shared. It is in both the interp result and the value of the + * variable. Returning the string relies on that to be true. */ - if (!Tcl_IsShared(objPtr)) { - Tcl_IncrRefCount(objPtr); - } + assert( Tcl_IsShared(objPtr) ); + Tcl_ResetResult(interp); return TclGetString(objPtr); } @@ -1642,7 +1628,7 @@ Tcl_ParseBraces( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1671,9 +1657,7 @@ Tcl_ParseBraces( src = start; startIndex = parsePtr->numTokens; - if (parsePtr->numTokens == parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 1); tokenPtr = &parsePtr->tokenPtr[startIndex]; tokenPtr->type = TCL_TOKEN_TEXT; tokenPtr->start = src+1; @@ -1736,9 +1720,7 @@ Tcl_ParseBraces( if (tokenPtr->size != 0) { parsePtr->numTokens++; } - if ((parsePtr->numTokens+1) >= parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 2); tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; tokenPtr->type = TCL_TOKEN_BS; tokenPtr->start = src; @@ -1773,7 +1755,8 @@ Tcl_ParseBraces( goto error; } - Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC); + Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( + "missing close-brace", -1)); /* * Guess if the problem is due to comments by searching the source string @@ -1794,9 +1777,9 @@ Tcl_ParseBraces( openBrace = 0; break; case '#' : - if (openBrace && isspace(UCHAR(src[-1]))) { - Tcl_AppendResult(parsePtr->interp, - ": possible unbalanced brace in comment", NULL); + if (openBrace && TclIsSpaceProc(src[-1])) { + Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), + ": possible unbalanced brace in comment", -1); goto error; } break; @@ -1847,7 +1830,7 @@ Tcl_ParseQuotedString( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1875,7 +1858,8 @@ Tcl_ParseQuotedString( } if (*parsePtr->term != '"') { if (parsePtr->interp != NULL) { - Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC); + Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( + "missing \"", -1)); } parsePtr->errorType = TCL_PARSE_MISSING_QUOTE; parsePtr->term = start; @@ -1895,33 +1879,42 @@ Tcl_ParseQuotedString( /* *---------------------------------------------------------------------- * - * Tcl_SubstObj -- - * - * This function performs the substitutions specified on the given string - * as described in the user documentation for the "subst" Tcl command. + * TclSubstParse -- * + * Token parser used by the [subst] command. Parses the string made up of + * 'numBytes' bytes starting at 'bytes'. Parsing is controlled by the + * flags argument to provide support for the -nobackslashes, -nocommands, + * and -novariables options, as represented by the flag values + * TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES. + * * Results: - * A Tcl_Obj* containing the substituted string, or NULL to indicate that - * an error occurred. + * None. * * Side effects: - * See the user documentation. + * The Tcl_Parse struct '*parsePtr' is filled with parse results. + * The caller is expected to eventually call Tcl_FreeParse() to properly + * cleanup the value written there. + * + * If a parse error occurs, the Tcl_InterpState value '*statePtr' is + * filled with the state created by that error. When *statePtr is written + * to, the caller is expected to make the required calls to either + * Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the + * value written there. * *---------------------------------------------------------------------- */ -Tcl_Obj * -Tcl_SubstObj( - Tcl_Interp *interp, /* Interpreter in which substitution occurs */ - Tcl_Obj *objPtr, /* The value to be substituted. */ - int flags) /* What substitutions to do. */ +void +TclSubstParse( + Tcl_Interp *interp, + const char *bytes, + int numBytes, + int flags, + Tcl_Parse *parsePtr, + Tcl_InterpState *statePtr) { - int length, tokensLeft, code; - Tcl_Token *endTokenPtr; - Tcl_Obj *result, *errMsg = NULL; - CONST char *p = TclGetStringFromObj(objPtr, &length); - Tcl_Parse *parsePtr = (Tcl_Parse *) - TclStackAlloc(interp, sizeof(Tcl_Parse)); + int length = numBytes; + const char *p = bytes; TclParseInit(interp, p, length, parsePtr); @@ -1933,12 +1926,11 @@ Tcl_SubstObj( if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) { /* - * There was a parse error. Save the error message for possible - * reporting later. + * There was a parse error. Save the interpreter state for possible + * error reporting later. */ - errMsg = Tcl_GetObjResult(interp); - Tcl_IncrRefCount(errMsg); + *statePtr = Tcl_SaveInterpState(interp, TCL_ERROR); /* * We need to re-parse to get the portion of the string we can [subst] @@ -2004,10 +1996,10 @@ Tcl_SubstObj( parsePtr->tokenPtr + parsePtr->numTokens - 2; if (varTokenPtr->type != TCL_TOKEN_VARIABLE) { - Tcl_Panic("Tcl_SubstObj: programming error"); + Tcl_Panic("TclSubstParse: programming error"); } if (varTokenPtr[1].type != TCL_TOKEN_TEXT) { - Tcl_Panic("Tcl_SubstObj: programming error"); + Tcl_Panic("TclSubstParse: programming error"); } parsePtr->numTokens -= 2; } @@ -2036,7 +2028,7 @@ Tcl_SubstObj( Tcl_Token *tokenPtr; const char *lastTerm = parsePtr->term; - Tcl_Parse *nestedPtr = (Tcl_Parse *) + Tcl_Parse *nestedPtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); while (TCL_OK == @@ -2070,9 +2062,7 @@ Tcl_SubstObj( * got parsed. */ - if (parsePtr->numTokens == parsePtr->tokensAvailable) { - TclExpandTokenArray(parsePtr); - } + TclGrowParseTokenArray(parsePtr, 1); tokenPtr = &(parsePtr->tokenPtr[parsePtr->numTokens]); tokenPtr->start = parsePtr->term; tokenPtr->numComponents = 0; @@ -2083,64 +2073,9 @@ Tcl_SubstObj( break; default: - Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]); + Tcl_Panic("bad parse in TclSubstParse: %c", p[length]); } } - - /* - * Next, substitute the parsed tokens just as in normal Tcl evaluation. - */ - - endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - tokensLeft = parsePtr->numTokens; - code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, - &tokensLeft, 1); - if (code == TCL_OK) { - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - if (errMsg != NULL) { - Tcl_SetObjResult(interp, errMsg); - Tcl_DecrRefCount(errMsg); - return NULL; - } - return Tcl_GetObjResult(interp); - } - - result = Tcl_NewObj(); - while (1) { - switch (code) { - case TCL_ERROR: - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - Tcl_DecrRefCount(result); - if (errMsg != NULL) { - Tcl_DecrRefCount(errMsg); - } - return NULL; - case TCL_BREAK: - tokensLeft = 0; /* Halt substitution */ - default: - Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp)); - } - - if (tokensLeft == 0) { - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - if (errMsg != NULL) { - if (code != TCL_BREAK) { - Tcl_DecrRefCount(result); - Tcl_SetObjResult(interp, errMsg); - Tcl_DecrRefCount(errMsg); - return NULL; - } - Tcl_DecrRefCount(errMsg); - } - return result; - } - - code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, - &tokensLeft, 1); - } } /* @@ -2154,13 +2089,13 @@ Tcl_SubstObj( * non-TCL_OK completion code arises. * * Results: - * The return value is a standard Tcl completion code. The result in - * interp is the substituted value, or an error message if TCL_ERROR is - * returned. If tokensLeftPtr is not NULL, then it points to an int where - * the number of tokens remaining to be processed is written. + * The return value is a standard Tcl completion code. The result in + * interp is the substituted value, or an error message if TCL_ERROR is + * returned. If tokensLeftPtr is not NULL, then it points to an int where + * the number of tokens remaining to be processed is written. * * Side effects: - * Can be anything, depending on the types of substitution done. + * Can be anything, depending on the types of substitution done. * *---------------------------------------------------------------------- */ @@ -2177,10 +2112,32 @@ TclSubstTokens( int *tokensLeftPtr, /* If not NULL, points to memory where an * integer representing the number of tokens * left to be substituted will be written */ - int line) /* The line the script starts on. */ + int line, /* The line the script starts on. */ + int *clNextOuter, /* Information about an outer context for */ + const char *outerScript) /* continuation line data. This is set by + * EvalEx() to properly handle [...]-nested + * commands. The 'outerScript' refers to the + * most-outer script containing the embedded + * command, which is refered to by 'script'. + * The 'clNextOuter' refers to the current + * entry in the table of continuation lines in + * this "master script", and the character + * offsets are relative to the 'outerScript' + * as well. + * + * If outerScript == script, then this call is + * for words in the outer-most script or + * command. See Tcl_EvalEx and TclEvalObjEx + * for the places generating arguments for + * which this is true. */ { Tcl_Obj *result; int code = TCL_OK; +#define NUM_STATIC_POS 20 + int isLiteral, maxNumCL, numCL, i, adjust; + int *clPosition = NULL; + Interp *iPtr = (Interp *) interp; + int inFile = iPtr->evalFlags & TCL_EVAL_FILE; /* * Each pass through this loop will substitute one token, and its @@ -2192,6 +2149,31 @@ TclSubstTokens( * of Tcl_SetObjResult(interp, Tcl_GetObjResult(interp)) and omit them. */ + /* + * For the handling of continuation lines in literals we first check if + * this is actually a literal. For if not we can forego the additional + * processing. Otherwise we pre-allocate a small table to store the + * locations of all continuation lines we find in this literal, if any. + * The table is extended if needed. + */ + + numCL = 0; + maxNumCL = 0; + isLiteral = 1; + for (i=0 ; i < count; i++) { + if ((tokenPtr[i].type != TCL_TOKEN_TEXT) + && (tokenPtr[i].type != TCL_TOKEN_BS)) { + isLiteral = 0; + break; + } + } + + if (isLiteral) { + maxNumCL = NUM_STATIC_POS; + clPosition = ckalloc(maxNumCL * sizeof(int)); + } + + adjust = 0; result = NULL; for (; count>0 && code==TCL_OK ; count--, tokenPtr++) { Tcl_Obj *appendObj = NULL; @@ -2206,22 +2188,79 @@ TclSubstTokens( break; case TCL_TOKEN_BS: - appendByteLength = Tcl_UtfBackslash(tokenPtr->start, NULL, - utfCharBytes); + appendByteLength = TclParseBackslash(tokenPtr->start, + tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; + + /* + * If the backslash sequence we found is in a literal, and + * represented a continuation line, we compute and store its + * location (as char offset to the beginning of the _result_ + * script). We may have to extend the table of locations. + * + * Note that the continuation line information is relevant even if + * the word we are processing is not a literal, as it can affect + * nested commands. See the branch for TCL_TOKEN_COMMAND below, + * where the adjustment we are tracking here is taken into + * account. The good thing is that we do not need a table of + * everything, just the number of lines we have to add as + * correction. + */ + + if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') + && (tokenPtr->start[1] == '\n')) { + if (isLiteral) { + int clPos; + + if (result == 0) { + clPos = 0; + } else { + Tcl_GetStringFromObj(result, &clPos); + } + + if (numCL >= maxNumCL) { + maxNumCL *= 2; + clPosition = ckrealloc(clPosition, + maxNumCL * sizeof(int)); + } + clPosition[numCL] = clPos; + numCL++; + } + adjust++; + } break; case TCL_TOKEN_COMMAND: { - Interp *iPtr = (Interp *) interp; - + /* TIP #280: Transfer line information to nested command */ iPtr->numLevels++; code = TclInterpReady(interp); if (code == TCL_OK) { - /* TIP #280: Transfer line information to nested command */ + /* + * Test cases: info-30.{6,8,9} + */ + + int theline; + + TclAdvanceContinuations(&line, &clNextOuter, + tokenPtr->start - outerScript); + theline = line + adjust; code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, - 0, line); + 0, theline, clNextOuter, outerScript); + + TclAdvanceLines(&line, tokenPtr->start+1, + tokenPtr->start + tokenPtr->size - 1); + + /* + * Restore flag reset by nested eval for future bracketed + * commands and their cmdframe setup + */ + + if (inFile) { + iPtr->evalFlags |= TCL_EVAL_FILE; + } } iPtr->numLevels--; + TclResetCancellation(interp, 0); appendObj = Tcl_GetObjResult(interp); break; } @@ -2236,7 +2275,7 @@ TclSubstTokens( */ code = TclSubstTokens(interp, tokenPtr+2, - tokenPtr->numComponents - 1, NULL, line); + tokenPtr->numComponents - 1, NULL, line, NULL, NULL); arrayIndex = Tcl_GetObjResult(interp); Tcl_IncrRefCount(arrayIndex); } @@ -2320,6 +2359,27 @@ TclSubstTokens( if (code != TCL_ERROR) { /* Keep error message in result! */ if (result != NULL) { Tcl_SetObjResult(interp, result); + + /* + * If the code found continuation lines (which implies that this + * word is a literal), then we store the accumulated table of + * locations in the thread-global data structure for the bytecode + * compiler to find later, assuming that the literal is a script + * which will be compiled. + */ + + if (numCL) { + TclContinuationsEnter(result, numCL, clPosition); + } + + /* + * Release the temp table we used to collect the locations of + * continuation lines, if any. + */ + + if (maxNumCL) { + ckfree(clPosition); + } } else { Tcl_ResetResult(interp); } @@ -2462,8 +2522,8 @@ TclIsLocalScalar( const char *lastChar = src + (len - 1); for (p=src ; p<=lastChar ; p++) { - if ((CHAR_TYPE(*p) != TYPE_NORMAL) && - (CHAR_TYPE(*p) != TYPE_COMMAND_END)) { + if ((CHAR_TYPE(*p) != TYPE_NORMAL) + && (CHAR_TYPE(*p) != TYPE_COMMAND_END)) { /* * TCL_COMMAND_END is returned for the last character of the * string. By this point we know it isn't an array or namespace |