diff options
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r-- | generic/tclParse.c | 304 |
1 files changed, 138 insertions, 166 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 96c2a10..3c984bf 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ - + #include "tclInt.h" /* @@ -182,13 +182,13 @@ static int ParseWhiteSpace(const char *src, int numBytes, * * TclParseInit -- * - * Initialize the fields of a Tcl_Parse struct. + * Initialize the fields of a Tcl_Parse struct. * * Results: - * None. + * None. * * Side effects: - * The Tcl_Parse struct pointed to by parsePtr gets initialized. + * The Tcl_Parse struct pointed to by parsePtr gets initialized. * *---------------------------------------------------------------------- */ @@ -251,7 +251,7 @@ Tcl_ParseCommand( * command terminator. If zero, then close * bracket has no special meaning. */ register Tcl_Parse *parsePtr) - /* Structure to fill in with information about + /* Structure to fill in with information about * the parsed command; any previous * information in the structure is ignored. */ { @@ -496,9 +496,10 @@ Tcl_ParseCommand( * tokens representing the expanded list. */ - CONST char *listStart; + const char *listStart; int growthNeeded = wordIndex + 2*elemCount - parsePtr->numTokens; + parsePtr->numWords += elemCount - 1; if (growthNeeded > 0) { TclGrowParseTokenArray(parsePtr, growthNeeded); @@ -672,7 +673,7 @@ ParseWhiteSpace( if (p[1] != '\n') { break; } - p+=2; + p += 2; if (--numBytes == 0) { *incompletePtr = 1; break; @@ -743,21 +744,21 @@ int TclParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ - Tcl_UniChar *resultPtr) /* Points to storage provided by caller where - * the Tcl_UniChar resulting from the + int *resultPtr) /* Points to storage provided by caller where + * the character resulting from the * conversion is to be written. */ { - Tcl_UniChar result = 0; + int result = 0; register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); - if (!isxdigit(digit)) { + if (!isxdigit(digit) || (result > 0x10fff)) { break; } - ++p; + p++; result <<= 4; if (digit >= 'a') { @@ -782,14 +783,14 @@ TclParseHex( * sequence as defined by Tcl's parsing rules. * * Results: - * Records at readPtr the number of bytes making up the backslash - * sequence. Records at dst the UTF-8 encoded equivalent of that - * backslash sequence. Returns the number of bytes written to dst, at - * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results - * are not needed, but the return value is the same either way. + * Records at readPtr the number of bytes making up the backslash + * sequence. Records at dst the UTF-8 encoded equivalent of that + * backslash sequence. Returns the number of bytes written to dst, at + * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + * are not needed, but the return value is the same either way. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -807,7 +808,8 @@ TclParseBackslash( * written there. */ { register const char *p = src+1; - Tcl_UniChar result; + Tcl_UniChar unichar; + int result; int count; char buf[TCL_UTF_MAX]; @@ -864,7 +866,7 @@ TclParseBackslash( result = 0xb; break; case 'x': - count += TclParseHex(p+1, numBytes-2, &result); + count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "x". @@ -887,6 +889,15 @@ TclParseBackslash( result = 'u'; } break; + case 'U': + count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); + if (count == 2) { + /* + * No hexadigits -> This is just "U". + */ + result = 'U'; + } + break; case '\n': count--; do { @@ -905,21 +916,21 @@ TclParseBackslash( */ if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ - result = (unsigned char)(*p - '0'); + result = *p - '0'; p++; if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 3; - result = (unsigned char)((result << 3) + (*p - '0')); + result = (result << 3) + (*p - '0'); p++; if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ - || (UCHAR(*p) >= '8')) { + || (UCHAR(*p) >= '8') || (result >= 0x20)) { break; } count = 4; - result = (unsigned char)((result << 3) + (*p - '0')); + result = UCHAR((result << 3) + (*p - '0')); break; } @@ -931,14 +942,15 @@ TclParseBackslash( */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ + count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; - count = Tcl_UtfToUniChar(utfBytes, &result) + 1; + count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1; } + result = unichar; break; } @@ -946,7 +958,7 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } - return Tcl_UniCharToUtf((int) result, dst); + return Tcl_UniCharToUtf(result, dst); } /* @@ -958,11 +970,11 @@ TclParseBackslash( * defined by Tcl's parsing rules. * * Results: - * Records in parsePtr information about the parse. Returns the number of - * bytes consumed. + * Records in parsePtr information about the parse. Returns the number of + * bytes consumed. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -1115,7 +1127,7 @@ ParseTokens( } /* - * This is a variable reference. Call Tcl_ParseVarName to do all + * This is a variable reference. Call Tcl_ParseVarName to do all * the dirty work of parsing the name. */ @@ -1139,15 +1151,14 @@ ParseTokens( } /* - * Command substitution. Call Tcl_ParseCommand recursively (and + * Command substitution. Call Tcl_ParseCommand recursively (and * repeatedly) to parse the nested command(s), then throw away the * parse information. */ src++; numBytes--; - nestedPtr = (Tcl_Parse *) - TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); + nestedPtr = TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); while (1) { if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, nestedPtr) != TCL_OK) { @@ -1293,7 +1304,7 @@ Tcl_FreeParse( * call to Tcl_ParseCommand. */ { if (parsePtr->tokenPtr != parsePtr->staticTokens) { - ckfree((char *) parsePtr->tokenPtr); + ckfree(parsePtr->tokenPtr); parsePtr->tokenPtr = parsePtr->staticTokens; } } @@ -1544,8 +1555,7 @@ Tcl_ParseVar( { register Tcl_Obj *objPtr; int code; - Tcl_Parse *parsePtr = (Tcl_Parse *) - TclStackAlloc(interp, sizeof(Tcl_Parse)); + Tcl_Parse *parsePtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { TclStackFree(interp, parsePtr); @@ -1628,7 +1638,7 @@ Tcl_ParseBraces( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1829,7 +1839,7 @@ Tcl_ParseQuotedString( * the string consists of all bytes up to the * first null character. */ register Tcl_Parse *parsePtr, - /* Structure to fill in with information about + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1877,33 +1887,42 @@ Tcl_ParseQuotedString( /* *---------------------------------------------------------------------- * - * Tcl_SubstObj -- - * - * This function performs the substitutions specified on the given string - * as described in the user documentation for the "subst" Tcl command. + * TclSubstParse -- * + * Token parser used by the [subst] command. Parses the string made up of + * 'numBytes' bytes starting at 'bytes'. Parsing is controlled by the + * flags argument to provide support for the -nobackslashes, -nocommands, + * and -novariables options, as represented by the flag values + * TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES. + * * Results: - * A Tcl_Obj* containing the substituted string, or NULL to indicate that - * an error occurred. + * None. * * Side effects: - * See the user documentation. + * The Tcl_Parse struct '*parsePtr' is filled with parse results. + * The caller is expected to eventually call Tcl_FreeParse() to properly + * cleanup the value written there. + * + * If a parse error occurs, the Tcl_InterpState value '*statePtr' is + * filled with the state created by that error. When *statePtr is written + * to, the caller is expected to make the required calls to either + * Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the + * value written there. * *---------------------------------------------------------------------- */ -Tcl_Obj * -Tcl_SubstObj( - Tcl_Interp *interp, /* Interpreter in which substitution occurs */ - Tcl_Obj *objPtr, /* The value to be substituted. */ - int flags) /* What substitutions to do. */ +void +TclSubstParse( + Tcl_Interp *interp, + const char *bytes, + int numBytes, + int flags, + Tcl_Parse *parsePtr, + Tcl_InterpState *statePtr) { - int length, tokensLeft, code; - Tcl_Token *endTokenPtr; - Tcl_Obj *result, *errMsg = NULL; - const char *p = TclGetStringFromObj(objPtr, &length); - Tcl_Parse *parsePtr = (Tcl_Parse *) - TclStackAlloc(interp, sizeof(Tcl_Parse)); + int length = numBytes; + const char *p = bytes; TclParseInit(interp, p, length, parsePtr); @@ -1915,12 +1934,11 @@ Tcl_SubstObj( if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) { /* - * There was a parse error. Save the error message for possible - * reporting later. + * There was a parse error. Save the interpreter state for possible + * error reporting later. */ - errMsg = Tcl_GetObjResult(interp); - Tcl_IncrRefCount(errMsg); + *statePtr = Tcl_SaveInterpState(interp, TCL_ERROR); /* * We need to re-parse to get the portion of the string we can [subst] @@ -1986,10 +2004,10 @@ Tcl_SubstObj( parsePtr->tokenPtr + parsePtr->numTokens - 2; if (varTokenPtr->type != TCL_TOKEN_VARIABLE) { - Tcl_Panic("Tcl_SubstObj: programming error"); + Tcl_Panic("TclSubstParse: programming error"); } if (varTokenPtr[1].type != TCL_TOKEN_TEXT) { - Tcl_Panic("Tcl_SubstObj: programming error"); + Tcl_Panic("TclSubstParse: programming error"); } parsePtr->numTokens -= 2; } @@ -2018,7 +2036,7 @@ Tcl_SubstObj( Tcl_Token *tokenPtr; const char *lastTerm = parsePtr->term; - Tcl_Parse *nestedPtr = (Tcl_Parse *) + Tcl_Parse *nestedPtr = TclStackAlloc(interp, sizeof(Tcl_Parse)); while (TCL_OK == @@ -2063,63 +2081,8 @@ Tcl_SubstObj( break; default: - Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]); - } - } - - /* - * Next, substitute the parsed tokens just as in normal Tcl evaluation. - */ - - endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; - tokensLeft = parsePtr->numTokens; - code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, - &tokensLeft, 1, NULL, NULL); - if (code == TCL_OK) { - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - if (errMsg != NULL) { - Tcl_SetObjResult(interp, errMsg); - Tcl_DecrRefCount(errMsg); - return NULL; - } - return Tcl_GetObjResult(interp); - } - - result = Tcl_NewObj(); - while (1) { - switch (code) { - case TCL_ERROR: - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - Tcl_DecrRefCount(result); - if (errMsg != NULL) { - Tcl_DecrRefCount(errMsg); - } - return NULL; - case TCL_BREAK: - tokensLeft = 0; /* Halt substitution */ - default: - Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp)); + Tcl_Panic("bad parse in TclSubstParse: %c", p[length]); } - - if (tokensLeft == 0) { - Tcl_FreeParse(parsePtr); - TclStackFree(interp, parsePtr); - if (errMsg != NULL) { - if (code != TCL_BREAK) { - Tcl_DecrRefCount(result); - Tcl_SetObjResult(interp, errMsg); - Tcl_DecrRefCount(errMsg); - return NULL; - } - Tcl_DecrRefCount(errMsg); - } - return result; - } - - code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, - &tokensLeft, 1, NULL, NULL); } } @@ -2134,13 +2097,13 @@ Tcl_SubstObj( * non-TCL_OK completion code arises. * * Results: - * The return value is a standard Tcl completion code. The result in - * interp is the substituted value, or an error message if TCL_ERROR is - * returned. If tokensLeftPtr is not NULL, then it points to an int where - * the number of tokens remaining to be processed is written. + * The return value is a standard Tcl completion code. The result in + * interp is the substituted value, or an error message if TCL_ERROR is + * returned. If tokensLeftPtr is not NULL, then it points to an int where + * the number of tokens remaining to be processed is written. * * Side effects: - * Can be anything, depending on the types of substitution done. + * Can be anything, depending on the types of substitution done. * *---------------------------------------------------------------------- */ @@ -2158,29 +2121,30 @@ TclSubstTokens( * integer representing the number of tokens * left to be substituted will be written */ int line, /* The line the script starts on. */ - int* clNextOuter, /* Information about an outer context for */ - CONST char* outerScript) /* continuation line data. This is set by - * EvalEx() to properly handle [...]-nested - * commands. The 'outerScript' refers to the - * most-outer script containing the embedded - * command, which is refered to by 'script'. The - * 'clNextOuter' refers to the current entry in - * the table of continuation lines in this - * "master script", and the character offsets are - * relative to the 'outerScript' as well. - * - * If outerScript == script, then this call is for - * words in the outer-most script/command. See - * Tcl_EvalEx() and TclEvalObjEx() for the places - * generating arguments for which this is true. - */ + int *clNextOuter, /* Information about an outer context for */ + const char *outerScript) /* continuation line data. This is set by + * EvalEx() to properly handle [...]-nested + * commands. The 'outerScript' refers to the + * most-outer script containing the embedded + * command, which is refered to by 'script'. + * The 'clNextOuter' refers to the current + * entry in the table of continuation lines in + * this "master script", and the character + * offsets are relative to the 'outerScript' + * as well. + * + * If outerScript == script, then this call is + * for words in the outer-most script or + * command. See Tcl_EvalEx and TclEvalObjEx + * for the places generating arguments for + * which this is true. */ { Tcl_Obj *result; int code = TCL_OK; #define NUM_STATIC_POS 20 int isLiteral, maxNumCL, numCL, i, adjust; - int* clPosition = NULL; - Interp* iPtr = (Interp*) interp; + int *clPosition = NULL; + Interp *iPtr = (Interp *) interp; int inFile = iPtr->evalFlags & TCL_EVAL_FILE; /* @@ -2197,24 +2161,24 @@ TclSubstTokens( * For the handling of continuation lines in literals we first check if * this is actually a literal. For if not we can forego the additional * processing. Otherwise we pre-allocate a small table to store the - * locations of all continuation lines we find in this literal, if - * any. The table is extended if needed. + * locations of all continuation lines we find in this literal, if any. + * The table is extended if needed. */ - numCL = 0; - maxNumCL = 0; + numCL = 0; + maxNumCL = 0; isLiteral = 1; for (i=0 ; i < count; i++) { - if ((tokenPtr[i].type != TCL_TOKEN_TEXT) && - (tokenPtr[i].type != TCL_TOKEN_BS)) { + if ((tokenPtr[i].type != TCL_TOKEN_TEXT) + && (tokenPtr[i].type != TCL_TOKEN_BS)) { isLiteral = 0; break; } } if (isLiteral) { - maxNumCL = NUM_STATIC_POS; - clPosition = (int*) ckalloc (maxNumCL*sizeof(int)); + maxNumCL = NUM_STATIC_POS; + clPosition = ckalloc(maxNumCL * sizeof(int)); } adjust = 0; @@ -2235,6 +2199,7 @@ TclSubstTokens( appendByteLength = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; + /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its @@ -2250,10 +2215,11 @@ TclSubstTokens( * correction. */ - if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') && - (tokenPtr->start[1] == '\n')) { + if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') + && (tokenPtr->start[1] == '\n')) { if (isLiteral) { int clPos; + if (result == 0) { clPos = 0; } else { @@ -2262,19 +2228,18 @@ TclSubstTokens( if (numCL >= maxNumCL) { maxNumCL *= 2; - clPosition = (int*) ckrealloc ((char*)clPosition, - maxNumCL*sizeof(int)); + clPosition = ckrealloc(clPosition, + maxNumCL * sizeof(int)); } clPosition[numCL] = clPos; - numCL ++; + numCL++; } - adjust ++; + adjust++; } break; case TCL_TOKEN_COMMAND: { - Interp *iPtr = (Interp *) interp; - + /* TIP #280: Transfer line information to nested command */ iPtr->numLevels++; code = TclInterpReady(interp); if (code == TCL_OK) { @@ -2283,21 +2248,27 @@ TclSubstTokens( */ int theline; - TclAdvanceContinuations (&line, &clNextOuter, - tokenPtr->start - outerScript); + + TclAdvanceContinuations(&line, &clNextOuter, + tokenPtr->start - outerScript); theline = line + adjust; - /* TIP #280: Transfer line information to nested command */ code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, 0, theline, clNextOuter, outerScript); + + TclAdvanceLines(&line, tokenPtr->start+1, + tokenPtr->start + tokenPtr->size - 1); + /* * Restore flag reset by nested eval for future bracketed * commands and their cmdframe setup */ - if (inFile) { + + if (inFile) { iPtr->evalFlags |= TCL_EVAL_FILE; } } iPtr->numLevels--; + TclResetCancellation(interp, 0); appendObj = Tcl_GetObjResult(interp); break; } @@ -2396,6 +2367,7 @@ TclSubstTokens( if (code != TCL_ERROR) { /* Keep error message in result! */ if (result != NULL) { Tcl_SetObjResult(interp, result); + /* * If the code found continuation lines (which implies that this * word is a literal), then we store the accumulated table of @@ -2414,7 +2386,7 @@ TclSubstTokens( */ if (maxNumCL) { - ckfree ((char*) clPosition); + ckfree(clPosition); } } else { Tcl_ResetResult(interp); @@ -2558,8 +2530,8 @@ TclIsLocalScalar( const char *lastChar = src + (len - 1); for (p=src ; p<=lastChar ; p++) { - if ((CHAR_TYPE(*p) != TYPE_NORMAL) && - (CHAR_TYPE(*p) != TYPE_COMMAND_END)) { + if ((CHAR_TYPE(*p) != TYPE_NORMAL) + && (CHAR_TYPE(*p) != TYPE_COMMAND_END)) { /* * TCL_COMMAND_END is returned for the last character of the * string. By this point we know it isn't an array or namespace |