diff options
Diffstat (limited to 'generic/tclParse.c')
| -rw-r--r-- | generic/tclParse.c | 715 |
1 files changed, 399 insertions, 316 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 281eee5..b40b636 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -5,8 +5,8 @@ * general-purpose fashion that can be used for many different purposes, * including compilation, direct execution, code analysis, etc. * - * Copyright © 1997 Sun Microsystems, Inc. - * Copyright © 1998-2000 Ajuba Solutions. + * Copyright (c) 1997 Sun Microsystems, Inc. + * Copyright (c) 1998-2000 Ajuba Solutions. * Contributions from Don Porter, NIST, 2002. (not subject to US copyright) * * See the file "license.terms" for information on usage and redistribution of @@ -14,12 +14,15 @@ */ #include "tclInt.h" -#include "tclParse.h" -#include <assert.h> /* * The following table provides parsing information about each possible 8-bit - * character. The table is designed to be referenced with unsigned characters. + * character. The table is designed to be referenced with either signed or + * unsigned characters, so it has 384 entries. The first 128 entries + * correspond to negative character values, the next 256 correspond to + * positive character values. The last 128 entries are identical to the first + * 128. The table is always indexed with a 128-byte offset (the 128th entry + * corresponds to a character value of 0). * * The macro CHAR_TYPE is used to index into the table and return information * about its character argument. The following return values are defined. @@ -38,7 +41,54 @@ * TYPE_BRACE - Character is a curly brace (either left or right). */ -const char tclCharTypeTable[] = { +#define TYPE_NORMAL 0 +#define TYPE_SPACE 0x1 +#define TYPE_COMMAND_END 0x2 +#define TYPE_SUBS 0x4 +#define TYPE_QUOTE 0x8 +#define TYPE_CLOSE_PAREN 0x10 +#define TYPE_CLOSE_BRACK 0x20 +#define TYPE_BRACE 0x40 + +#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)] + +static const char charTypeTable[] = { + /* + * Negative character values, from -128 to -1: + */ + + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, + TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, /* * Positive character values, from 0-127: @@ -119,30 +169,26 @@ const char tclCharTypeTable[] = { * Prototypes for local functions defined in this file: */ -static int CommandComplete(const char *script, int numBytes); +static inline int CommandComplete(const char *script, int numBytes); static int ParseComment(const char *src, int numBytes, Tcl_Parse *parsePtr); static int ParseTokens(const char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); static int ParseWhiteSpace(const char *src, int numBytes, int *incompletePtr, char *typePtr); -static int ParseAllWhiteSpace(const char *src, int numBytes, - int *incompletePtr); -static int ParseHex(const char *src, int numBytes, - int *resultPtr); /* *---------------------------------------------------------------------- * * TclParseInit -- * - * Initialize the fields of a Tcl_Parse struct. + * Initialize the fields of a Tcl_Parse struct. * * Results: - * None. + * None. * * Side effects: - * The Tcl_Parse struct pointed to by parsePtr gets initialized. + * The Tcl_Parse struct pointed to by parsePtr gets initialized. * *---------------------------------------------------------------------- */ @@ -197,19 +243,19 @@ Tcl_ParseCommand( * NULL, then no error message is provided. */ const char *start, /* First character of string containing one or * more Tcl commands. */ - int numBytes, /* Total number of bytes in string. If < 0, + register int numBytes, /* Total number of bytes in string. If < 0, * the script consists of all bytes up to the * first null character. */ int nested, /* Non-zero means this is a nested command: * close bracket should be considered a * command terminator. If zero, then close * bracket has no special meaning. */ - Tcl_Parse *parsePtr) - /* Structure to fill in with information about + register Tcl_Parse *parsePtr) + /* Structure to fill in with information about * the parsed command; any previous * information in the structure is ignored. */ { - const char *src; /* Points to current character in the + register const char *src; /* Points to current character in the * command. */ char type; /* Result returned by CHAR_TYPE(*src). */ Tcl_Token *tokenPtr; /* Pointer to token being filled in. */ @@ -220,17 +266,16 @@ Tcl_ParseCommand( * point to char after terminating one. */ int scanned; - if (numBytes < 0 && start) { - numBytes = strlen(start); - } - TclParseInit(interp, start, numBytes, parsePtr); if ((start == NULL) && (numBytes != 0)) { if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can't parse a NULL pointer", -1)); + Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC); } return TCL_ERROR; } + if (numBytes < 0) { + numBytes = strlen(start); + } + TclParseInit(interp, start, numBytes, parsePtr); parsePtr->commentStart = NULL; parsePtr->commentSize = 0; parsePtr->commandStart = NULL; @@ -261,43 +306,9 @@ Tcl_ParseCommand( */ parsePtr->commandStart = src; - type = CHAR_TYPE(*src); - scanned = 1; /* Can't have missing whitepsace before first word. */ while (1) { int expandWord = 0; - /* Are we at command termination? */ - - if ((numBytes == 0) || (type & terminators) != 0) { - parsePtr->term = src; - parsePtr->commandSize = src + (numBytes != 0) - - parsePtr->commandStart; - return TCL_OK; - } - - /* Are we missing white space after previous word? */ - - if (scanned == 0) { - if (src[-1] == '"') { - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-quote", -1)); - } - parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; - } else { - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-brace", -1)); - } - parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; - } - parsePtr->term = src; - error: - Tcl_FreeParse(parsePtr); - parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; - return TCL_ERROR; - } - /* * Create the token for the word. */ @@ -307,6 +318,23 @@ Tcl_ParseCommand( tokenPtr = &parsePtr->tokenPtr[wordIndex]; tokenPtr->type = TCL_TOKEN_WORD; + /* + * Skip white space before the word. Also skip a backslash-newline + * sequence: it should be treated just like white space. + */ + + scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); + src += scanned; + numBytes -= scanned; + if (numBytes == 0) { + parsePtr->term = src; + break; + } + if ((type & terminators) != 0) { + parsePtr->term = src; + src++; + break; + } tokenPtr->start = src; parsePtr->numTokens++; parsePtr->numWords++; @@ -468,10 +496,9 @@ Tcl_ParseCommand( * tokens representing the expanded list. */ - const char *listStart; + CONST char *listStart; int growthNeeded = wordIndex + 2*elemCount - parsePtr->numTokens; - parsePtr->numWords += elemCount - 1; if (growthNeeded > 0) { TclGrowParseTokenArray(parsePtr, growthNeeded); @@ -526,12 +553,52 @@ Tcl_ParseCommand( tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; } - /* Parse the whitespace between words. */ + /* + * Do two additional checks: (a) make sure we're really at the end of + * a word (there might have been garbage left after a quoted or braced + * word), and (b) check for the end of the command. + */ scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); - src += scanned; - numBytes -= scanned; + if (scanned) { + src += scanned; + numBytes -= scanned; + continue; + } + + if (numBytes == 0) { + parsePtr->term = src; + break; + } + if ((type & terminators) != 0) { + parsePtr->term = src; + src++; + break; + } + if (src[-1] == '"') { + if (interp != NULL) { + Tcl_SetResult(interp, "extra characters after close-quote", + TCL_STATIC); + } + parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; + } else { + if (interp != NULL) { + Tcl_SetResult(interp, "extra characters after close-brace", + TCL_STATIC); + } + parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; + } + parsePtr->term = src; + goto error; } + + parsePtr->commandSize = src - parsePtr->commandStart; + return TCL_OK; + + error: + Tcl_FreeParse(parsePtr); + parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; + return TCL_ERROR; } /* @@ -553,7 +620,7 @@ Tcl_ParseCommand( int TclIsSpaceProc( - int byte) + char byte) { return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n'; } @@ -582,7 +649,7 @@ TclIsSpaceProc( int TclIsBareword( - int byte) + char byte) { if (byte < '0' || byte > 'z') { return 0; @@ -622,14 +689,14 @@ TclIsBareword( static int ParseWhiteSpace( const char *src, /* First character to parse. */ - int numBytes, /* Max number of bytes to scan. */ + register int numBytes, /* Max number of bytes to scan. */ int *incompletePtr, /* Set this boolean memory to true if parsing * indicates an incomplete command. */ char *typePtr) /* Points to location to store character type * of character that ends run of whitespace */ { - char type = TYPE_NORMAL; - const char *p = src; + register char type = TYPE_NORMAL; + register const char *p = src; while (1) { while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) { @@ -646,7 +713,7 @@ ParseWhiteSpace( if (p[1] != '\n') { break; } - p += 2; + p+=2; if (--numBytes == 0) { *incompletePtr = 1; break; @@ -673,37 +740,28 @@ ParseWhiteSpace( *---------------------------------------------------------------------- */ -static int -ParseAllWhiteSpace( +int +TclParseAllWhiteSpace( const char *src, /* First character to parse. */ - int numBytes, /* Max number of byes to scan */ - int *incompletePtr) /* Set true if parse is incomplete. */ + int numBytes) /* Max number of byes to scan */ { + int dummy; char type; const char *p = src; do { - int scanned = ParseWhiteSpace(p, numBytes, incompletePtr, &type); + int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); p += scanned; numBytes -= scanned; } while (numBytes && (*p == '\n') && (p++, --numBytes)); return (p-src); } - -int -TclParseAllWhiteSpace( - const char *src, /* First character to parse. */ - int numBytes) /* Max number of byes to scan */ -{ - int dummy; - return ParseAllWhiteSpace(src, numBytes, &dummy); -} /* *---------------------------------------------------------------------- * - * ParseHex -- + * TclParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing * \x and \u escape sequences). At most numBytes bytes are scanned. @@ -723,24 +781,24 @@ TclParseAllWhiteSpace( */ int -ParseHex( +TclParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ - int *resultPtr) /* Points to storage provided by caller where - * the character resulting from the + Tcl_UniChar *resultPtr) /* Points to storage provided by caller where + * the Tcl_UniChar resulting from the * conversion is to be written. */ { - int result = 0; - const char *p = src; + Tcl_UniChar result = 0; + register const char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); - if (!isxdigit(digit) || (result > 0x10FFF)) { + if (!isxdigit(digit)) { break; } - p++; + ++p; result <<= 4; if (digit >= 'a') { @@ -765,34 +823,34 @@ ParseHex( * sequence as defined by Tcl's parsing rules. * * Results: - * Records at readPtr the number of bytes making up the backslash - * sequence. Records at dst the UTF-8 encoded equivalent of that - * backslash sequence. Returns the number of bytes written to dst, at - * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results - * are not needed, but the return value is the same either way. + * Records at readPtr the number of bytes making up the backslash + * sequence. Records at dst the UTF-8 encoded equivalent of that + * backslash sequence. Returns the number of bytes written to dst, at + * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results + * are not needed, but the return value is the same either way. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ int TclParseBackslash( - const char *src, /* Points to the backslash character of a + const char *src, /* Points to the backslash character of a a * backslash sequence. */ int numBytes, /* Max number of bytes to scan. */ int *readPtr, /* NULL, or points to storage where the number * of bytes scanned should be written. */ char *dst) /* NULL, or points to buffer where the UTF-8 * encoding of the backslash sequence is to be - * written. At most 4 bytes will be written there. */ + * written. At most TCL_UTF_MAX bytes will be + * written there. */ { - const char *p = src+1; - int unichar; - int result; + register const char *p = src+1; + Tcl_UniChar result; int count; - char buf[4] = ""; + char buf[TCL_UTF_MAX]; if (numBytes == 0) { if (readPtr != NULL) { @@ -818,7 +876,7 @@ TclParseBackslash( count = 2; switch (*p) { /* - * Note: in the conversions below, use absolute values (e.g., 0xA) + * Note: in the conversions below, use absolute values (e.g., 0xa) * rather than symbolic values (e.g. \n) that get converted by the * compiler. It's possible that compilers on some platforms will do * the symbolic conversions differently, which could result in @@ -832,25 +890,25 @@ TclParseBackslash( result = 0x8; break; case 'f': - result = 0xC; + result = 0xc; break; case 'n': - result = 0xA; + result = 0xa; break; case 'r': - result = 0xD; + result = 0xd; break; case 't': result = 0x9; break; case 'v': - result = 0xB; + result = 0xb; break; case 'x': - count += ParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); + count += TclParseHex(p+1, numBytes-2, &result); if (count == 2) { /* - * No hexdigits -> This is just "x". + * No hexadigits -> This is just "x". */ result = 'x'; @@ -858,38 +916,16 @@ TclParseBackslash( /* * Keep only the last byte (2 hex digits). */ - result = UCHAR(result); + result = (unsigned char) result; } break; case 'u': - count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); + count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* - * No hexdigits -> This is just "u". + * No hexadigits -> This is just "u". */ result = 'u'; - } else if (((result & 0xFC00) == 0xD800) && (count == 6) - && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { - /* If high surrogate is immediately followed by a low surrogate - * escape, combine them into one character. */ - int low; - int count2 = ParseHex(p+7, 4, &low); - if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { - result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; - count += count2 + 2; - } - } - break; - case 'U': - count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); - if (count == 2) { - /* - * No hexdigits -> This is just "U". - */ - result = 'U'; - } else if ((result | 0x7FF) == 0xDFFF) { - /* Upper or lower surrogate, not allowed in this syntax. */ - result = 0xFFFD; } break; case '\n': @@ -910,21 +946,21 @@ TclParseBackslash( */ if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ - result = *p - '0'; + result = (unsigned char)(*p - '0'); p++; if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 3; - result = (result << 3) + (*p - '0'); + result = (unsigned char)((result << 3) + (*p - '0')); p++; if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ - || (UCHAR(*p) >= '8') || (result >= 0x20)) { + || (UCHAR(*p) >= '8')) { break; } count = 4; - result = UCHAR((result << 3) + (*p - '0')); + result = (unsigned char)((result << 3) + (*p - '0')); break; } @@ -936,15 +972,14 @@ TclParseBackslash( */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ + count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ } else { - char utfBytes[8]; + char utfBytes[TCL_UTF_MAX]; - memcpy(utfBytes, p, numBytes - 1); + memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; - count = TclUtfToUniChar(utfBytes, &unichar) + 1; + count = Tcl_UtfToUniChar(utfBytes, &result) + 1; } - result = unichar; break; } @@ -952,12 +987,7 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } - count = Tcl_UniCharToUtf(result, dst); - if ((result >= 0xD800) && (count < 3)) { - /* Special case for handling high surrogates. */ - count += Tcl_UniCharToUtf(-1, dst + count); - } - return count; + return Tcl_UniCharToUtf((int) result, dst); } /* @@ -969,11 +999,11 @@ TclParseBackslash( * defined by Tcl's parsing rules. * * Results: - * Records in parsePtr information about the parse. Returns the number of - * bytes consumed. + * Records in parsePtr information about the parse. Returns the number of + * bytes consumed. * * Side effects: - * None. + * None. * *---------------------------------------------------------------------- */ @@ -981,18 +1011,23 @@ TclParseBackslash( static int ParseComment( const char *src, /* First character to parse. */ - int numBytes, /* Max number of bytes to scan. */ + register int numBytes, /* Max number of bytes to scan. */ Tcl_Parse *parsePtr) /* Information about parse in progress. * Updated if parsing indicates an incomplete * command. */ { - const char *p = src; - int incomplete = parsePtr->incomplete; + register const char *p = src; while (numBytes) { - int scanned = ParseAllWhiteSpace(p, numBytes, &incomplete); - p += scanned; - numBytes -= scanned; + char type; + int scanned; + + do { + scanned = ParseWhiteSpace(p, numBytes, + &parsePtr->incomplete, &type); + p += scanned; + numBytes -= scanned; + } while (numBytes && (*p == '\n') && (p++,numBytes--)); if ((numBytes == 0) || (*p != '#')) { break; @@ -1001,28 +1036,35 @@ ParseComment( parsePtr->commentStart = p; } - p++; - numBytes--; while (numBytes) { - if (*p == '\n') { - p++; - numBytes--; - break; - } if (*p == '\\') { + scanned = ParseWhiteSpace(p, numBytes, &parsePtr->incomplete, + &type); + if (scanned) { + p += scanned; + numBytes -= scanned; + } else { + /* + * General backslash substitution in comments isn't part + * of the formal spec, but test parse-15.47 and history + * indicate that it has been the de facto rule. Don't + * change it now. + */ + + TclParseBackslash(p, numBytes, &scanned, NULL); + p += scanned; + numBytes -= scanned; + } + } else { p++; numBytes--; - if (numBytes == 0) { + if (p[-1] == '\n') { break; } } - incomplete = (*p == '\n'); - p++; - numBytes--; } parsePtr->commentSize = p - parsePtr->commentStart; } - parsePtr->incomplete = incomplete; return (p - src); } @@ -1053,8 +1095,8 @@ ParseComment( static int ParseTokens( - const char *src, /* First character to parse. */ - int numBytes, /* Max number of bytes to scan. */ + register const char *src, /* First character to parse. */ + register int numBytes, /* Max number of bytes to scan. */ int mask, /* Specifies when to stop parsing. The parse * stops at the first unquoted character whose * CHAR_TYPE contains any of the bits in @@ -1114,7 +1156,7 @@ ParseTokens( } /* - * This is a variable reference. Call Tcl_ParseVarName to do all + * This is a variable reference. Call Tcl_ParseVarName to do all * the dirty work of parsing the name. */ @@ -1138,17 +1180,16 @@ ParseTokens( } /* - * Command substitution. Call Tcl_ParseCommand recursively (and + * Command substitution. Call Tcl_ParseCommand recursively (and * repeatedly) to parse the nested command(s), then throw away the * parse information. */ src++; numBytes--; - nestedPtr = (Tcl_Parse *)TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); + nestedPtr = (Tcl_Parse *) + TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse)); while (1) { - const char *curEnd; - if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, nestedPtr) != TCL_OK) { parsePtr->errorType = nestedPtr->errorType; @@ -1157,9 +1198,8 @@ ParseTokens( TclStackFree(parsePtr->interp, nestedPtr); return TCL_ERROR; } - curEnd = src + numBytes; src = nestedPtr->commandStart + nestedPtr->commandSize; - numBytes = curEnd - src; + numBytes = parsePtr->end - src; Tcl_FreeParse(nestedPtr); /* @@ -1175,8 +1215,8 @@ ParseTokens( } if (numBytes == 0) { if (parsePtr->interp != NULL) { - Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-bracket", -1)); + Tcl_SetResult(parsePtr->interp, + "missing close-bracket", TCL_STATIC); } parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->term = tokenPtr->start; @@ -1294,7 +1334,7 @@ Tcl_FreeParse( * call to Tcl_ParseCommand. */ { if (parsePtr->tokenPtr != parsePtr->staticTokens) { - ckfree(parsePtr->tokenPtr); + ckfree((char *) parsePtr->tokenPtr); parsePtr->tokenPtr = parsePtr->staticTokens; } } @@ -1332,7 +1372,7 @@ Tcl_ParseVarName( * NULL, then no error message is provided. */ const char *start, /* Start of variable substitution string. * First character must be "$". */ - int numBytes, /* Total number of bytes in string. If < 0, + register int numBytes, /* Total number of bytes in string. If < 0, * the string consists of all bytes up to the * first null character. */ Tcl_Parse *parsePtr, /* Structure to fill in with information about @@ -1343,19 +1383,20 @@ Tcl_ParseVarName( * reinitialize it. */ { Tcl_Token *tokenPtr; - const char *src; + register const char *src; int varIndex; unsigned array; - if (numBytes < 0 && start) { + if ((numBytes == 0) || (start == NULL)) { + return TCL_ERROR; + } + if (numBytes < 0) { numBytes = strlen(start); } + if (!append) { TclParseInit(interp, start, numBytes, parsePtr); } - if ((numBytes == 0) || (start == NULL)) { - return TCL_ERROR; - } /* * Generate one token for the variable, an additional token for the name, @@ -1408,8 +1449,8 @@ Tcl_ParseVarName( } if (numBytes == 0) { if (parsePtr->interp != NULL) { - Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-brace for variable name", -1)); + Tcl_SetResult(parsePtr->interp, + "missing close-brace for variable name", TCL_STATIC); } parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE; parsePtr->term = tokenPtr->start-1; @@ -1466,8 +1507,8 @@ Tcl_ParseVarName( } if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){ if (parsePtr->interp != NULL) { - Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing )", -1)); + Tcl_SetResult(parsePtr->interp, "missing )", + TCL_STATIC); } parsePtr->errorType = TCL_PARSE_MISSING_PAREN; parsePtr->term = src; @@ -1524,15 +1565,16 @@ Tcl_ParseVarName( const char * Tcl_ParseVar( Tcl_Interp *interp, /* Context for looking up variable. */ - const char *start, /* Start of variable substitution. First + register const char *start, /* Start of variable substitution. First * character must be "$". */ const char **termPtr) /* If non-NULL, points to word to fill in with * character just after last one in the * variable specifier. */ { - Tcl_Obj *objPtr; + register Tcl_Obj *objPtr; int code; - Tcl_Parse *parsePtr = (Tcl_Parse *)TclStackAlloc(interp, sizeof(Tcl_Parse)); + Tcl_Parse *parsePtr = (Tcl_Parse *) + TclStackAlloc(interp, sizeof(Tcl_Parse)); if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { TclStackFree(interp, parsePtr); @@ -1564,13 +1606,16 @@ Tcl_ParseVar( * At this point we should have an object containing the value of a * variable. Just return the string from that object. * - * Since TclSubstTokens above returned TCL_OK, we know that objPtr - * is shared. It is in both the interp result and the value of the - * variable. Returning the string relies on that to be true. + * This should have returned the object for the user to manage, but + * instead we have some weak reference to the string value in the object, + * which is why we make sure the object exists after resetting the result. + * This isn't ideal, but it's the best we can do with the current + * documented interface. -- hobbs */ - assert( Tcl_IsShared(objPtr) ); - + if (!Tcl_IsShared(objPtr)) { + Tcl_IncrRefCount(objPtr); + } Tcl_ResetResult(interp); return TclGetString(objPtr); } @@ -1609,11 +1654,11 @@ Tcl_ParseBraces( * NULL, then no error message is provided. */ const char *start, /* Start of string enclosed in braces. The * first character must be {'. */ - int numBytes, /* Total number of bytes in string. If < 0, + register int numBytes, /* Total number of bytes in string. If < 0, * the string consists of all bytes up to the * first null character. */ - Tcl_Parse *parsePtr, - /* Structure to fill in with information about + register Tcl_Parse *parsePtr, + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1625,18 +1670,19 @@ Tcl_ParseBraces( * successful. */ { Tcl_Token *tokenPtr; - const char *src; + register const char *src; int startIndex, level, length; - if (numBytes < 0 && start) { + if ((numBytes == 0) || (start == NULL)) { + return TCL_ERROR; + } + if (numBytes < 0) { numBytes = strlen(start); } + if (!append) { TclParseInit(interp, start, numBytes, parsePtr); } - if ((numBytes == 0) || (start == NULL)) { - return TCL_ERROR; - } src = start; startIndex = parsePtr->numTokens; @@ -1739,8 +1785,7 @@ Tcl_ParseBraces( goto error; } - Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-brace", -1)); + Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC); /* * Guess if the problem is due to comments by searching the source string @@ -1750,7 +1795,7 @@ Tcl_ParseBraces( */ { - int openBrace = 0; + register int openBrace = 0; while (--src > start) { switch (*src) { @@ -1761,9 +1806,9 @@ Tcl_ParseBraces( openBrace = 0; break; case '#' : - if (openBrace && TclIsSpaceProcM(src[-1])) { - Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), - ": possible unbalanced brace in comment", -1); + if (openBrace && TclIsSpaceProc(src[-1])) { + Tcl_AppendResult(parsePtr->interp, + ": possible unbalanced brace in comment", NULL); goto error; } break; @@ -1810,11 +1855,11 @@ Tcl_ParseQuotedString( * NULL, then no error message is provided. */ const char *start, /* Start of the quoted string. The first * character must be '"'. */ - int numBytes, /* Total number of bytes in string. If < 0, + register int numBytes, /* Total number of bytes in string. If < 0, * the string consists of all bytes up to the * first null character. */ - Tcl_Parse *parsePtr, - /* Structure to fill in with information about + register Tcl_Parse *parsePtr, + /* Structure to fill in with information about * the string. */ int append, /* Non-zero means append tokens to existing * information in parsePtr; zero means ignore @@ -1825,15 +1870,16 @@ Tcl_ParseQuotedString( * the quoted string's terminating close-quote * if the parse succeeds. */ { - if (numBytes < 0 && start) { + if ((numBytes == 0) || (start == NULL)) { + return TCL_ERROR; + } + if (numBytes < 0) { numBytes = strlen(start); } + if (!append) { TclParseInit(interp, start, numBytes, parsePtr); } - if ((numBytes == 0) || (start == NULL)) { - return TCL_ERROR; - } if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, TCL_SUBST_ALL, parsePtr)) { @@ -1841,8 +1887,7 @@ Tcl_ParseQuotedString( } if (*parsePtr->term != '"') { if (parsePtr->interp != NULL) { - Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing \"", -1)); + Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC); } parsePtr->errorType = TCL_PARSE_MISSING_QUOTE; parsePtr->term = start; @@ -1862,42 +1907,33 @@ Tcl_ParseQuotedString( /* *---------------------------------------------------------------------- * - * TclSubstParse -- + * Tcl_SubstObj -- * - * Token parser used by the [subst] command. Parses the string made up of - * 'numBytes' bytes starting at 'bytes'. Parsing is controlled by the - * flags argument to provide support for the -nobackslashes, -nocommands, - * and -novariables options, as represented by the flag values - * TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES. + * This function performs the substitutions specified on the given string + * as described in the user documentation for the "subst" Tcl command. * * Results: - * None. + * A Tcl_Obj* containing the substituted string, or NULL to indicate that + * an error occurred. * * Side effects: - * The Tcl_Parse struct '*parsePtr' is filled with parse results. - * The caller is expected to eventually call Tcl_FreeParse() to properly - * cleanup the value written there. - * - * If a parse error occurs, the Tcl_InterpState value '*statePtr' is - * filled with the state created by that error. When *statePtr is written - * to, the caller is expected to make the required calls to either - * Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the - * value written there. + * See the user documentation. * *---------------------------------------------------------------------- */ -void -TclSubstParse( - Tcl_Interp *interp, - const char *bytes, - int numBytes, - int flags, - Tcl_Parse *parsePtr, - Tcl_InterpState *statePtr) +Tcl_Obj * +Tcl_SubstObj( + Tcl_Interp *interp, /* Interpreter in which substitution occurs */ + Tcl_Obj *objPtr, /* The value to be substituted. */ + int flags) /* What substitutions to do. */ { - int length = numBytes; - const char *p = bytes; + int length, tokensLeft, code; + Tcl_Token *endTokenPtr; + Tcl_Obj *result, *errMsg = NULL; + const char *p = TclGetStringFromObj(objPtr, &length); + Tcl_Parse *parsePtr = (Tcl_Parse *) + TclStackAlloc(interp, sizeof(Tcl_Parse)); TclParseInit(interp, p, length, parsePtr); @@ -1909,11 +1945,12 @@ TclSubstParse( if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) { /* - * There was a parse error. Save the interpreter state for possible - * error reporting later. + * There was a parse error. Save the error message for possible + * reporting later. */ - *statePtr = Tcl_SaveInterpState(interp, TCL_ERROR); + errMsg = Tcl_GetObjResult(interp); + Tcl_IncrRefCount(errMsg); /* * We need to re-parse to get the portion of the string we can [subst] @@ -1979,10 +2016,10 @@ TclSubstParse( parsePtr->tokenPtr + parsePtr->numTokens - 2; if (varTokenPtr->type != TCL_TOKEN_VARIABLE) { - Tcl_Panic("TclSubstParse: programming error"); + Tcl_Panic("Tcl_SubstObj: programming error"); } if (varTokenPtr[1].type != TCL_TOKEN_TEXT) { - Tcl_Panic("TclSubstParse: programming error"); + Tcl_Panic("Tcl_SubstObj: programming error"); } parsePtr->numTokens -= 2; } @@ -2056,8 +2093,63 @@ TclSubstParse( break; default: - Tcl_Panic("bad parse in TclSubstParse: %c", p[length]); + Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]); + } + } + + /* + * Next, substitute the parsed tokens just as in normal Tcl evaluation. + */ + + endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens; + tokensLeft = parsePtr->numTokens; + code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, + &tokensLeft, 1, NULL, NULL); + if (code == TCL_OK) { + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); + if (errMsg != NULL) { + Tcl_SetObjResult(interp, errMsg); + Tcl_DecrRefCount(errMsg); + return NULL; + } + return Tcl_GetObjResult(interp); + } + + result = Tcl_NewObj(); + while (1) { + switch (code) { + case TCL_ERROR: + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); + Tcl_DecrRefCount(result); + if (errMsg != NULL) { + Tcl_DecrRefCount(errMsg); + } + return NULL; + case TCL_BREAK: + tokensLeft = 0; /* Halt substitution */ + default: + Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp)); } + + if (tokensLeft == 0) { + Tcl_FreeParse(parsePtr); + TclStackFree(interp, parsePtr); + if (errMsg != NULL) { + if (code != TCL_BREAK) { + Tcl_DecrRefCount(result); + Tcl_SetObjResult(interp, errMsg); + Tcl_DecrRefCount(errMsg); + return NULL; + } + Tcl_DecrRefCount(errMsg); + } + return result; + } + + code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft, + &tokensLeft, 1, NULL, NULL); } } @@ -2072,13 +2164,13 @@ TclSubstParse( * non-TCL_OK completion code arises. * * Results: - * The return value is a standard Tcl completion code. The result in - * interp is the substituted value, or an error message if TCL_ERROR is - * returned. If tokensLeftPtr is not NULL, then it points to an int where - * the number of tokens remaining to be processed is written. + * The return value is a standard Tcl completion code. The result in + * interp is the substituted value, or an error message if TCL_ERROR is + * returned. If tokensLeftPtr is not NULL, then it points to an int where + * the number of tokens remaining to be processed is written. * * Side effects: - * Can be anything, depending on the types of substitution done. + * Can be anything, depending on the types of substitution done. * *---------------------------------------------------------------------- */ @@ -2096,30 +2188,29 @@ TclSubstTokens( * integer representing the number of tokens * left to be substituted will be written */ int line, /* The line the script starts on. */ - int *clNextOuter, /* Information about an outer context for */ - const char *outerScript) /* continuation line data. This is set by - * EvalEx() to properly handle [...]-nested - * commands. The 'outerScript' refers to the - * most-outer script containing the embedded - * command, which is refered to by 'script'. - * The 'clNextOuter' refers to the current - * entry in the table of continuation lines in - * this "main script", and the character - * offsets are relative to the 'outerScript' - * as well. - * - * If outerScript == script, then this call is - * for words in the outer-most script or - * command. See Tcl_EvalEx and TclEvalObjEx - * for the places generating arguments for - * which this is true. */ + int* clNextOuter, /* Information about an outer context for */ + CONST char* outerScript) /* continuation line data. This is set by + * EvalEx() to properly handle [...]-nested + * commands. The 'outerScript' refers to the + * most-outer script containing the embedded + * command, which is refered to by 'script'. The + * 'clNextOuter' refers to the current entry in + * the table of continuation lines in this + * "master script", and the character offsets are + * relative to the 'outerScript' as well. + * + * If outerScript == script, then this call is for + * words in the outer-most script/command. See + * Tcl_EvalEx() and TclEvalObjEx() for the places + * generating arguments for which this is true. + */ { Tcl_Obj *result; int code = TCL_OK; #define NUM_STATIC_POS 20 int isLiteral, maxNumCL, numCL, i, adjust; - int *clPosition = NULL; - Interp *iPtr = (Interp *) interp; + int* clPosition = NULL; + Interp* iPtr = (Interp*) interp; int inFile = iPtr->evalFlags & TCL_EVAL_FILE; /* @@ -2133,27 +2224,27 @@ TclSubstTokens( */ /* - * For the handling of continuation lines in literals, first check if - * this is actually a literal. If not then forego the additional - * processing. Otherwise preallocate a small table to store the - * locations of all continuation lines we find in this literal, if any. - * The table is extended if needed. + * For the handling of continuation lines in literals we first check if + * this is actually a literal. For if not we can forego the additional + * processing. Otherwise we pre-allocate a small table to store the + * locations of all continuation lines we find in this literal, if + * any. The table is extended if needed. */ - numCL = 0; - maxNumCL = 0; + numCL = 0; + maxNumCL = 0; isLiteral = 1; for (i=0 ; i < count; i++) { - if ((tokenPtr[i].type != TCL_TOKEN_TEXT) - && (tokenPtr[i].type != TCL_TOKEN_BS)) { + if ((tokenPtr[i].type != TCL_TOKEN_TEXT) && + (tokenPtr[i].type != TCL_TOKEN_BS)) { isLiteral = 0; break; } } if (isLiteral) { - maxNumCL = NUM_STATIC_POS; - clPosition = (int *)ckalloc(maxNumCL * sizeof(int)); + maxNumCL = NUM_STATIC_POS; + clPosition = (int*) ckalloc (maxNumCL*sizeof(int)); } adjust = 0; @@ -2162,7 +2253,7 @@ TclSubstTokens( Tcl_Obj *appendObj = NULL; const char *append = NULL; int appendByteLength = 0; - char utfCharBytes[4] = ""; + char utfCharBytes[TCL_UTF_MAX]; switch (tokenPtr->type) { case TCL_TOKEN_TEXT: @@ -2174,7 +2265,6 @@ TclSubstTokens( appendByteLength = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfCharBytes); append = utfCharBytes; - /* * If the backslash sequence we found is in a literal, and * represented a continuation line, we compute and store its @@ -2190,31 +2280,31 @@ TclSubstTokens( * correction. */ - if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') - && (tokenPtr->start[1] == '\n')) { + if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') && + (tokenPtr->start[1] == '\n')) { if (isLiteral) { int clPos; - if (result == 0) { clPos = 0; } else { - TclGetStringFromObj(result, &clPos); + Tcl_GetStringFromObj(result, &clPos); } if (numCL >= maxNumCL) { maxNumCL *= 2; - clPosition = (int *)ckrealloc(clPosition, - maxNumCL * sizeof(int)); + clPosition = (int*) ckrealloc ((char*)clPosition, + maxNumCL*sizeof(int)); } clPosition[numCL] = clPos; - numCL++; + numCL ++; } - adjust++; + adjust ++; } break; case TCL_TOKEN_COMMAND: { - /* TIP #280: Transfer line information to nested command */ + Interp *iPtr = (Interp *) interp; + iPtr->numLevels++; code = TclInterpReady(interp); if (code == TCL_OK) { @@ -2223,27 +2313,21 @@ TclSubstTokens( */ int theline; - - TclAdvanceContinuations(&line, &clNextOuter, - tokenPtr->start - outerScript); + TclAdvanceContinuations (&line, &clNextOuter, + tokenPtr->start - outerScript); theline = line + adjust; + /* TIP #280: Transfer line information to nested command */ code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2, 0, theline, clNextOuter, outerScript); - - TclAdvanceLines(&line, tokenPtr->start+1, - tokenPtr->start + tokenPtr->size - 1); - /* * Restore flag reset by nested eval for future bracketed * commands and their cmdframe setup */ - - if (inFile) { + if (inFile) { iPtr->evalFlags |= TCL_EVAL_FILE; } } iPtr->numLevels--; - TclResetCancellation(interp, 0); appendObj = Tcl_GetObjResult(interp); break; } @@ -2342,7 +2426,6 @@ TclSubstTokens( if (code != TCL_ERROR) { /* Keep error message in result! */ if (result != NULL) { Tcl_SetObjResult(interp, result); - /* * If the code found continuation lines (which implies that this * word is a literal), then we store the accumulated table of @@ -2361,7 +2444,7 @@ TclSubstTokens( */ if (maxNumCL) { - ckfree(clPosition); + ckfree ((char*) clPosition); } } else { Tcl_ResetResult(interp); @@ -2396,7 +2479,7 @@ TclSubstTokens( *---------------------------------------------------------------------- */ -static int +static inline int CommandComplete( const char *script, /* Script to check. */ int numBytes) /* Number of bytes in script. */ @@ -2474,7 +2557,7 @@ TclObjCommandComplete( * check. */ { int length; - const char *script = TclGetStringFromObj(objPtr, &length); + const char *script = Tcl_GetStringFromObj(objPtr, &length); return CommandComplete(script, length); } |
