/*
* tclParse.c --
*
* This file contains functions that parse Tcl scripts. They do so in a
* general-purpose fashion that can be used for many different purposes,
* including compilation, direct execution, code analysis, etc.
*
* Copyright (c) 1997 Sun Microsystems, Inc.
* Copyright (c) 1998-2000 Ajuba Solutions.
* Contributions from Don Porter, NIST, 2002. (not subject to US copyright)
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
* RCS: @(#) $Id: tclParse.c,v 1.51 2007/04/10 22:13:20 dkf Exp $
*/
#include "tclInt.h"
/*
* For now, we enable the {expand} although it is deprecated - remove by final
*/
#ifndef ALLOW_EXPAND
#define ALLOW_EXPAND 1
#endif
/*
* The following table provides parsing information about each possible 8-bit
* character. The table is designed to be referenced with either signed or
* unsigned characters, so it has 384 entries. The first 128 entries
* correspond to negative character values, the next 256 correspond to
* positive character values. The last 128 entries are identical to the first
* 128. The table is always indexed with a 128-byte offset (the 128th entry
* corresponds to a character value of 0).
*
* The macro CHAR_TYPE is used to index into the table and return information
* about its character argument. The following return values are defined.
*
* TYPE_NORMAL - All characters that don't have special significance to
* the Tcl parser.
* TYPE_SPACE - The character is a whitespace character other than
* newline.
* TYPE_COMMAND_END - Character is newline or semicolon.
* TYPE_SUBS - Character begins a substitution or has other special
* meaning in ParseTokens: backslash, dollar sign, or
* open bracket.
* TYPE_QUOTE - Character is a double quote.
* TYPE_CLOSE_PAREN - Character is a right parenthesis.
* TYPE_CLOSE_BRACK - Character is a right square bracket.
* TYPE_BRACE - Character is a curly brace (either left or right).
*/
#define TYPE_NORMAL 0
#define TYPE_SPACE 0x1
#define TYPE_COMMAND_END 0x2
#define TYPE_SUBS 0x4
#define TYPE_QUOTE 0x8
#define TYPE_CLOSE_PAREN 0x10
#define TYPE_CLOSE_BRACK 0x20
#define TYPE_BRACE 0x40
#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)]
static CONST char charTypeTable[] = {
/*
* Negative character values, from -128 to -1:
*/
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
/*
* Positive character values, from 0-127:
*/
TYPE_SUBS, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_SPACE, TYPE_COMMAND_END, TYPE_SPACE,
TYPE_SPACE, TYPE_SPACE, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_SPACE, TYPE_NORMAL, TYPE_QUOTE, TYPE_NORMAL,
TYPE_SUBS, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_CLOSE_PAREN, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_COMMAND_END,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_SUBS,
TYPE_SUBS, TYPE_CLOSE_BRACK, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_BRACE,
TYPE_NORMAL, TYPE_BRACE, TYPE_NORMAL, TYPE_NORMAL,
/*
* Large unsigned character values, from 128-255:
*/
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
};
/*
* Prototypes for local functions defined in this file:
*/
static int CommandComplete(CONST char *script, int numBytes);
static int ParseComment(CONST char *src, int numBytes,
Tcl_Parse *parsePtr);
static int ParseTokens(CONST char *src, int numBytes,
int mask, int flags, Tcl_Parse *parsePtr);
static int ParseWhiteSpace(CONST char *src, int numBytes,
Tcl_Parse *parsePtr, char *typePtr);
/*
*----------------------------------------------------------------------
*
* TclParseInit --
*
* Initialize the fields of a Tcl_Parse struct.
*
* Results:
* None.
*
* Side effects:
* The Tcl_Parse struct pointed to by parsePtr gets initialized.
*
*----------------------------------------------------------------------
*/
void
TclParseInit(
Tcl_Interp *interp, /* Interpreter to use for error reporting */
CONST char *string, /* String to be parsed. */
int numBytes, /* Total number of bytes in string. If < 0,
* the script consists of all bytes up to the
* first null character. */
Tcl_Parse *parsePtr) /* Points to struct to initialize */
{
parsePtr->numWords = 0;
parsePtr->tokenPtr = parsePtr->staticTokens;
parsePtr->numTokens = 0;
parsePtr->tokensAvailable = NUM_STATIC_TOKENS;
parsePtr->string = string;
parsePtr->end = string + numBytes;
parsePtr->term = parsePtr->end;
parsePtr->interp = interp;
parsePtr->incomplete = 0;
parsePtr->errorType = TCL_PARSE_SUCCESS;
}
/*
*----------------------------------------------------------------------
*
* Tcl_ParseCommand --
*
* Given a string, this function parses the first Tcl command in the
* string and returns information about the structure of the command.
*
* Results:
* The return value is TCL_OK if the command was parsed successfully and
* TCL_ERROR otherwise. If an error occurs and interp isn't NULL then an
* error message is left in its result. On a successful return, parsePtr
* is filled in with information about the command that was parsed.
*
* Side effects:
* If there is insufficient space in parsePtr to hold all the information
* about the command, then additional space is malloc-ed. If the function
* returns TCL_OK then the caller must eventually invoke Tcl_FreeParse to
* release any additional space that was allocated.
*
*----------------------------------------------------------------------
*/
int
Tcl_ParseCommand(
Tcl_Interp *interp, /* Interpreter to use for error reporting; if
* NULL, then no error message is provided. */
CONST char *start, /* First character of string containing one or
* more Tcl commands. */
register int numBytes, /* Total number of bytes in string. If < 0,
* the script consists of all bytes up to the
* first null character. */
int nested, /* Non-zero means this is a nested command:
* close bracket should be considered a
* command terminator. If zero, then close
* bracket has no special meaning. */
register Tcl_Parse *parsePtr)
/* Structure to fill in with information about
* the parsed command; any previous
* information in the structure is ignored. */
{
register CONST char *src; /* Points to current character in the
* command. */
char type; /* Result returned by CHAR_TYPE(*src). */
Tcl_Token *tokenPtr; /* Pointer to token being filled in. */
int wordIndex; /* Index of word token for current word. */
int terminators; /* CHAR_TYPE bits that indicate the end of a
* command. */
CONST char *termPtr; /* Set by Tcl_ParseBraces/QuotedString to
* point to char after terminating one. */
int scanned;
if ((start == NULL) && (numBytes != 0)) {
if (interp != NULL) {
Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC);
}
return TCL_ERROR;
}
if (numBytes < 0) {
numBytes = strlen(start);
}
TclParseInit(interp, start, numBytes, parsePtr);
parsePtr->commentStart = NULL;
parsePtr->commentSize = 0;
parsePtr->commandStart = NULL;
parsePtr->commandSize = 0;
if (nested != 0) {
terminators = TYPE_COMMAND_END | TYPE_CLOSE_BRACK;
} else {
terminators = TYPE_COMMAND_END;
}
/*
* Parse any leading space and comments before the first word of the
* command.
*/
scanned = ParseComment(start, numBytes, parsePtr);
src = (start + scanned);
numBytes -= scanned;
if (numBytes == 0) {
if (nested) {
parsePtr->incomplete = nested;
}
}
/*
* The following loop parses the words of the command, one word in each
* iteration through the loop.
*/
parsePtr->commandStart = src;
while (1) {
int expandWord = 0;
/*
* Create the token for the word.
*/
if (parsePtr->numTokens == parsePtr->tokensAvailable) {
TclExpandTokenArray(parsePtr);
}
wordIndex = parsePtr->numTokens;
tokenPtr = &parsePtr->tokenPtr[wordIndex];
tokenPtr->type = TCL_TOKEN_WORD;
/*
* Skip white space before the word. Also skip a backslash-newline
* sequence: it should be treated just like white space.
*/
scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type);
src += scanned;
numBytes -= scanned;
if (numBytes == 0) {
parsePtr->term = src;
break;
}
if ((type & terminators) != 0) {
parsePtr->term = src;
src++;
break;
}
tokenPtr->start = src;
parsePtr->numTokens++;
parsePtr->numWords++;
/*
* At this point the word can have one of four forms: something
* enclosed in quotes, something enclosed in braces, and expanding
* word, or an unquoted word (anything else).
*/
parseWord:
if (*src == '"') {
|