summaryrefslogtreecommitdiffstats
path: root/generic/tclParse.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r--generic/tclParse.c715
1 files changed, 399 insertions, 316 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 281eee5..b40b636 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -5,8 +5,8 @@
* general-purpose fashion that can be used for many different purposes,
* including compilation, direct execution, code analysis, etc.
*
- * Copyright © 1997 Sun Microsystems, Inc.
- * Copyright © 1998-2000 Ajuba Solutions.
+ * Copyright (c) 1997 Sun Microsystems, Inc.
+ * Copyright (c) 1998-2000 Ajuba Solutions.
* Contributions from Don Porter, NIST, 2002. (not subject to US copyright)
*
* See the file "license.terms" for information on usage and redistribution of
@@ -14,12 +14,15 @@
*/
#include "tclInt.h"
-#include "tclParse.h"
-#include <assert.h>
/*
* The following table provides parsing information about each possible 8-bit
- * character. The table is designed to be referenced with unsigned characters.
+ * character. The table is designed to be referenced with either signed or
+ * unsigned characters, so it has 384 entries. The first 128 entries
+ * correspond to negative character values, the next 256 correspond to
+ * positive character values. The last 128 entries are identical to the first
+ * 128. The table is always indexed with a 128-byte offset (the 128th entry
+ * corresponds to a character value of 0).
*
* The macro CHAR_TYPE is used to index into the table and return information
* about its character argument. The following return values are defined.
@@ -38,7 +41,54 @@
* TYPE_BRACE - Character is a curly brace (either left or right).
*/
-const char tclCharTypeTable[] = {
+#define TYPE_NORMAL 0
+#define TYPE_SPACE 0x1
+#define TYPE_COMMAND_END 0x2
+#define TYPE_SUBS 0x4
+#define TYPE_QUOTE 0x8
+#define TYPE_CLOSE_PAREN 0x10
+#define TYPE_CLOSE_BRACK 0x20
+#define TYPE_BRACE 0x40
+
+#define CHAR_TYPE(c) (charTypeTable+128)[(int)(c)]
+
+static const char charTypeTable[] = {
+ /*
+ * Negative character values, from -128 to -1:
+ */
+
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
+ TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL,
/*
* Positive character values, from 0-127:
@@ -119,30 +169,26 @@ const char tclCharTypeTable[] = {
* Prototypes for local functions defined in this file:
*/
-static int CommandComplete(const char *script, int numBytes);
+static inline int CommandComplete(const char *script, int numBytes);
static int ParseComment(const char *src, int numBytes,
Tcl_Parse *parsePtr);
static int ParseTokens(const char *src, int numBytes, int mask,
int flags, Tcl_Parse *parsePtr);
static int ParseWhiteSpace(const char *src, int numBytes,
int *incompletePtr, char *typePtr);
-static int ParseAllWhiteSpace(const char *src, int numBytes,
- int *incompletePtr);
-static int ParseHex(const char *src, int numBytes,
- int *resultPtr);
/*
*----------------------------------------------------------------------
*
* TclParseInit --
*
- * Initialize the fields of a Tcl_Parse struct.
+ * Initialize the fields of a Tcl_Parse struct.
*
* Results:
- * None.
+ * None.
*
* Side effects:
- * The Tcl_Parse struct pointed to by parsePtr gets initialized.
+ * The Tcl_Parse struct pointed to by parsePtr gets initialized.
*
*----------------------------------------------------------------------
*/
@@ -197,19 +243,19 @@ Tcl_ParseCommand(
* NULL, then no error message is provided. */
const char *start, /* First character of string containing one or
* more Tcl commands. */
- int numBytes, /* Total number of bytes in string. If < 0,
+ register int numBytes, /* Total number of bytes in string. If < 0,
* the script consists of all bytes up to the
* first null character. */
int nested, /* Non-zero means this is a nested command:
* close bracket should be considered a
* command terminator. If zero, then close
* bracket has no special meaning. */
- Tcl_Parse *parsePtr)
- /* Structure to fill in with information about
+ register Tcl_Parse *parsePtr)
+ /* Structure to fill in with information about
* the parsed command; any previous
* information in the structure is ignored. */
{
- const char *src; /* Points to current character in the
+ register const char *src; /* Points to current character in the
* command. */
char type; /* Result returned by CHAR_TYPE(*src). */
Tcl_Token *tokenPtr; /* Pointer to token being filled in. */
@@ -220,17 +266,16 @@ Tcl_ParseCommand(
* point to char after terminating one. */
int scanned;
- if (numBytes < 0 && start) {
- numBytes = strlen(start);
- }
- TclParseInit(interp, start, numBytes, parsePtr);
if ((start == NULL) && (numBytes != 0)) {
if (interp != NULL) {
- Tcl_SetObjResult(interp, Tcl_NewStringObj(
- "can't parse a NULL pointer", -1));
+ Tcl_SetResult(interp, "can't parse a NULL pointer", TCL_STATIC);
}
return TCL_ERROR;
}
+ if (numBytes < 0) {
+ numBytes = strlen(start);
+ }
+ TclParseInit(interp, start, numBytes, parsePtr);
parsePtr->commentStart = NULL;
parsePtr->commentSize = 0;
parsePtr->commandStart = NULL;
@@ -261,43 +306,9 @@ Tcl_ParseCommand(
*/
parsePtr->commandStart = src;
- type = CHAR_TYPE(*src);
- scanned = 1; /* Can't have missing whitepsace before first word. */
while (1) {
int expandWord = 0;
- /* Are we at command termination? */
-
- if ((numBytes == 0) || (type & terminators) != 0) {
- parsePtr->term = src;
- parsePtr->commandSize = src + (numBytes != 0)
- - parsePtr->commandStart;
- return TCL_OK;
- }
-
- /* Are we missing white space after previous word? */
-
- if (scanned == 0) {
- if (src[-1] == '"') {
- if (interp != NULL) {
- Tcl_SetObjResult(interp, Tcl_NewStringObj(
- "extra characters after close-quote", -1));
- }
- parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA;
- } else {
- if (interp != NULL) {
- Tcl_SetObjResult(interp, Tcl_NewStringObj(
- "extra characters after close-brace", -1));
- }
- parsePtr->errorType = TCL_PARSE_BRACE_EXTRA;
- }
- parsePtr->term = src;
- error:
- Tcl_FreeParse(parsePtr);
- parsePtr->commandSize = parsePtr->end - parsePtr->commandStart;
- return TCL_ERROR;
- }
-
/*
* Create the token for the word.
*/
@@ -307,6 +318,23 @@ Tcl_ParseCommand(
tokenPtr = &parsePtr->tokenPtr[wordIndex];
tokenPtr->type = TCL_TOKEN_WORD;
+ /*
+ * Skip white space before the word. Also skip a backslash-newline
+ * sequence: it should be treated just like white space.
+ */
+
+ scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type);
+ src += scanned;
+ numBytes -= scanned;
+ if (numBytes == 0) {
+ parsePtr->term = src;
+ break;
+ }
+ if ((type & terminators) != 0) {
+ parsePtr->term = src;
+ src++;
+ break;
+ }
tokenPtr->start = src;
parsePtr->numTokens++;
parsePtr->numWords++;
@@ -468,10 +496,9 @@ Tcl_ParseCommand(
* tokens representing the expanded list.
*/
- const char *listStart;
+ CONST char *listStart;
int growthNeeded = wordIndex + 2*elemCount
- parsePtr->numTokens;
-
parsePtr->numWords += elemCount - 1;
if (growthNeeded > 0) {
TclGrowParseTokenArray(parsePtr, growthNeeded);
@@ -526,12 +553,52 @@ Tcl_ParseCommand(
tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;
}
- /* Parse the whitespace between words. */
+ /*
+ * Do two additional checks: (a) make sure we're really at the end of
+ * a word (there might have been garbage left after a quoted or braced
+ * word), and (b) check for the end of the command.
+ */
scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type);
- src += scanned;
- numBytes -= scanned;
+ if (scanned) {
+ src += scanned;
+ numBytes -= scanned;
+ continue;
+ }
+
+ if (numBytes == 0) {
+ parsePtr->term = src;
+ break;
+ }
+ if ((type & terminators) != 0) {
+ parsePtr->term = src;
+ src++;
+ break;
+ }
+ if (src[-1] == '"') {
+ if (interp != NULL) {
+ Tcl_SetResult(interp, "extra characters after close-quote",
+ TCL_STATIC);
+ }
+ parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA;
+ } else {
+ if (interp != NULL) {
+ Tcl_SetResult(interp, "extra characters after close-brace",
+ TCL_STATIC);
+ }
+ parsePtr->errorType = TCL_PARSE_BRACE_EXTRA;
+ }
+ parsePtr->term = src;
+ goto error;
}
+
+ parsePtr->commandSize = src - parsePtr->commandStart;
+ return TCL_OK;
+
+ error:
+ Tcl_FreeParse(parsePtr);
+ parsePtr->commandSize = parsePtr->end - parsePtr->commandStart;
+ return TCL_ERROR;
}
/*
@@ -553,7 +620,7 @@ Tcl_ParseCommand(
int
TclIsSpaceProc(
- int byte)
+ char byte)
{
return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n';
}
@@ -582,7 +649,7 @@ TclIsSpaceProc(
int
TclIsBareword(
- int byte)
+ char byte)
{
if (byte < '0' || byte > 'z') {
return 0;
@@ -622,14 +689,14 @@ TclIsBareword(
static int
ParseWhiteSpace(
const char *src, /* First character to parse. */
- int numBytes, /* Max number of bytes to scan. */
+ register int numBytes, /* Max number of bytes to scan. */
int *incompletePtr, /* Set this boolean memory to true if parsing
* indicates an incomplete command. */
char *typePtr) /* Points to location to store character type
* of character that ends run of whitespace */
{
- char type = TYPE_NORMAL;
- const char *p = src;
+ register char type = TYPE_NORMAL;
+ register const char *p = src;
while (1) {
while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) {
@@ -646,7 +713,7 @@ ParseWhiteSpace(
if (p[1] != '\n') {
break;
}
- p += 2;
+ p+=2;
if (--numBytes == 0) {
*incompletePtr = 1;
break;
@@ -673,37 +740,28 @@ ParseWhiteSpace(
*----------------------------------------------------------------------
*/
-static int
-ParseAllWhiteSpace(
+int
+TclParseAllWhiteSpace(
const char *src, /* First character to parse. */
- int numBytes, /* Max number of byes to scan */
- int *incompletePtr) /* Set true if parse is incomplete. */
+ int numBytes) /* Max number of byes to scan */
{
+ int dummy;
char type;
const char *p = src;
do {
- int scanned = ParseWhiteSpace(p, numBytes, incompletePtr, &type);
+ int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type);
p += scanned;
numBytes -= scanned;
} while (numBytes && (*p == '\n') && (p++, --numBytes));
return (p-src);
}
-
-int
-TclParseAllWhiteSpace(
- const char *src, /* First character to parse. */
- int numBytes) /* Max number of byes to scan */
-{
- int dummy;
- return ParseAllWhiteSpace(src, numBytes, &dummy);
-}
/*
*----------------------------------------------------------------------
*
- * ParseHex --
+ * TclParseHex --
*
* Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing
* \x and \u escape sequences). At most numBytes bytes are scanned.
@@ -723,24 +781,24 @@ TclParseAllWhiteSpace(
*/
int
-ParseHex(
+TclParseHex(
const char *src, /* First character to parse. */
int numBytes, /* Max number of byes to scan */
- int *resultPtr) /* Points to storage provided by caller where
- * the character resulting from the
+ Tcl_UniChar *resultPtr) /* Points to storage provided by caller where
+ * the Tcl_UniChar resulting from the
* conversion is to be written. */
{
- int result = 0;
- const char *p = src;
+ Tcl_UniChar result = 0;
+ register const char *p = src;
while (numBytes--) {
unsigned char digit = UCHAR(*p);
- if (!isxdigit(digit) || (result > 0x10FFF)) {
+ if (!isxdigit(digit)) {
break;
}
- p++;
+ ++p;
result <<= 4;
if (digit >= 'a') {
@@ -765,34 +823,34 @@ ParseHex(
* sequence as defined by Tcl's parsing rules.
*
* Results:
- * Records at readPtr the number of bytes making up the backslash
- * sequence. Records at dst the UTF-8 encoded equivalent of that
- * backslash sequence. Returns the number of bytes written to dst, at
- * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
- * are not needed, but the return value is the same either way.
+ * Records at readPtr the number of bytes making up the backslash
+ * sequence. Records at dst the UTF-8 encoded equivalent of that
+ * backslash sequence. Returns the number of bytes written to dst, at
+ * most TCL_UTF_MAX. Either readPtr or dst may be NULL, if the results
+ * are not needed, but the return value is the same either way.
*
* Side effects:
- * None.
+ * None.
*
*----------------------------------------------------------------------
*/
int
TclParseBackslash(
- const char *src, /* Points to the backslash character of a
+ const char *src, /* Points to the backslash character of a a
* backslash sequence. */
int numBytes, /* Max number of bytes to scan. */
int *readPtr, /* NULL, or points to storage where the number
* of bytes scanned should be written. */
char *dst) /* NULL, or points to buffer where the UTF-8
* encoding of the backslash sequence is to be
- * written. At most 4 bytes will be written there. */
+ * written. At most TCL_UTF_MAX bytes will be
+ * written there. */
{
- const char *p = src+1;
- int unichar;
- int result;
+ register const char *p = src+1;
+ Tcl_UniChar result;
int count;
- char buf[4] = "";
+ char buf[TCL_UTF_MAX];
if (numBytes == 0) {
if (readPtr != NULL) {
@@ -818,7 +876,7 @@ TclParseBackslash(
count = 2;
switch (*p) {
/*
- * Note: in the conversions below, use absolute values (e.g., 0xA)
+ * Note: in the conversions below, use absolute values (e.g., 0xa)
* rather than symbolic values (e.g. \n) that get converted by the
* compiler. It's possible that compilers on some platforms will do
* the symbolic conversions differently, which could result in
@@ -832,25 +890,25 @@ TclParseBackslash(
result = 0x8;
break;
case 'f':
- result = 0xC;
+ result = 0xc;
break;
case 'n':
- result = 0xA;
+ result = 0xa;
break;
case 'r':
- result = 0xD;
+ result = 0xd;
break;
case 't':
result = 0x9;
break;
case 'v':
- result = 0xB;
+ result = 0xb;
break;
case 'x':
- count += ParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);
+ count += TclParseHex(p+1, numBytes-2, &result);
if (count == 2) {
/*
- * No hexdigits -> This is just "x".
+ * No hexadigits -> This is just "x".
*/
result = 'x';
@@ -858,38 +916,16 @@ TclParseBackslash(
/*
* Keep only the last byte (2 hex digits).
*/
- result = UCHAR(result);
+ result = (unsigned char) result;
}
break;
case 'u':
- count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);
+ count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);
if (count == 2) {
/*
- * No hexdigits -> This is just "u".
+ * No hexadigits -> This is just "u".
*/
result = 'u';
- } else if (((result & 0xFC00) == 0xD800) && (count == 6)
- && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) {
- /* If high surrogate is immediately followed by a low surrogate
- * escape, combine them into one character. */
- int low;
- int count2 = ParseHex(p+7, 4, &low);
- if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) {
- result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
- count += count2 + 2;
- }
- }
- break;
- case 'U':
- count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
- if (count == 2) {
- /*
- * No hexdigits -> This is just "U".
- */
- result = 'U';
- } else if ((result | 0x7FF) == 0xDFFF) {
- /* Upper or lower surrogate, not allowed in this syntax. */
- result = 0xFFFD;
}
break;
case '\n':
@@ -910,21 +946,21 @@ TclParseBackslash(
*/
if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */
- result = *p - '0';
+ result = (unsigned char)(*p - '0');
p++;
if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */
|| (UCHAR(*p) >= '8')) {
break;
}
count = 3;
- result = (result << 3) + (*p - '0');
+ result = (unsigned char)((result << 3) + (*p - '0'));
p++;
if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */
- || (UCHAR(*p) >= '8') || (result >= 0x20)) {
+ || (UCHAR(*p) >= '8')) {
break;
}
count = 4;
- result = UCHAR((result << 3) + (*p - '0'));
+ result = (unsigned char)((result << 3) + (*p - '0'));
break;
}
@@ -936,15 +972,14 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */
} else {
- char utfBytes[8];
+ char utfBytes[TCL_UTF_MAX];
- memcpy(utfBytes, p, numBytes - 1);
+ memcpy(utfBytes, p, (size_t) (numBytes - 1));
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = Tcl_UtfToUniChar(utfBytes, &result) + 1;
}
- result = unichar;
break;
}
@@ -952,12 +987,7 @@ TclParseBackslash(
if (readPtr != NULL) {
*readPtr = count;
}
- count = Tcl_UniCharToUtf(result, dst);
- if ((result >= 0xD800) && (count < 3)) {
- /* Special case for handling high surrogates. */
- count += Tcl_UniCharToUtf(-1, dst + count);
- }
- return count;
+ return Tcl_UniCharToUtf((int) result, dst);
}
/*
@@ -969,11 +999,11 @@ TclParseBackslash(
* defined by Tcl's parsing rules.
*
* Results:
- * Records in parsePtr information about the parse. Returns the number of
- * bytes consumed.
+ * Records in parsePtr information about the parse. Returns the number of
+ * bytes consumed.
*
* Side effects:
- * None.
+ * None.
*
*----------------------------------------------------------------------
*/
@@ -981,18 +1011,23 @@ TclParseBackslash(
static int
ParseComment(
const char *src, /* First character to parse. */
- int numBytes, /* Max number of bytes to scan. */
+ register int numBytes, /* Max number of bytes to scan. */
Tcl_Parse *parsePtr) /* Information about parse in progress.
* Updated if parsing indicates an incomplete
* command. */
{
- const char *p = src;
- int incomplete = parsePtr->incomplete;
+ register const char *p = src;
while (numBytes) {
- int scanned = ParseAllWhiteSpace(p, numBytes, &incomplete);
- p += scanned;
- numBytes -= scanned;
+ char type;
+ int scanned;
+
+ do {
+ scanned = ParseWhiteSpace(p, numBytes,
+ &parsePtr->incomplete, &type);
+ p += scanned;
+ numBytes -= scanned;
+ } while (numBytes && (*p == '\n') && (p++,numBytes--));
if ((numBytes == 0) || (*p != '#')) {
break;
@@ -1001,28 +1036,35 @@ ParseComment(
parsePtr->commentStart = p;
}
- p++;
- numBytes--;
while (numBytes) {
- if (*p == '\n') {
- p++;
- numBytes--;
- break;
- }
if (*p == '\\') {
+ scanned = ParseWhiteSpace(p, numBytes, &parsePtr->incomplete,
+ &type);
+ if (scanned) {
+ p += scanned;
+ numBytes -= scanned;
+ } else {
+ /*
+ * General backslash substitution in comments isn't part
+ * of the formal spec, but test parse-15.47 and history
+ * indicate that it has been the de facto rule. Don't
+ * change it now.
+ */
+
+ TclParseBackslash(p, numBytes, &scanned, NULL);
+ p += scanned;
+ numBytes -= scanned;
+ }
+ } else {
p++;
numBytes--;
- if (numBytes == 0) {
+ if (p[-1] == '\n') {
break;
}
}
- incomplete = (*p == '\n');
- p++;
- numBytes--;
}
parsePtr->commentSize = p - parsePtr->commentStart;
}
- parsePtr->incomplete = incomplete;
return (p - src);
}
@@ -1053,8 +1095,8 @@ ParseComment(
static int
ParseTokens(
- const char *src, /* First character to parse. */
- int numBytes, /* Max number of bytes to scan. */
+ register const char *src, /* First character to parse. */
+ register int numBytes, /* Max number of bytes to scan. */
int mask, /* Specifies when to stop parsing. The parse
* stops at the first unquoted character whose
* CHAR_TYPE contains any of the bits in
@@ -1114,7 +1156,7 @@ ParseTokens(
}
/*
- * This is a variable reference. Call Tcl_ParseVarName to do all
+ * This is a variable reference. Call Tcl_ParseVarName to do all
* the dirty work of parsing the name.
*/
@@ -1138,17 +1180,16 @@ ParseTokens(
}
/*
- * Command substitution. Call Tcl_ParseCommand recursively (and
+ * Command substitution. Call Tcl_ParseCommand recursively (and
* repeatedly) to parse the nested command(s), then throw away the
* parse information.
*/
src++;
numBytes--;
- nestedPtr = (Tcl_Parse *)TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
+ nestedPtr = (Tcl_Parse *)
+ TclStackAlloc(parsePtr->interp, sizeof(Tcl_Parse));
while (1) {
- const char *curEnd;
-
if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1,
nestedPtr) != TCL_OK) {
parsePtr->errorType = nestedPtr->errorType;
@@ -1157,9 +1198,8 @@ ParseTokens(
TclStackFree(parsePtr->interp, nestedPtr);
return TCL_ERROR;
}
- curEnd = src + numBytes;
src = nestedPtr->commandStart + nestedPtr->commandSize;
- numBytes = curEnd - src;
+ numBytes = parsePtr->end - src;
Tcl_FreeParse(nestedPtr);
/*
@@ -1175,8 +1215,8 @@ ParseTokens(
}
if (numBytes == 0) {
if (parsePtr->interp != NULL) {
- Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj(
- "missing close-bracket", -1));
+ Tcl_SetResult(parsePtr->interp,
+ "missing close-bracket", TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
parsePtr->term = tokenPtr->start;
@@ -1294,7 +1334,7 @@ Tcl_FreeParse(
* call to Tcl_ParseCommand. */
{
if (parsePtr->tokenPtr != parsePtr->staticTokens) {
- ckfree(parsePtr->tokenPtr);
+ ckfree((char *) parsePtr->tokenPtr);
parsePtr->tokenPtr = parsePtr->staticTokens;
}
}
@@ -1332,7 +1372,7 @@ Tcl_ParseVarName(
* NULL, then no error message is provided. */
const char *start, /* Start of variable substitution string.
* First character must be "$". */
- int numBytes, /* Total number of bytes in string. If < 0,
+ register int numBytes, /* Total number of bytes in string. If < 0,
* the string consists of all bytes up to the
* first null character. */
Tcl_Parse *parsePtr, /* Structure to fill in with information about
@@ -1343,19 +1383,20 @@ Tcl_ParseVarName(
* reinitialize it. */
{
Tcl_Token *tokenPtr;
- const char *src;
+ register const char *src;
int varIndex;
unsigned array;
- if (numBytes < 0 && start) {
+ if ((numBytes == 0) || (start == NULL)) {
+ return TCL_ERROR;
+ }
+ if (numBytes < 0) {
numBytes = strlen(start);
}
+
if (!append) {
TclParseInit(interp, start, numBytes, parsePtr);
}
- if ((numBytes == 0) || (start == NULL)) {
- return TCL_ERROR;
- }
/*
* Generate one token for the variable, an additional token for the name,
@@ -1408,8 +1449,8 @@ Tcl_ParseVarName(
}
if (numBytes == 0) {
if (parsePtr->interp != NULL) {
- Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj(
- "missing close-brace for variable name", -1));
+ Tcl_SetResult(parsePtr->interp,
+ "missing close-brace for variable name", TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;
parsePtr->term = tokenPtr->start-1;
@@ -1466,8 +1507,8 @@ Tcl_ParseVarName(
}
if ((parsePtr->term == src+numBytes) || (*parsePtr->term != ')')){
if (parsePtr->interp != NULL) {
- Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj(
- "missing )", -1));
+ Tcl_SetResult(parsePtr->interp, "missing )",
+ TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_PAREN;
parsePtr->term = src;
@@ -1524,15 +1565,16 @@ Tcl_ParseVarName(
const char *
Tcl_ParseVar(
Tcl_Interp *interp, /* Context for looking up variable. */
- const char *start, /* Start of variable substitution. First
+ register const char *start, /* Start of variable substitution. First
* character must be "$". */
const char **termPtr) /* If non-NULL, points to word to fill in with
* character just after last one in the
* variable specifier. */
{
- Tcl_Obj *objPtr;
+ register Tcl_Obj *objPtr;
int code;
- Tcl_Parse *parsePtr = (Tcl_Parse *)TclStackAlloc(interp, sizeof(Tcl_Parse));
+ Tcl_Parse *parsePtr = (Tcl_Parse *)
+ TclStackAlloc(interp, sizeof(Tcl_Parse));
if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) {
TclStackFree(interp, parsePtr);
@@ -1564,13 +1606,16 @@ Tcl_ParseVar(
* At this point we should have an object containing the value of a
* variable. Just return the string from that object.
*
- * Since TclSubstTokens above returned TCL_OK, we know that objPtr
- * is shared. It is in both the interp result and the value of the
- * variable. Returning the string relies on that to be true.
+ * This should have returned the object for the user to manage, but
+ * instead we have some weak reference to the string value in the object,
+ * which is why we make sure the object exists after resetting the result.
+ * This isn't ideal, but it's the best we can do with the current
+ * documented interface. -- hobbs
*/
- assert( Tcl_IsShared(objPtr) );
-
+ if (!Tcl_IsShared(objPtr)) {
+ Tcl_IncrRefCount(objPtr);
+ }
Tcl_ResetResult(interp);
return TclGetString(objPtr);
}
@@ -1609,11 +1654,11 @@ Tcl_ParseBraces(
* NULL, then no error message is provided. */
const char *start, /* Start of string enclosed in braces. The
* first character must be {'. */
- int numBytes, /* Total number of bytes in string. If < 0,
+ register int numBytes, /* Total number of bytes in string. If < 0,
* the string consists of all bytes up to the
* first null character. */
- Tcl_Parse *parsePtr,
- /* Structure to fill in with information about
+ register Tcl_Parse *parsePtr,
+ /* Structure to fill in with information about
* the string. */
int append, /* Non-zero means append tokens to existing
* information in parsePtr; zero means ignore
@@ -1625,18 +1670,19 @@ Tcl_ParseBraces(
* successful. */
{
Tcl_Token *tokenPtr;
- const char *src;
+ register const char *src;
int startIndex, level, length;
- if (numBytes < 0 && start) {
+ if ((numBytes == 0) || (start == NULL)) {
+ return TCL_ERROR;
+ }
+ if (numBytes < 0) {
numBytes = strlen(start);
}
+
if (!append) {
TclParseInit(interp, start, numBytes, parsePtr);
}
- if ((numBytes == 0) || (start == NULL)) {
- return TCL_ERROR;
- }
src = start;
startIndex = parsePtr->numTokens;
@@ -1739,8 +1785,7 @@ Tcl_ParseBraces(
goto error;
}
- Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj(
- "missing close-brace", -1));
+ Tcl_SetResult(parsePtr->interp, "missing close-brace", TCL_STATIC);
/*
* Guess if the problem is due to comments by searching the source string
@@ -1750,7 +1795,7 @@ Tcl_ParseBraces(
*/
{
- int openBrace = 0;
+ register int openBrace = 0;
while (--src > start) {
switch (*src) {
@@ -1761,9 +1806,9 @@ Tcl_ParseBraces(
openBrace = 0;
break;
case '#' :
- if (openBrace && TclIsSpaceProcM(src[-1])) {
- Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp),
- ": possible unbalanced brace in comment", -1);
+ if (openBrace && TclIsSpaceProc(src[-1])) {
+ Tcl_AppendResult(parsePtr->interp,
+ ": possible unbalanced brace in comment", NULL);
goto error;
}
break;
@@ -1810,11 +1855,11 @@ Tcl_ParseQuotedString(
* NULL, then no error message is provided. */
const char *start, /* Start of the quoted string. The first
* character must be '"'. */
- int numBytes, /* Total number of bytes in string. If < 0,
+ register int numBytes, /* Total number of bytes in string. If < 0,
* the string consists of all bytes up to the
* first null character. */
- Tcl_Parse *parsePtr,
- /* Structure to fill in with information about
+ register Tcl_Parse *parsePtr,
+ /* Structure to fill in with information about
* the string. */
int append, /* Non-zero means append tokens to existing
* information in parsePtr; zero means ignore
@@ -1825,15 +1870,16 @@ Tcl_ParseQuotedString(
* the quoted string's terminating close-quote
* if the parse succeeds. */
{
- if (numBytes < 0 && start) {
+ if ((numBytes == 0) || (start == NULL)) {
+ return TCL_ERROR;
+ }
+ if (numBytes < 0) {
numBytes = strlen(start);
}
+
if (!append) {
TclParseInit(interp, start, numBytes, parsePtr);
}
- if ((numBytes == 0) || (start == NULL)) {
- return TCL_ERROR;
- }
if (TCL_OK != ParseTokens(start+1, numBytes-1, TYPE_QUOTE, TCL_SUBST_ALL,
parsePtr)) {
@@ -1841,8 +1887,7 @@ Tcl_ParseQuotedString(
}
if (*parsePtr->term != '"') {
if (parsePtr->interp != NULL) {
- Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj(
- "missing \"", -1));
+ Tcl_SetResult(parsePtr->interp, "missing \"", TCL_STATIC);
}
parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;
parsePtr->term = start;
@@ -1862,42 +1907,33 @@ Tcl_ParseQuotedString(
/*
*----------------------------------------------------------------------
*
- * TclSubstParse --
+ * Tcl_SubstObj --
*
- * Token parser used by the [subst] command. Parses the string made up of
- * 'numBytes' bytes starting at 'bytes'. Parsing is controlled by the
- * flags argument to provide support for the -nobackslashes, -nocommands,
- * and -novariables options, as represented by the flag values
- * TCL_SUBST_BACKSLASHES, TCL_SUBST_COMMANDS, TCL_SUBST_VARIABLES.
+ * This function performs the substitutions specified on the given string
+ * as described in the user documentation for the "subst" Tcl command.
*
* Results:
- * None.
+ * A Tcl_Obj* containing the substituted string, or NULL to indicate that
+ * an error occurred.
*
* Side effects:
- * The Tcl_Parse struct '*parsePtr' is filled with parse results.
- * The caller is expected to eventually call Tcl_FreeParse() to properly
- * cleanup the value written there.
- *
- * If a parse error occurs, the Tcl_InterpState value '*statePtr' is
- * filled with the state created by that error. When *statePtr is written
- * to, the caller is expected to make the required calls to either
- * Tcl_RestoreInterpState() or Tcl_DiscardInterpState() to dispose of the
- * value written there.
+ * See the user documentation.
*
*----------------------------------------------------------------------
*/
-void
-TclSubstParse(
- Tcl_Interp *interp,
- const char *bytes,
- int numBytes,
- int flags,
- Tcl_Parse *parsePtr,
- Tcl_InterpState *statePtr)
+Tcl_Obj *
+Tcl_SubstObj(
+ Tcl_Interp *interp, /* Interpreter in which substitution occurs */
+ Tcl_Obj *objPtr, /* The value to be substituted. */
+ int flags) /* What substitutions to do. */
{
- int length = numBytes;
- const char *p = bytes;
+ int length, tokensLeft, code;
+ Tcl_Token *endTokenPtr;
+ Tcl_Obj *result, *errMsg = NULL;
+ const char *p = TclGetStringFromObj(objPtr, &length);
+ Tcl_Parse *parsePtr = (Tcl_Parse *)
+ TclStackAlloc(interp, sizeof(Tcl_Parse));
TclParseInit(interp, p, length, parsePtr);
@@ -1909,11 +1945,12 @@ TclSubstParse(
if (TCL_OK != ParseTokens(p, length, /* mask */ 0, flags, parsePtr)) {
/*
- * There was a parse error. Save the interpreter state for possible
- * error reporting later.
+ * There was a parse error. Save the error message for possible
+ * reporting later.
*/
- *statePtr = Tcl_SaveInterpState(interp, TCL_ERROR);
+ errMsg = Tcl_GetObjResult(interp);
+ Tcl_IncrRefCount(errMsg);
/*
* We need to re-parse to get the portion of the string we can [subst]
@@ -1979,10 +2016,10 @@ TclSubstParse(
parsePtr->tokenPtr + parsePtr->numTokens - 2;
if (varTokenPtr->type != TCL_TOKEN_VARIABLE) {
- Tcl_Panic("TclSubstParse: programming error");
+ Tcl_Panic("Tcl_SubstObj: programming error");
}
if (varTokenPtr[1].type != TCL_TOKEN_TEXT) {
- Tcl_Panic("TclSubstParse: programming error");
+ Tcl_Panic("Tcl_SubstObj: programming error");
}
parsePtr->numTokens -= 2;
}
@@ -2056,8 +2093,63 @@ TclSubstParse(
break;
default:
- Tcl_Panic("bad parse in TclSubstParse: %c", p[length]);
+ Tcl_Panic("bad parse in Tcl_SubstObj: %c", p[length]);
+ }
+ }
+
+ /*
+ * Next, substitute the parsed tokens just as in normal Tcl evaluation.
+ */
+
+ endTokenPtr = parsePtr->tokenPtr + parsePtr->numTokens;
+ tokensLeft = parsePtr->numTokens;
+ code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
+ &tokensLeft, 1, NULL, NULL);
+ if (code == TCL_OK) {
+ Tcl_FreeParse(parsePtr);
+ TclStackFree(interp, parsePtr);
+ if (errMsg != NULL) {
+ Tcl_SetObjResult(interp, errMsg);
+ Tcl_DecrRefCount(errMsg);
+ return NULL;
+ }
+ return Tcl_GetObjResult(interp);
+ }
+
+ result = Tcl_NewObj();
+ while (1) {
+ switch (code) {
+ case TCL_ERROR:
+ Tcl_FreeParse(parsePtr);
+ TclStackFree(interp, parsePtr);
+ Tcl_DecrRefCount(result);
+ if (errMsg != NULL) {
+ Tcl_DecrRefCount(errMsg);
+ }
+ return NULL;
+ case TCL_BREAK:
+ tokensLeft = 0; /* Halt substitution */
+ default:
+ Tcl_AppendObjToObj(result, Tcl_GetObjResult(interp));
}
+
+ if (tokensLeft == 0) {
+ Tcl_FreeParse(parsePtr);
+ TclStackFree(interp, parsePtr);
+ if (errMsg != NULL) {
+ if (code != TCL_BREAK) {
+ Tcl_DecrRefCount(result);
+ Tcl_SetObjResult(interp, errMsg);
+ Tcl_DecrRefCount(errMsg);
+ return NULL;
+ }
+ Tcl_DecrRefCount(errMsg);
+ }
+ return result;
+ }
+
+ code = TclSubstTokens(interp, endTokenPtr - tokensLeft, tokensLeft,
+ &tokensLeft, 1, NULL, NULL);
}
}
@@ -2072,13 +2164,13 @@ TclSubstParse(
* non-TCL_OK completion code arises.
*
* Results:
- * The return value is a standard Tcl completion code. The result in
- * interp is the substituted value, or an error message if TCL_ERROR is
- * returned. If tokensLeftPtr is not NULL, then it points to an int where
- * the number of tokens remaining to be processed is written.
+ * The return value is a standard Tcl completion code. The result in
+ * interp is the substituted value, or an error message if TCL_ERROR is
+ * returned. If tokensLeftPtr is not NULL, then it points to an int where
+ * the number of tokens remaining to be processed is written.
*
* Side effects:
- * Can be anything, depending on the types of substitution done.
+ * Can be anything, depending on the types of substitution done.
*
*----------------------------------------------------------------------
*/
@@ -2096,30 +2188,29 @@ TclSubstTokens(
* integer representing the number of tokens
* left to be substituted will be written */
int line, /* The line the script starts on. */
- int *clNextOuter, /* Information about an outer context for */
- const char *outerScript) /* continuation line data. This is set by
- * EvalEx() to properly handle [...]-nested
- * commands. The 'outerScript' refers to the
- * most-outer script containing the embedded
- * command, which is refered to by 'script'.
- * The 'clNextOuter' refers to the current
- * entry in the table of continuation lines in
- * this "main script", and the character
- * offsets are relative to the 'outerScript'
- * as well.
- *
- * If outerScript == script, then this call is
- * for words in the outer-most script or
- * command. See Tcl_EvalEx and TclEvalObjEx
- * for the places generating arguments for
- * which this is true. */
+ int* clNextOuter, /* Information about an outer context for */
+ CONST char* outerScript) /* continuation line data. This is set by
+ * EvalEx() to properly handle [...]-nested
+ * commands. The 'outerScript' refers to the
+ * most-outer script containing the embedded
+ * command, which is refered to by 'script'. The
+ * 'clNextOuter' refers to the current entry in
+ * the table of continuation lines in this
+ * "master script", and the character offsets are
+ * relative to the 'outerScript' as well.
+ *
+ * If outerScript == script, then this call is for
+ * words in the outer-most script/command. See
+ * Tcl_EvalEx() and TclEvalObjEx() for the places
+ * generating arguments for which this is true.
+ */
{
Tcl_Obj *result;
int code = TCL_OK;
#define NUM_STATIC_POS 20
int isLiteral, maxNumCL, numCL, i, adjust;
- int *clPosition = NULL;
- Interp *iPtr = (Interp *) interp;
+ int* clPosition = NULL;
+ Interp* iPtr = (Interp*) interp;
int inFile = iPtr->evalFlags & TCL_EVAL_FILE;
/*
@@ -2133,27 +2224,27 @@ TclSubstTokens(
*/
/*
- * For the handling of continuation lines in literals, first check if
- * this is actually a literal. If not then forego the additional
- * processing. Otherwise preallocate a small table to store the
- * locations of all continuation lines we find in this literal, if any.
- * The table is extended if needed.
+ * For the handling of continuation lines in literals we first check if
+ * this is actually a literal. For if not we can forego the additional
+ * processing. Otherwise we pre-allocate a small table to store the
+ * locations of all continuation lines we find in this literal, if
+ * any. The table is extended if needed.
*/
- numCL = 0;
- maxNumCL = 0;
+ numCL = 0;
+ maxNumCL = 0;
isLiteral = 1;
for (i=0 ; i < count; i++) {
- if ((tokenPtr[i].type != TCL_TOKEN_TEXT)
- && (tokenPtr[i].type != TCL_TOKEN_BS)) {
+ if ((tokenPtr[i].type != TCL_TOKEN_TEXT) &&
+ (tokenPtr[i].type != TCL_TOKEN_BS)) {
isLiteral = 0;
break;
}
}
if (isLiteral) {
- maxNumCL = NUM_STATIC_POS;
- clPosition = (int *)ckalloc(maxNumCL * sizeof(int));
+ maxNumCL = NUM_STATIC_POS;
+ clPosition = (int*) ckalloc (maxNumCL*sizeof(int));
}
adjust = 0;
@@ -2162,7 +2253,7 @@ TclSubstTokens(
Tcl_Obj *appendObj = NULL;
const char *append = NULL;
int appendByteLength = 0;
- char utfCharBytes[4] = "";
+ char utfCharBytes[TCL_UTF_MAX];
switch (tokenPtr->type) {
case TCL_TOKEN_TEXT:
@@ -2174,7 +2265,6 @@ TclSubstTokens(
appendByteLength = TclParseBackslash(tokenPtr->start,
tokenPtr->size, NULL, utfCharBytes);
append = utfCharBytes;
-
/*
* If the backslash sequence we found is in a literal, and
* represented a continuation line, we compute and store its
@@ -2190,31 +2280,31 @@ TclSubstTokens(
* correction.
*/
- if ((appendByteLength == 1) && (utfCharBytes[0] == ' ')
- && (tokenPtr->start[1] == '\n')) {
+ if ((appendByteLength == 1) && (utfCharBytes[0] == ' ') &&
+ (tokenPtr->start[1] == '\n')) {
if (isLiteral) {
int clPos;
-
if (result == 0) {
clPos = 0;
} else {
- TclGetStringFromObj(result, &clPos);
+ Tcl_GetStringFromObj(result, &clPos);
}
if (numCL >= maxNumCL) {
maxNumCL *= 2;
- clPosition = (int *)ckrealloc(clPosition,
- maxNumCL * sizeof(int));
+ clPosition = (int*) ckrealloc ((char*)clPosition,
+ maxNumCL*sizeof(int));
}
clPosition[numCL] = clPos;
- numCL++;
+ numCL ++;
}
- adjust++;
+ adjust ++;
}
break;
case TCL_TOKEN_COMMAND: {
- /* TIP #280: Transfer line information to nested command */
+ Interp *iPtr = (Interp *) interp;
+
iPtr->numLevels++;
code = TclInterpReady(interp);
if (code == TCL_OK) {
@@ -2223,27 +2313,21 @@ TclSubstTokens(
*/
int theline;
-
- TclAdvanceContinuations(&line, &clNextOuter,
- tokenPtr->start - outerScript);
+ TclAdvanceContinuations (&line, &clNextOuter,
+ tokenPtr->start - outerScript);
theline = line + adjust;
+ /* TIP #280: Transfer line information to nested command */
code = TclEvalEx(interp, tokenPtr->start+1, tokenPtr->size-2,
0, theline, clNextOuter, outerScript);
-
- TclAdvanceLines(&line, tokenPtr->start+1,
- tokenPtr->start + tokenPtr->size - 1);
-
/*
* Restore flag reset by nested eval for future bracketed
* commands and their cmdframe setup
*/
-
- if (inFile) {
+ if (inFile) {
iPtr->evalFlags |= TCL_EVAL_FILE;
}
}
iPtr->numLevels--;
- TclResetCancellation(interp, 0);
appendObj = Tcl_GetObjResult(interp);
break;
}
@@ -2342,7 +2426,6 @@ TclSubstTokens(
if (code != TCL_ERROR) { /* Keep error message in result! */
if (result != NULL) {
Tcl_SetObjResult(interp, result);
-
/*
* If the code found continuation lines (which implies that this
* word is a literal), then we store the accumulated table of
@@ -2361,7 +2444,7 @@ TclSubstTokens(
*/
if (maxNumCL) {
- ckfree(clPosition);
+ ckfree ((char*) clPosition);
}
} else {
Tcl_ResetResult(interp);
@@ -2396,7 +2479,7 @@ TclSubstTokens(
*----------------------------------------------------------------------
*/
-static int
+static inline int
CommandComplete(
const char *script, /* Script to check. */
int numBytes) /* Number of bytes in script. */
@@ -2474,7 +2557,7 @@ TclObjCommandComplete(
* check. */
{
int length;
- const char *script = TclGetStringFromObj(objPtr, &length);
+ const char *script = Tcl_GetStringFromObj(objPtr, &length);
return CommandComplete(script, length);
}