diff options
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r-- | generic/tclParse.c | 218 |
1 files changed, 76 insertions, 142 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 74b02ce..a31d099 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -19,12 +19,7 @@ /* * The following table provides parsing information about each possible 8-bit - * character. The table is designed to be referenced with either signed or - * unsigned characters, so it has 384 entries. The first 128 entries - * correspond to negative character values, the next 256 correspond to - * positive character values. The last 128 entries are identical to the first - * 128. The table is always indexed with a 128-byte offset (the 128th entry - * corresponds to a character value of 0). + * character. The table is designed to be referenced with unsigned characters. * * The macro CHAR_TYPE is used to index into the table and return information * about its character argument. The following return values are defined. @@ -44,42 +39,6 @@ */ const char tclCharTypeTable[] = { - /* - * Negative character values, from -128 to -1: - */ - - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, - TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, /* * Positive character values, from 0-127: @@ -167,6 +126,8 @@ static int ParseTokens(const char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); static int ParseWhiteSpace(const char *src, int numBytes, int *incompletePtr, char *typePtr); +static int ParseAllWhiteSpace(const char *src, int numBytes, + int *incompletePtr); /* *---------------------------------------------------------------------- @@ -298,9 +259,43 @@ Tcl_ParseCommand( */ parsePtr->commandStart = src; + type = CHAR_TYPE(*src); + scanned = 1; /* Can't have missing whitepsace before first word. */ while (1) { int expandWord = 0; + /* Are we at command termination? */ + + if ((numBytes == 0) || (type & terminators) != 0) { + parsePtr->term = src; + parsePtr->commandSize = src + (numBytes != 0) + - parsePtr->commandStart; + return TCL_OK; + } + + /* Are we missing white space after previous word? */ + + if (scanned == 0) { + if (src[-1] == '"') { + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "extra characters after close-quote", -1)); + } + parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; + } else { + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "extra characters after close-brace", -1)); + } + parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; + } + parsePtr->term = src; + error: + Tcl_FreeParse(parsePtr); + parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; + return TCL_ERROR; + } + /* * Create the token for the word. */ @@ -310,23 +305,6 @@ Tcl_ParseCommand( tokenPtr = &parsePtr->tokenPtr[wordIndex]; tokenPtr->type = TCL_TOKEN_WORD; - /* - * Skip white space before the word. Also skip a backslash-newline - * sequence: it should be treated just like white space. - */ - - scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); - src += scanned; - numBytes -= scanned; - if (numBytes == 0) { - parsePtr->term = src; - break; - } - if ((type & terminators) != 0) { - parsePtr->term = src; - src++; - break; - } tokenPtr->start = src; parsePtr->numTokens++; parsePtr->numWords++; @@ -546,52 +524,12 @@ Tcl_ParseCommand( tokenPtr->type = TCL_TOKEN_SIMPLE_WORD; } - /* - * Do two additional checks: (a) make sure we're really at the end of - * a word (there might have been garbage left after a quoted or braced - * word), and (b) check for the end of the command. - */ + /* Parse the whitespace between words. */ scanned = ParseWhiteSpace(src,numBytes, &parsePtr->incomplete, &type); - if (scanned) { - src += scanned; - numBytes -= scanned; - continue; - } - - if (numBytes == 0) { - parsePtr->term = src; - break; - } - if ((type & terminators) != 0) { - parsePtr->term = src; - src++; - break; - } - if (src[-1] == '"') { - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-quote", -1)); - } - parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; - } else { - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-brace", -1)); - } - parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; - } - parsePtr->term = src; - goto error; + src += scanned; + numBytes -= scanned; } - - parsePtr->commandSize = src - parsePtr->commandStart; - return TCL_OK; - - error: - Tcl_FreeParse(parsePtr); - parsePtr->commandSize = parsePtr->end - parsePtr->commandStart; - return TCL_ERROR; } /* @@ -733,23 +671,32 @@ ParseWhiteSpace( *---------------------------------------------------------------------- */ -int -TclParseAllWhiteSpace( +static int +ParseAllWhiteSpace( const char *src, /* First character to parse. */ - int numBytes) /* Max number of byes to scan */ + int numBytes, /* Max number of byes to scan */ + int *incompletePtr) /* Set true if parse is incomplete. */ { - int dummy; char type; const char *p = src; do { - int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); + int scanned = ParseWhiteSpace(p, numBytes, incompletePtr, &type); p += scanned; numBytes -= scanned; } while (numBytes && (*p == '\n') && (p++, --numBytes)); return (p-src); } + +int +TclParseAllWhiteSpace( + const char *src, /* First character to parse. */ + int numBytes) /* Max number of byes to scan */ +{ + int dummy; + return ParseAllWhiteSpace(src, numBytes, &dummy); +} /* *---------------------------------------------------------------------- @@ -902,7 +849,7 @@ TclParseBackslash( count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "x". + * No hexdigits -> This is just "x". */ result = 'x'; @@ -917,7 +864,7 @@ TclParseBackslash( count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "u". + * No hexdigits -> This is just "u". */ result = 'u'; } @@ -926,7 +873,7 @@ TclParseBackslash( count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* - * No hexadigits -> This is just "U". + * No hexdigits -> This is just "U". */ result = 'U'; } @@ -992,11 +939,10 @@ TclParseBackslash( *readPtr = count; } count = Tcl_UniCharToUtf(result, dst); -#if TCL_UTF_MAX > 3 if (!count) { + /* Special case for handling high surrogates. */ count = Tcl_UniCharToUtf(-1, dst); } -#endif return count; } @@ -1027,17 +973,12 @@ ParseComment( * command. */ { register const char *p = src; + int incomplete = parsePtr->incomplete; while (numBytes) { - char type; - int scanned; - - do { - scanned = ParseWhiteSpace(p, numBytes, - &parsePtr->incomplete, &type); - p += scanned; - numBytes -= scanned; - } while (numBytes && (*p == '\n') && (p++,numBytes--)); + int scanned = ParseAllWhiteSpace(p, numBytes, &incomplete); + p += scanned; + numBytes -= scanned; if ((numBytes == 0) || (*p != '#')) { break; @@ -1046,35 +987,28 @@ ParseComment( parsePtr->commentStart = p; } + p++; + numBytes--; while (numBytes) { + if (*p == '\n') { + p++; + numBytes--; + break; + } if (*p == '\\') { - scanned = ParseWhiteSpace(p, numBytes, &parsePtr->incomplete, - &type); - if (scanned) { - p += scanned; - numBytes -= scanned; - } else { - /* - * General backslash substitution in comments isn't part - * of the formal spec, but test parse-15.47 and history - * indicate that it has been the de facto rule. Don't - * change it now. - */ - - TclParseBackslash(p, numBytes, &scanned, NULL); - p += scanned; - numBytes -= scanned; - } - } else { p++; numBytes--; - if (p[-1] == '\n') { + if (numBytes == 0) { break; } } + incomplete = (*p == '\n'); + p++; + numBytes--; } parsePtr->commentSize = p - parsePtr->commentStart; } + parsePtr->incomplete = incomplete; return (p - src); } @@ -2253,7 +2187,7 @@ TclSubstTokens( if (result == 0) { clPos = 0; } else { - Tcl_GetStringFromObj(result, &clPos); + TclGetStringFromObj(result, &clPos); } if (numCL >= maxNumCL) { @@ -2529,7 +2463,7 @@ TclObjCommandComplete( * check. */ { int length; - const char *script = Tcl_GetStringFromObj(objPtr, &length); + const char *script = TclGetStringFromObj(objPtr, &length); return CommandComplete(script, length); } |