diff options
author | dgp <dgp@users.sourceforge.net> | 2006-07-05 05:34:42 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2006-07-05 05:34:42 (GMT) |
commit | f8202fbf0e8d9c875afd03460d20b5b83c0aa10c (patch) | |
tree | dd2a47b4c7d1a184335dede04e8b90d2073b63cf /generic/tclParse.c | |
parent | 78a8ff907a6cc9f17b52a310bef7c37890273c3c (diff) | |
download | tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.zip tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.gz tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.bz2 |
* generic/tclParseExpr.c: Completely new expression parser
that builds a parse tree instead of operating with deep recursion.
This corrects reports of stack-blowing crashes parsing long
expressions [Bug 906201] and replaces a fundamentally O(N^2)
algorithm with an O(N) one [RFE 903765]. The new parser is better
able to generate error messages that clearly report both the nature
and context of the syntax error [Bugs 1029267, 1381715]. For now,
the code for the old parser is still present and can be activated
with a "#define OLD_EXPR_PARSER 1". This is for the sake of a clean
implementation patch, and for ease of benchmarking. The new parser
is non-recursive, so much lighter in stack consumption, but it
does use more heap, so there may be cases where parsing of long
expressions that succeeded with the old parser will lead to out
of memory panics with the new one. There are still more improvements
possible on that point, though significant progress may require
changes to the Tcl_Token specifications documented for the public
Tcl_Parse*() routines.
***POTENTIAL INCOMPATIBILITY*** for any callers that rely on the
exact (usually terrible) error messages generated by the old parser.
This includes a large number of tests in the test suite.
* generic/tclInt.h:
* generic/tclParse.c:
* generic/tclParseExpr.c: Replaced TclParseWhiteSpace() with
TclParseAllWhiteSpace() which is what all the callers really needed.
Breaking whitespace runs at newlines is useful only to the command
parsing function, and it can call the file scoped routine
ParseWhiteSpace() to do that.
* tests/expr-old.test:
* tests/expr.test:
* tests/parseExpr.test: Removed knownBug constraints that masked
failures due to revised error messages.
Diffstat (limited to 'generic/tclParse.c')
-rw-r--r-- | generic/tclParse.c | 64 |
1 files changed, 48 insertions, 16 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c index 5da1abb..9800537 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParse.c,v 1.45 2005/11/02 14:51:04 dkf Exp $ + * RCS: @(#) $Id: tclParse.c,v 1.46 2006/07/05 05:34:45 dgp Exp $ */ #include "tclInt.h" @@ -176,6 +176,9 @@ static int ParseComment(CONST char *src, int numBytes, Tcl_Parse *parsePtr); static int ParseTokens(CONST char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); +static int ParseWhiteSpace(CONST char *src, int numBytes, + Tcl_Parse *parsePtr, char *typePtr); + /* *---------------------------------------------------------------------- @@ -325,7 +328,7 @@ Tcl_ParseCommand( * sequence: it should be treated just like white space. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); src += scanned; numBytes -= scanned; if (numBytes == 0) { @@ -390,8 +393,7 @@ Tcl_ParseCommand( ) /* Is the prefix */ && (numBytes > 0) - && (TclParseWhiteSpace(termPtr, numBytes, parsePtr, - &type) == 0) + && (ParseWhiteSpace(termPtr, numBytes, parsePtr, &type) == 0) && (type != TYPE_COMMAND_END) /* Non-whitespace follows */ ) { @@ -435,7 +437,7 @@ Tcl_ParseCommand( * word), and (b) check for the end of the command. */ - scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type); if (scanned) { src += scanned; numBytes -= scanned; @@ -480,10 +482,10 @@ Tcl_ParseCommand( /* *---------------------------------------------------------------------- * - * TclParseWhiteSpace -- + * ParseWhiteSpace -- * - * Scans up to numBytes bytes starting at src, consuming white space as - * defined by Tcl's parsing rules. 
+ * Scans up to numBytes bytes starting at src, consuming white space + * between words as defined by Tcl's parsing rules. * * Results: * Returns the number of bytes recognized as white space. Records at @@ -497,8 +499,8 @@ Tcl_ParseCommand( *---------------------------------------------------------------------- */ -int -TclParseWhiteSpace( +static int +ParseWhiteSpace( CONST char *src, /* First character to parse. */ register int numBytes, /* Max number of bytes to scan. */ Tcl_Parse *parsePtr, /* Information about parse in progress. @@ -541,6 +543,38 @@ TclParseWhiteSpace( /* *---------------------------------------------------------------------- * + * TclParseAllWhiteSpace -- + * + * Scans up to numBytes bytes starting at src, consuming all white space + * including the command-terminating newline characters. + * + * Results: + * Returns the number of bytes recognized as white space. + * + *---------------------------------------------------------------------- + */ + +int +TclParseAllWhiteSpace( + CONST char *src, /* First character to parse. 
*/ + int numBytes) /* Max number of byes to scan */ +{ + Tcl_Parse dummy; /* Since we know ParseWhiteSpace() generates + * no tokens, there's no need for a call to + * Tcl_FreeParse() in this routine */ + char type; + CONST char *p = src; + do { + int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type); + p += scanned; + numBytes -= scanned; + } while (numBytes && (*p == '\n') && (p++, --numBytes)); + return (p-src); +} + +/* + *---------------------------------------------------------------------- + * * TclParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing @@ -801,11 +835,9 @@ ParseComment( char type; int scanned; - do { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); - p += scanned; - numBytes -= scanned; - } while (numBytes && (*p == '\n') && (p++,numBytes--)); + scanned = TclParseAllWhiteSpace(p, numBytes); + p += scanned; + numBytes -= scanned; if ((numBytes == 0) || (*p != '#')) { break; @@ -816,7 +848,7 @@ ParseComment( while (numBytes) { if (*p == '\\') { - scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); + scanned = ParseWhiteSpace(p, numBytes, parsePtr, &type); if (scanned) { p += scanned; numBytes -= scanned; |