* generic/tclParseExpr.c: Completely new expression parser

that builds a parse tree instead of operating with deep recursion. This corrects reports of stack-blowing crashes parsing long expressions [Bug 906201] and replaces a fundamentally O(N^2) algorithm with an O(N) one [RFE 903765]. The new parser is better able to generate error messages that clearly report both the nature and context of the syntax error [Bugs 1029267, 1381715]. For now, the code for the old parser is still present and can be activated with a "#define OLD_EXPR_PARSER 1". This is for the sake of a clean implementation patch, and for ease of benchmarking. The new parser is non-recursive, so much lighter in stack consumption, but it does use more heap, so there may be cases where parsing of long expressions that succeeded with the old parser will lead to out of memory panics with the new one. There are still more improvements possible on that point, though significant progress may require changes to the Tcl_Token specifications documented for the public Tcl_Parse*() routines.

***POTENTIAL INCOMPATIBILITY*** for any callers that rely on the exact (usually terrible) error messages generated by the old parser. This includes a large number of tests in the test suite.

* generic/tclInt.h:        Replaced TclParseWhiteSpace() with
* generic/tclParse.c:      TclParseAllWhiteSpace() which is what
* generic/tclParseExpr.c:  all the callers really needed. Breaking
  whitespace runs at newlines is useful only to the command parsing
  function, and it can call the file scoped routine ParseWhiteSpace() to
  do that.

* tests/expr-old.test:     Removed knownBug constraints that masked
* tests/expr.test:         failures due to revised error messages.
* tests/parseExpr.test:
author: dgp <dgp@users.sourceforge.net> 2006-07-05 05:34:42 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2006-07-05 05:34:42 (GMT)
commit: f8202fbf0e8d9c875afd03460d20b5b83c0aa10c (patch)
tree: dd2a47b4c7d1a184335dede04e8b90d2073b63cf /generic/tclParse.c
parent: 78a8ff907a6cc9f17b52a310bef7c37890273c3c (diff)
download: tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.zip
tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.gz
tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.bz2
1 files changed, 48 insertions, 16 deletions
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 5da1abb..9800537 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclParse.c,v 1.45 2005/11/02 14:51:04 dkf Exp $
+ * RCS: @(#) $Id: tclParse.c,v 1.46 2006/07/05 05:34:45 dgp Exp $
  */
 
 #include "tclInt.h"
@@ -176,6 +176,9 @@ static int		ParseComment(CONST char *src, int numBytes,
 			    Tcl_Parse *parsePtr);
 static int		ParseTokens(CONST char *src, int numBytes,
 			    int mask, int flags, Tcl_Parse *parsePtr);
+static int		ParseWhiteSpace(CONST char *src, int numBytes,
+			    Tcl_Parse *parsePtr, char *typePtr);
+
 
 /*
  *----------------------------------------------------------------------
@@ -325,7 +328,7 @@ Tcl_ParseCommand(
 	 * sequence: it should be treated just like white space.
 	 */
 
-	scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type);
+	scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type);
 	src += scanned;
 	numBytes -= scanned;
 	if (numBytes == 0) {
@@ -390,8 +393,7 @@ Tcl_ParseCommand(
 		    )
 		/* Is the prefix */
 		&& (numBytes > 0)
-		&& (TclParseWhiteSpace(termPtr, numBytes, parsePtr,
-			    &type) == 0)
+		&& (ParseWhiteSpace(termPtr, numBytes, parsePtr, &type) == 0)
 		&& (type != TYPE_COMMAND_END)
 		/* Non-whitespace follows */
 		) {
@@ -435,7 +437,7 @@ Tcl_ParseCommand(
 	 * word), and (b) check for the end of the command.
 	 */
 
-	scanned = TclParseWhiteSpace(src, numBytes, parsePtr, &type);
+	scanned = ParseWhiteSpace(src, numBytes, parsePtr, &type);
 	if (scanned) {
 	    src += scanned;
 	    numBytes -= scanned;
@@ -480,10 +482,10 @@ Tcl_ParseCommand(
 /*
  *----------------------------------------------------------------------
  *
- * TclParseWhiteSpace --
+ * ParseWhiteSpace --
  *
- *	Scans up to numBytes bytes starting at src, consuming white space as
- *	defined by Tcl's parsing rules.
+ *	Scans up to numBytes bytes starting at src, consuming white space
+ *	between words as defined by Tcl's parsing rules.
  *
  * Results:
  *	Returns the number of bytes recognized as white space. Records at
@@ -497,8 +499,8 @@ Tcl_ParseCommand(
  *----------------------------------------------------------------------
  */
 
-int
-TclParseWhiteSpace(
+static int
+ParseWhiteSpace(
     CONST char *src,		/* First character to parse. */
     register int numBytes,	/* Max number of bytes to scan. */
     Tcl_Parse *parsePtr,	/* Information about parse in progress.
@@ -541,6 +543,38 @@ TclParseWhiteSpace(
 /*
  *----------------------------------------------------------------------
  *
+ * TclParseAllWhiteSpace --
+ *
+ *	Scans up to numBytes bytes starting at src, consuming all white space
+ *	including the command-terminating newline characters.
+ *
+ * Results:
+ *	Returns the number of bytes recognized as white space. 
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclParseAllWhiteSpace(
+    CONST char *src,		/* First character to parse. */
+    int numBytes)		/* Max number of bytes to scan. */
+{
+    Tcl_Parse dummy;	/* Scratch parse state for ParseWhiteSpace(), which
+			 * generates no tokens, so no Tcl_FreeParse() call
+			 * is needed. NOTE(review): passed uninitialized. */
+    char type;
+    CONST char *p = src;
+    do {
+	int scanned = ParseWhiteSpace(p, numBytes, &dummy, &type);
+	p += scanned;
+	numBytes -= scanned;
+    } while (numBytes && (*p == '\n') && (p++, --numBytes));
+    return (p-src);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * TclParseHex --
  *
  *	Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing
@@ -801,11 +835,9 @@ ParseComment(
 	char type;
 	int scanned;
 
-	do {
-	    scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type);
-	    p += scanned;
-	    numBytes -= scanned;
-	} while (numBytes && (*p == '\n') && (p++,numBytes--));
+	scanned = TclParseAllWhiteSpace(p, numBytes);
+	p += scanned;
+	numBytes -= scanned;
 
 	if ((numBytes == 0) || (*p != '#')) {
 	    break;
@@ -816,7 +848,7 @@ ParseComment(
 
 	while (numBytes) {
 	    if (*p == '\\') {
-		scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type);
+		scanned = ParseWhiteSpace(p, numBytes, parsePtr, &type);
 		if (scanned) {
 		    p += scanned;
 		    numBytes -= scanned;
author	dgp <dgp@users.sourceforge.net>	2006-07-05 05:34:42 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2006-07-05 05:34:42 (GMT)
commit	f8202fbf0e8d9c875afd03460d20b5b83c0aa10c (patch)
tree	dd2a47b4c7d1a184335dede04e8b90d2073b63cf /generic/tclParse.c
parent	78a8ff907a6cc9f17b52a310bef7c37890273c3c (diff)
download	tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.zip tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.gz tcl-f8202fbf0e8d9c875afd03460d20b5b83c0aa10c.tar.bz2