1 files changed, 1334 insertions, 0 deletions
diff --git a/generic/Lscanner.l b/generic/Lscanner.l
new file mode 100644
index 0000000..126b019
--- /dev/null
+++ b/generic/Lscanner.l
@@ -0,0 +1,1334 @@
+%option	noyywrap
+%option noyy_top_state
+%option stack
+%option noinput
+%x re_delim
+%x re_modifier
+%x re_arg_split
+%x re_arg_case
+%x glob_re
+%x subst_re
+%x comment
+%x str_double
+%x str_single
+%x str_backtick
+%x interpol
+%x here_doc_interp
+%x here_doc_nointerp
+%x eat_through_eol
+%x lhtml
+%x lhtml_expr_start
+ID	([a-zA-Z_]|::)([0-9a-zA-Z_]|::)*
+HEX	[a-fA-F0-9]
+%{
+/*
+ * Copyright (c) 2006-2008 BitMover, Inc.
+ */
+#include <string.h>
+#define	_PWD_H			// Some solaris9 conflict, we don't need pwd.h
+#include "tclInt.h"
+#include "Lcompile.h"
+#include "Lgrammar.h"
+#include "tommath.h"
+
+private void	extract_re_delims(char c);
+private int	include_pop();
+private int	include_push(Tcl_Channel chan, char *name);
+private Tcl_Channel include_search(char *file, char **path, int cwdOnly);
+private Tcl_Channel include_try(Tcl_Obj *fileObj, int *found);
+private void	inject(char *s);
+private void	interpol_lbrace();
+private void	interpol_pop();
+private int	interpol_push();
+private int	interpol_rbrace();
+private void	put_back(char c);
+private void	tally_newlines(char *s, int len, int tally);
+
+// Max nesting depth of string interpolations.
+#define INTERPOL_STACK_SZ	10
+
+// Stack for tracking include() statements.
+#define INCLUDE_STACK_SZ	10
+typedef struct {
+	char	*name;
+	char	*dir;
+	int	line;
+	YY_BUFFER_STATE	buf;
+} Include;
+
+private char	re_start_delim;	// delimiters for m|regexp| form
+private char	re_end_delim;
+private Tcl_Obj	*str;		// string collection buffer
+private int	str_beg;	// source offset of string
+private char	*here_delim = NULL;
+private char	*here_pfx = NULL;
+private int	include_top;
+private Include	include_stk[INCLUDE_STACK_SZ+1];
+private Tcl_HashTable *include_table = NULL;
+private	int	interpol_top = -1;
+private	int	interpol_stk[INTERPOL_STACK_SZ+1];
+private int	in_lhtml = 0;	// Lhtml mode
+
+#define STRBUF_START(beg)			\
+	do {					\
+		str = Tcl_NewObj();		\
+		Tcl_IncrRefCount(str);		\
+		str_beg = (beg);		\
+	} while (0)
+
+
+#define STRBUF_STRING()		Tcl_GetString(str)
+
+#define STRBUF_STARTED()	(str != NULL)
+
+#define STRBUF_ADD(s, len)	Tcl_AppendToObj(str, s, len)
+
+#define STRBUF_STOP(e)				\
+	do {					\
+		Tcl_DecrRefCount(str);		\
+		str = NULL;			\
+		L_lloc.beg = str_beg;		\
+		L_lloc.end = (e);		\
+	} while (0)
+
+/*
+ * Keep track of the current offset in the input string.
+ * YY_USER_ACTION is run before each action.  Note that some actions
+ * further modify L_lloc.
+ */
+
+#define YY_USER_ACTION	yy_user_action();
+
+private void
+yy_user_action()
+{
+	L->prev_token_off = L->token_off;
+	L->token_off     += L->prev_token_len;
+	L->prev_token_len = yyleng;
+
+	L_lloc.beg = L->token_off;
+	L_lloc.end = L->token_off + yyleng;
+
+	tally_newlines(yytext, yyleng, 1);
+	L_lloc.line = L->line;
+
+	L_lloc.file = L->file;
+
+	/*
+	 * Build up in L->script the text that the scanner scans.
+	 * The compiler later passes this on to tcl as the script
+	 * source.  This allows include() stmts to be handled properly.
+	 */
+	Tcl_AppendToObj(L->script, yytext, yyleng);
+	L->script_len += yyleng;
+}
+
+/*
+ * Un-do the effects of the YY_USER_ACTION on the token offset
+ * tracking.  This is useful in include() processing where the
+ * characters in the '#include "file"' must be ignored.
+ */
+private void
+undo_yy_user_action()
+{
+	L->prev_token_len = L->token_off - L->prev_token_off;
+	L->token_off      = L->prev_token_off;
+
+	L_lloc.beg = L->prev_token_off;
+	L_lloc.end = L->prev_token_off + L->prev_token_len;
+
+	tally_newlines(yytext, yyleng, -1);
+	L_lloc.line = L->line;
+
+	L->script_len -= yyleng;
+	Tcl_SetObjLength(L->script, L->script_len);
+}
+
+/*
+ * Inject the given string into the L script text, but do not give it
+ * to the scanner.  This is useful for inserting #line directives (for
+ * #include's) which need to remain in the script so Tcl can see them
+ * but which aren't parsed.
+ */
+private void
+inject(char *s)
+{
+	int	len = strlen(s);
+
+	L->prev_token_len += len;
+
+	Tcl_AppendToObj(L->script, s, len);
+	L->script_len += len;
+}
+
+/*
+ * Count the newlines in a string and add the number to L->line.  Pass
+ * in tally == 1 to count them and tally == -1 to undo it.
+ */
+private void
+tally_newlines(char *s, int len, int tally)
+{
+	char	*end, *p;
+
+	for (p = s, end = p + len; p < end; p++) {
+		if (*p == '\n') {
+			L->line += tally;
+		} else if ((*p == '\r') && ((p+1) < end) && (*(p+1) != '\n')) {
+			/* Mac line endings. */
+			L->line += tally;
+		}
+	}
+}
+
+private Tcl_Channel
+include_try(Tcl_Obj *fileObj, int *found)
+{
+	int		new;
+	Tcl_Channel	chan;
+	char		*file = Tcl_GetString(fileObj);
+	char		*path;
+	Tcl_Obj		*pathObj;
+
+	/*
+	 * See if the normalized path has been included before.  If the path
+	 * isn't absolute, consider it to be relative to where L->file is.
+	 */
+	if (Tcl_FSGetPathType(fileObj) == TCL_PATH_ABSOLUTE) {
+		if ((pathObj = Tcl_FSGetNormalizedPath(NULL, fileObj)) == NULL){
+			L_err("unable to normalize include file %s", file);
+			return (NULL);
+		}
+	} else {
+		pathObj = Tcl_ObjPrintf("%s/%s", L->dir, file);
+	}
+	Tcl_IncrRefCount(pathObj);
+
+	path = Tcl_GetString(pathObj);
+	Tcl_CreateHashEntry(include_table, path, &new);
+	if (new) {
+		chan = Tcl_FSOpenFileChannel(L->interp, pathObj, "r", 0666);
+		*found = (chan != NULL);
+		return (chan);
+	} else {
+		*found = 1;  // already included
+		return (NULL);
+	}
+	Tcl_DecrRefCount(pathObj);
+}
+
+/*
+ * Search for an include file.  If the path is absolute, use it.
+ * Else, for #include <file> (cwdOnly == 0) try
+ *    $BIN/include  (where BIN is where the running tclsh lives)
+ *    /usr/local/include/L
+ *    /usr/include/L
+ * For #include "file" (cwdOnly == 1) look only in the directory
+ * where the script doing the #include resides.
+ */
+private Tcl_Channel
+include_search(char *file, char **path, int cwdOnly)
+{
+	int		found, len;
+	Tcl_Channel	chan;
+	Tcl_Obj		*binObj = NULL;
+	Tcl_Obj		*fileObj;
+
+	unless (include_table) {
+		include_table = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable));
+		Tcl_InitHashTable(include_table, TCL_STRING_KEYS);
+	}
+
+	fileObj = Tcl_NewStringObj(file, -1);
+	Tcl_IncrRefCount(fileObj);
+	if ((Tcl_FSGetPathType(fileObj) == TCL_PATH_ABSOLUTE) || cwdOnly) {
+		chan = include_try(fileObj, &found);
+	} else {
+		/* Try $BIN/include */
+		binObj = TclGetObjNameOfExecutable();
+		Tcl_GetStringFromObj(binObj, &len);
+		if (len > 0) {
+			Tcl_DecrRefCount(fileObj);
+			/* TclPathPart bumps the ref count. */
+			fileObj = TclPathPart(L->interp, binObj,
+			    TCL_PATH_DIRNAME);
+			Tcl_AppendPrintfToObj(fileObj, "/include/%s", file);
+			chan = include_try(fileObj, &found);
+			if (found) goto done;
+		}
+		/* Try /usr/local/include/L */
+		Tcl_DecrRefCount(fileObj);
+		fileObj = Tcl_ObjPrintf("/usr/local/include/L/%s", file);
+		Tcl_IncrRefCount(fileObj);
+		chan = include_try(fileObj, &found);
+		if (found) goto done;
+		/* Try /usr/include/L */
+		Tcl_DecrRefCount(fileObj);
+		fileObj = Tcl_ObjPrintf("/usr/include/L/%s", file);
+		Tcl_IncrRefCount(fileObj);
+		chan = include_try(fileObj, &found);
+	}
+ done:
+	unless (found) {
+		L_err("cannot find include file %s", file);
+	}
+	if (path) *path = ckstrdup(Tcl_GetString(fileObj));
+	Tcl_DecrRefCount(fileObj);
+	return (chan);
+}
+
+private int
+include_push(Tcl_Channel chan, char *name)
+{
+	YY_BUFFER_STATE buf;
+	Tcl_Obj		*objPtr;
+	char		*dec = NULL, *script;
+	int		len, ret;
+
+	/* Read the file into memory. */
+	objPtr = Tcl_NewObj();
+	Tcl_IncrRefCount(objPtr);
+	if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) {
+		Tcl_Close(L->interp, chan);
+		L_err("error reading include file %s", name);
+		return (0);
+	}
+	Tcl_Close(L->interp, chan);
+
+	/* If it is encrypted, decrypt it. */
+	script = Tcl_GetStringFromObj(objPtr, &len);
+
+	/* Create a new flex buffer with the file contents. */
+	if (include_top >= INCLUDE_STACK_SZ) {
+		L_err("include file nesting too deep -- aborting");
+		while (include_pop()) ;
+		ret = 0;
+	} else {
+		++include_top;
+		include_stk[include_top].name = L->file;
+		include_stk[include_top].dir  = L->dir;
+		include_stk[include_top].line = L->line;
+		include_stk[include_top].buf = YY_CURRENT_BUFFER;
+		buf = yy_scan_bytes(script, len);
+		L->file = name;
+		L->dir  = L_dirname(L->file);
+		L->line = 1;
+		inject("#line 1\n");
+		ret = 1;
+	}
+	Tcl_DecrRefCount(objPtr);
+	if (dec) ckfree(dec);
+	return (ret);
+}
+
+private int
+include_pop()
+{
+	char	*s;
+
+	if (include_top >= 0) {
+		L->file = include_stk[include_top].name;
+		L->dir  = include_stk[include_top].dir;
+		L->line = include_stk[include_top].line;
+		yy_delete_buffer(YY_CURRENT_BUFFER);
+		yy_switch_to_buffer(include_stk[include_top].buf);
+		--include_top;
+		s = cksprintf("#line %d\n", L->line);
+		inject(s);
+		ckfree(s);
+		return (1);
+	} else {
+		return (0);
+	}
+}
+
+/*
+ * Given a decimal, hex, or octal integer constant of arbitrary
+ * precision, return a canonical string representation.  This is done
+ * by converting it to a bignum and then taking its string rep.
+ */
+private char *
+canonical_num(char *num)
+{
+	char	*ret;
+	Tcl_Obj	*obj;
+	mp_int	big;
+
+	obj = Tcl_NewStringObj(num, -1);
+	Tcl_IncrRefCount(obj);
+	Tcl_TakeBignumFromObj(NULL, obj, &big);
+	Tcl_SetBignumObj(obj, &big);
+	ret = ckstrdup(Tcl_GetString(obj));
+	Tcl_DecrRefCount(obj);
+	return (ret);
+}
+
+/*
+ * Work around a Windows problem where our getopt type conficts
+ * with the system's.
+ */
+#undef getopt
+#undef optarg
+#undef optind
+
+%}
+%%
+<INITIAL,interpol>{
+	"("		return T_LPAREN;
+	")"		return T_RPAREN;
+	"{"		interpol_lbrace(); return T_LBRACE;
+	"["		return T_LBRACKET;
+	"]"		return T_RBRACKET;
+	","		return T_COMMA;
+	"!"		return T_BANG;
+	"+"		return T_PLUS;
+	"-"		return T_MINUS;
+	"*"		return T_STAR;
+	"/"		return T_SLASH;
+	"%"		return T_PERC;
+	"+="		return T_EQPLUS;
+	"-="		return T_EQMINUS;
+	"*="		return T_EQSTAR;
+	"/="		return T_EQSLASH;
+	"%="		return T_EQPERC;
+	"&="		return T_EQBITAND;
+	"|="		return T_EQBITOR;
+	"^="		return T_EQBITXOR;
+	"<<="		return T_EQLSHIFT;
+	">>="		return T_EQRSHIFT;
+	".="		return T_EQDOT;
+	"++"		return T_PLUSPLUS;
+	"--"		return T_MINUSMINUS;
+	"&&"		return T_ANDAND;
+	"||"		return T_OROR;
+	"&"		return T_BITAND;
+	"|"		return T_BITOR;
+	"^"		return T_BITXOR;
+	"~"		return T_BITNOT;
+	"<<"		return T_LSHIFT;
+	">>"		return T_RSHIFT;
+	"="		return T_EQUALS;
+	";"		return T_SEMI;
+	"."		return T_DOT;
+	[ \t\n\r]+"."[ \t\n\r]+		return T_STRCAT;
+	".."		return T_DOTDOT;
+	"..."		return T_ELLIPSIS;
+	"class"		return T_CLASS;
+	"extern"	return T_EXTERN;
+	"return"	return T_RETURN;
+	"void"		return T_VOID;
+	"string"	return T_STRING;
+	"widget"	return T_WIDGET;
+	"int"		return T_INT;
+	"float"		return T_FLOAT;
+	"poly"		return T_POLY;
+	"split"		return T_SPLIT;
+	"if"		return T_IF;
+	"else"		return T_ELSE;
+	"unless"	return T_UNLESS;
+	"while"		return T_WHILE;
+	"do"		return T_DO;
+	"for"		return T_FOR;
+	"struct"	return T_STRUCT;
+	"typedef"	return T_TYPEDEF;
+	"defined"	return T_DEFINED;
+	"foreach"	return T_FOREACH;
+	"break"		return T_BREAK;
+	"continue"	return T_CONTINUE;
+	"instance"	return T_INSTANCE;
+	"private"	return T_PRIVATE;
+	"public"	return T_PUBLIC;
+	"constructor"	return T_CONSTRUCTOR;
+	"destructor"	return T_DESTRUCTOR;
+	"expand"	return T_EXPAND;
+	"_argused"	return T_ARGUSED;
+	"_attribute"	return T_ATTRIBUTE;
+	"_attributes"	return T_ATTRIBUTE;
+	"_optional"	return T_OPTIONAL;
+	"_mustbetype"	return T_MUSTBETYPE;
+	"goto"		return T_GOTO;
+	"switch"	return T_SWITCH;
+	"case"		return T_CASE;
+	"default"	return T_DEFAULT;
+	"try"		return T_TRY;
+	"=>"		return T_ARROW;
+	"eq"		return T_EQ;
+	"ne"		return T_NE;
+	"lt"		return T_LT;
+	"le"		return T_LE;
+	"gt"		return T_GT;
+	"ge"		return T_GE;
+	"=="		return T_EQUALEQUAL;
+	"!="		return T_NOTEQUAL;
+	">"		return T_GREATER;
+	">="		return T_GREATEREQ;
+	"<"		return T_LESSTHAN;
+	"<="		return T_LESSTHANEQ;
+	"->"		return T_POINTS;
+	":"		return T_COLON;
+	"?"		return T_QUESTION;
+	"?>"		{
+				/*
+				 * ?> marks the end of a script or expr
+				 * inside of an lhtml document but is a
+				 * syntax error otherwise.
+				 */
+				unless (in_lhtml) {
+					undo_yy_user_action();
+					REJECT;
+				}
+				yy_pop_state();
+				STRBUF_START(L_lloc.end);
+				if (YYSTATE == lhtml_expr_start) {
+					yy_pop_state();  // pop back to lhtml
+					ASSERT(YYSTATE == lhtml);
+					return T_LHTML_EXPR_END;
+				}
+	}
+	"and"		{
+				L_err("'and','or','xor','not' are "
+				      "unimplemented reserved words");
+				return T_ANDAND;
+	}
+	"not"		{
+				L_err("'and','or','xor','not' are "
+				      "unimplemented reserved words");
+				return T_BANG;
+	}
+	"or"		{
+				L_err("'and','or','xor','not' are "
+				      "unimplemented reserved words");
+				return T_OROR;
+	}
+	"xor"		{
+				L_err("'and','or','xor','not' are "
+				      "unimplemented reserved words");
+				return T_BITXOR;
+	}
+	{ID}		{
+				Type *t = L_typedef_lookup(yytext);
+				if (t) {
+					L_lval.Typename.s = ckstrdup(yytext);
+					L_lval.Typename.t = t;
+					return T_TYPE;
+				} else {
+					L_lval.s = ckstrdup(yytext);
+					return T_ID;
+				}
+			}
+	{ID}:		{
+				/*
+				 * Push back the : and return a T_ID
+				 * unless it's "default".  The grammar relies
+				 * on this to avoid a nasty conflict.
+				 */
+				put_back(':');
+				if (!strncmp(yytext, "default", 7)) {
+					return T_DEFAULT;
+				}
+				L_lval.s = ckstrdup(yytext);
+				L_lval.s[yyleng-1] = 0;
+				return T_ID;
+			}
+	([A-Z]|::)([0-9a-zA-Z]|::)*_\*	{
+				L_lval.s = ckstrdup(yytext);
+				return T_PATTERN;
+			}
+	$[0-9]+		{
+				/* Regular expression submatches */
+				L_lval.s = ckstrdup(yytext);
+				return T_ID;
+			}
+	[0-9]+		{
+				/*
+				 * Skip any leading 0's which would
+				 * make it look like octal to Tcl.
+				 */
+				size_t	z = strspn(yytext, "0");
+				if (z == yyleng) z = 0;  // number is all 0's
+				L_lval.s = canonical_num(yytext+z);
+				return T_INT_LITERAL;
+			}
+	0o[0-7]+	{
+				/*
+				 * Create a leading 0 so it looks like
+				 * octal to Tcl.
+				 */
+				yytext[1] = '0';
+				L_lval.s = canonical_num(yytext+1);
+				return T_INT_LITERAL;
+			}
+	0x[0-9a-fA-F]+	{
+				L_lval.s = canonical_num(yytext);
+				return T_INT_LITERAL;
+			}
+	[0-9]*\.[0-9]+	{
+				L_lval.s = ckstrdup(yytext);
+				return T_FLOAT_LITERAL;
+			}
+	^#line[ \t]+[0-9]+\n {
+				int	line = strtoul(yytext+5, NULL, 10);
+
+				if (line <= 0) {
+					--L->line;  // since \n already scanned
+					L_err("malformed #line");
+					++L->line;
+				} else {
+					L->line = line;
+				}
+			}
+	^#line[ \t]+[0-9]+[ \t]+\"[^\"\n]*\"\n {
+				int	line  = strtoul(yytext+5, NULL, 10);
+				char	*beg  = strchr(yytext, '"') + 1;
+				char	*end  = strrchr(yytext, '"');
+				char	*name = ckstrndup(beg, end-beg);
+
+				if (line <= 0) {
+					--L->line;  // since \n already scanned
+					L_err("malformed #line");
+					++L->line;
+				} else {
+					L->file = name;
+					L->line = line;
+				}
+			}
+	^#line.*\n	{
+				--L->line;  // since \n already scanned
+				L_err("malformed #line");
+				++L->line;
+			}
+	^#include[ \t]*\"[^\"\n]+\"	{
+				char	*beg  = strchr(yytext, '"') + 1;
+				char	*end  = strrchr(yytext, '"');
+				char	*name = ckstrndup(beg, end-beg);
+				Tcl_Channel chan;
+
+				chan = include_search(name, NULL, 1);
+
+				undo_yy_user_action();
+				if (chan && !include_push(chan, name)) {
+					/* Bail if includes nest too deeply. */
+					yyterminate();
+				}
+			}
+	^#include[ \t]*<[^>\n]+>	{
+				char	*beg  = strchr(yytext, '<') + 1;
+				char	*end  = strrchr(yytext, '>');
+				char	*name = ckstrndup(beg, end-beg);
+				char	*path = NULL;
+				Tcl_Channel chan;
+
+				chan = include_search(name, &path, 0);
+				ckfree(name);
+
+				undo_yy_user_action();
+				if (chan && !include_push(chan, path)) {
+					/* Bail if includes nest too deeply. */
+					yyterminate();
+				}
+			}
+	^#include	{
+				L_err("malformed #include");
+				yy_push_state(eat_through_eol);
+			}
+	^#pragma[ \t]+		return T_PRAGMA;
+	^#.*("\r"|"\n"|"\r\n")	{
+				/*
+				 * Rather than using a start condition
+				 * to separate out all the ^# patterns
+				 * that don't end in \n, this is
+				 * simpler.  If it's not a comment,
+				 * REJECT it so that flex then takes
+				 * the second best rule (those above).
+				 */
+				if (!strncmp(yytext, "#pragma ",  8) ||
+				    !strncmp(yytext, "#pragma\t", 8)) {
+					undo_yy_user_action();
+					REJECT;
+				} else if (!strncmp(yytext, "#include", 8)) {
+					undo_yy_user_action();
+					REJECT;
+				} else unless (L->line == 2) {
+					--L->line;  // since \n already scanned
+					L_err("# comment valid only on line 1");
+					++L->line;
+				}
+			}
+	[ \t]+#.*("\r"|"\n"|"\r\n") {
+				--L->line;  // since \n already scanned
+				unless (L->line == 1) {
+					L_err("# comment valid only on line 1");
+				} else {
+					L_err("# comment must start at "
+					      "first column");
+				}
+				++L->line;
+			}
+	"//".*("\r"|"\n"|"\r\n")
+	[ \t]+
+	\n|\r|\f
+	\"		yy_push_state(str_double); STRBUF_START(L->token_off);
+	\'		yy_push_state(str_single); STRBUF_START(L->token_off);
+	\`		yy_push_state(str_backtick); STRBUF_START(L->token_off);
+	"/*"		yy_push_state(comment);
+	[!=]~[ \t\r\n]*"m".	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 2);	// next token starts at the "m"
+		extract_re_delims(yytext[yyleng-1]);
+		L_lloc.end = L_lloc.beg + 2;	// this token spans the "=~"
+		return ((yytext[0] == '=') ? T_EQTWID : T_BANGTWID);
+	}
+	/* if / is used to delimit the regexp, the m can be omitted */
+	[!=]~[ \t\r\n]*"/"	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 1);	// next token starts at the "/"
+		extract_re_delims('/');
+		L_lloc.end = L_lloc.beg + 2;	// this token spans the "=~"
+		return ((yytext[0] == '=') ? T_EQTWID : T_BANGTWID);
+	}
+	/* a substitution pattern */
+	"=~"[ \t\r\n]*"s".	{
+		yy_push_state(re_modifier);
+		yy_push_state(subst_re);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 2);	// next token starts at the "s"
+		extract_re_delims(yytext[yyleng-1]);
+		L_lloc.end = L_lloc.beg + 2;	// this token spans the "=~"
+		return T_EQTWID;
+	}
+	/* here document (interpolated), valid only on rhs of an assignment */
+	=[ \t\r\n]*<<[a-zA-Z_][a-zA-Z_0-9]*\n		{
+		char	*p, *q;
+
+		if (here_delim) {
+			L_err("nested here documents illegal");
+		}
+		p = strchr(yytext, '<') + 2;  // the < is guaranteed to exist
+		for (q = p; (q > yytext) && (*q != '\n'); --q) ;
+		if ((q > yytext) && (*q == '\n')) {
+			// \n then <<; the in-between whitespace is the here_pfx
+			here_pfx = ckstrndup(q+1, p-q-3);
+		} else {
+			// non-indented here document
+			here_pfx = ckstrdup("");
+		}
+		here_delim = ckstrndup(p, yyleng - (p-yytext) - 1);
+		STRBUF_START(L->token_off);
+		L_lloc.end = L_lloc.beg + 1;
+		yy_push_state(here_doc_interp);
+		return T_EQUALS;
+	}
+	/* here document (uninterpolated), valid only on rhs of an assignment */
+	=[ \t\r\n]*<<\'[a-zA-Z_][a-zA-Z_0-9]*\'\n		{
+		char	*p, *q;
+
+		if (here_delim) {
+			L_err("nested here documents illegal");
+		}
+		p = strchr(yytext, '<') + 2;  // the < is guaranteed to exist
+		for (q = p; (q > yytext) && (*q != '\n'); --q) ;
+		if ((q > yytext) && (*q == '\n')) {
+			// \n then <<; the in-between whitespace is the here_pfx
+			here_pfx = ckstrndup(q+1, p-q-3);
+		} else {
+			// non-indented here document
+			here_pfx = ckstrdup("");
+		}
+		here_delim = ckstrndup(p+1, yyleng - (p-yytext) - 3);
+		STRBUF_START(L->token_off);
+		L_lloc.end = L_lloc.beg + 1;
+		yy_push_state(here_doc_nointerp);
+		return T_EQUALS;
+	}
+	/* illegal here documents (bad stuff before or after the delim) */
+	=[ \t\r\n]*<<-[a-zA-Z_][a-zA-Z_0-9]*			|
+	=[ \t\r\n]*<<-\'[a-zA-Z_][a-zA-Z_0-9]*\'		{
+		L_synerr("<<- unsupported, use =\\n\\t<<END to strip one "
+			 "leading tab");
+	}
+	=[ \t\r\n]*<<[a-zA-Z_][a-zA-Z_0-9]*[^\n]		{
+		L_synerr("illegal characters after here-document delimeter");
+	}
+	=[ \t\r\n]*<<[^a-zA-Z_][a-zA-Z_][a-zA-Z_0-9]*		{
+		L_synerr("illegal characters before here-document delimeter");
+	}
+	=[ \t\r\n]*<<\'[a-zA-Z_][a-zA-Z_0-9]*\'[^\n]		{
+		L_synerr("illegal characters after here-document delimeter");
+	}
+	=[ \t\r\n]*<<\'[^a-zA-Z_][a-zA-Z_][a-zA-Z_0-9]*\'	{
+		L_synerr("illegal characters before here-document delimeter");
+	}
+}
+
+<lhtml>{
+	/*
+	 * The compiler prepends a #line directive to Lhtml source.
+	 * This communicates the correct line number to the Tcl
+	 * code that prints run-time error messages.
+	 */
+	^#line[ \t]+[0-9]+\n {
+		int	line = strtoul(yytext+5, NULL, 10);
+
+		if (line <= 0) {
+			--L->line;  // since \n already scanned
+			L_err("malformed #line");
+			++L->line;
+		} else {
+			L->line = line;
+		}
+	}
+	"<?"=?	{
+		L_lval.s = ckstrdup(STRBUF_STRING());
+		STRBUF_STOP(L_lloc.beg);
+		if (yyleng == 2) {
+			yy_push_state(INITIAL);
+		} else {
+			yy_push_state(lhtml_expr_start);
+		}
+		return T_HTML;
+	}
+	.|\n	STRBUF_ADD(yytext, yyleng);
+	<<EOF>>	{
+		unless (STRBUF_STARTED()) yyterminate();
+		L_lval.s = ckstrdup(STRBUF_STRING());
+		STRBUF_STOP(L_lloc.beg);
+		return T_HTML;
+	}
+}
+
+<lhtml_expr_start>{
+	/*
+	 * This start condition is here only so the rule for ?> can
+	 * know whether we previously scanned <? or <?=.
+	 */
+	.|\n	{
+		unput(yytext[0]);
+		undo_yy_user_action();
+		yy_push_state(INITIAL);
+		return T_LHTML_EXPR_START;
+	}
+}
+
+<re_arg_split>{
+	/*
+	 * A regexp in the context of the first arg to split().  If
+	 * it's not an RE, pop the start-condition stack and push it
+	 * back, so we can continue as normal.
+	 */
+	[ \t\r\n]*
+	/* / starts an RE */
+	"/"	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 1);	// next token starts at the "/"
+		extract_re_delims('/');
+	}
+	/*
+	 * m<punctuation> starts an RE, except for "m)" so that
+	 * "split(m)" works.
+	 */
+	"m"[^a-zA-Z() \t\r\n]	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 1);	// next token starts at the delim
+		extract_re_delims(yytext[yyleng-1]);
+	}
+	/* nothing else starts an RE */
+	.	{
+		unput(yytext[0]);
+		undo_yy_user_action();
+		yy_pop_state();
+	}
+}
+
+<re_arg_case>{
+	/*
+	 * A regexp in the context of a case statement.  If it's not
+	 * an RE, pop the start-condition stack and push it back, so
+	 * we can continue as normal.
+	 */
+	[ \t\r\n]*
+	/* / starts an RE */
+	"/"	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 1);	// next token starts at the "/"
+		extract_re_delims('/');
+	}
+	/*
+	 * m<punctuation> starts an RE except for "m:" which we scan
+	 * as the variable m (so that "case m:" works) or "m(" which
+	 * is the start of a call to the function m (so that "case m():"
+	 * or "case m(arg):" etc work).
+	 */
+	m[^a-zA-Z:( \t\r\n]	{
+		yy_push_state(re_modifier);
+		yy_push_state(glob_re);
+		STRBUF_START(L_lloc.end - 1);	// next token starts at the delim
+		extract_re_delims(yytext[yyleng-1]);
+	}
+	/* nothing else starts an RE */
+	.	{
+		unput(yytext[0]);
+		undo_yy_user_action();
+		yy_pop_state();
+	}
+}
+
+<INITIAL>{
+	"}"		return T_RBRACE;
+}
+
+<interpol>{
+	"}"		{
+				if (interpol_rbrace()) {
+					STRBUF_START(L_lloc.end);
+					interpol_pop();
+					if ((YYSTATE == glob_re) ||
+					    (YYSTATE == subst_re)) {
+						return T_RIGHT_INTERPOL_RE;
+					} else {
+						return T_RIGHT_INTERPOL;
+					}
+				} else {
+					return T_RBRACE;
+				}
+			}
+	.	{
+		L_synerr("illegal character");
+	}
+}
+
+<str_double>{
+	\\r		STRBUF_ADD("\r", 1);
+	\\n		STRBUF_ADD("\n", 1);
+	\\t		STRBUF_ADD("\t", 1);
+	\\u{HEX}		|
+	\\u{HEX}{HEX}		|
+	\\u{HEX}{HEX}{HEX}	|
+	\\u{HEX}{HEX}{HEX}{HEX}	{
+				char	buf[TCL_UTF_MAX];
+				int	ch;
+				TclParseHex(yytext+2, 4, &ch);
+				STRBUF_ADD(buf, Tcl_UniCharToUtf(ch, buf));
+			}
+	\\(.|\n)	STRBUF_ADD(yytext+1, 1);
+	"$"		STRBUF_ADD("$", 1);
+	\n		{
+				L_err("missing string terminator \"");
+				STRBUF_ADD("\n", 1);
+			}
+	[^\\\"$\n]+	STRBUF_ADD(yytext, yyleng);
+	"${"		{
+				if (interpol_push()) yyterminate();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.beg);
+				return T_LEFT_INTERPOL;
+			}
+	\"[ \t\r\n]*\"
+	\"		{
+				yy_pop_state();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.end);
+				return T_STR_LITERAL;
+			}
+}
+
+<str_single>{
+	\\\\		STRBUF_ADD("\\", 1);
+	\\\'		STRBUF_ADD("'", 1);
+	\\\n		STRBUF_ADD("\n", 1);
+	\n		{
+				L_err("missing string terminator \'");
+				STRBUF_ADD("\n", 1);
+			}
+	\\.		|
+	[^\\\'\n]+	STRBUF_ADD(yytext, yyleng);
+	\'[ \t\r\n]*\'
+	\'		{
+				yy_pop_state();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.end);
+				return T_STR_LITERAL;
+			}
+}
+
+<str_backtick>{
+	\\("$"|`|\\)	STRBUF_ADD(yytext+1, 1);
+	\\\n		/* ignore \<newline> */
+	\\.		|
+	"$"		|
+	[^\\`$\n]+	STRBUF_ADD(yytext, yyleng);
+	\n		{
+				L_err("missing string terminator `");
+				STRBUF_ADD("\n", 1);
+			}
+	"${"		{
+				if (interpol_push()) yyterminate();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.beg);
+				return T_LEFT_INTERPOL;
+			}
+	`		{
+				yy_pop_state();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.end);
+				if (YYSTATE == here_doc_interp) {
+					STRBUF_START(L_lloc.end);
+				}
+				return T_STR_BACKTICK;
+			}
+}
+
+<here_doc_nointerp>{
+	^[ \t]*[a-zA-Z_][a-zA-Z_0-9]*;?$	{
+				int	len;
+				char	*p = yytext;
+
+				/*
+				 * Look for whitespace-prefixed here_delim.
+				 * Any amount of white space is allowed.
+				 */
+				while (isspace(*p)) ++p;
+				len = yyleng - (p - yytext);
+				if (p[len-1] == ';') --len;
+				if ((len == strlen(here_delim)) &&
+				    !strncmp(p, here_delim, len)) {
+					yy_pop_state();
+					unput(';');  // for the parser
+					L_lval.s = ckstrdup(STRBUF_STRING());
+					STRBUF_STOP(L_lloc.end);
+					ckfree(here_delim);
+					ckfree(here_pfx);
+					here_delim = NULL;
+					here_pfx = NULL;
+					return T_STR_LITERAL;
+				}
+				/*
+				 * It's a data line.  It must begin with
+				 * here_pfx or else it's an error.
+				 */
+				p = strstr(yytext, here_pfx);
+				if (p == yytext) {
+					p += strlen(here_pfx);
+				} else {
+					L_err("bad here-document prefix");
+					p = yytext;
+				}
+				STRBUF_ADD(p, yyleng - (p - yytext));
+			}
+	^[ \t]+		{
+				char	*p = strstr(yytext, here_pfx);
+				if (p == yytext) {
+					p += strlen(here_pfx);
+					STRBUF_ADD(p, yyleng - (p - yytext));
+				} else {
+					L_err("bad here-document prefix");
+					p = yytext;
+				}
+			}
+	.|\n		STRBUF_ADD(yytext, 1);
+}
+
+<here_doc_interp>{
+	\\\\		STRBUF_ADD("\\", 1);
+	\\\$		STRBUF_ADD("$", 1);
+	\\`		STRBUF_ADD("`", 1);
+	\\\n		// ignore \<newline>
+	"${"		{
+				if (interpol_push()) yyterminate();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.beg);
+				return T_LEFT_INTERPOL;
+			}
+	`		{
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.beg);
+				yy_push_state(str_backtick);
+				STRBUF_START(L->token_off);
+				return T_START_BACKTICK;
+			}
+	^[ \t]*[a-zA-Z_][a-zA-Z_0-9]*;?$	{
+				int	len;
+				char	*p = yytext;
+
+				/*
+				 * Look for whitespace-prefixed here_delim.
+				 * Any amount of white space is allowed.
+				 */
+				while (isspace(*p)) ++p;
+				len = yyleng - (p - yytext);
+				if (p[len-1] == ';') --len;
+				if ((len == strlen(here_delim)) &&
+				    !strncmp(p, here_delim, len)) {
+					yy_pop_state();
+					unput(';');  // for the parser
+					L_lval.s = ckstrdup(STRBUF_STRING());
+					STRBUF_STOP(L_lloc.end);
+					ckfree(here_delim);
+					ckfree(here_pfx);
+					here_delim = NULL;
+					here_pfx = NULL;
+					return T_STR_LITERAL;
+				}
+				/*
+				 * It's a data line.  It must begin with
+				 * here_pfx or else it's an error.
+				 */
+				p = strstr(yytext, here_pfx);
+				if (p == yytext) {
+					p += strlen(here_pfx);
+				} else {
+					L_err("bad here-document prefix");
+					p = yytext;
+				}
+				STRBUF_ADD(p, yyleng - (p - yytext));
+			}
+	^[ \t]+		{
+				char	*p = strstr(yytext, here_pfx);
+				if (p == yytext) {
+					p += strlen(here_pfx);
+					STRBUF_ADD(p, yyleng - (p - yytext));
+				} else {
+					L_err("bad here-document prefix");
+					p = yytext;
+				}
+			}
+	.|\n		STRBUF_ADD(yytext, 1);
+}
+
+<comment>{
+	[^*]+
+	"*"
+	"*/"		yy_pop_state();
+}
+
+<glob_re,subst_re>{
+	"${"		{
+				if (interpol_push()) yyterminate();
+				L_lval.s = ckstrdup(STRBUF_STRING());
+				STRBUF_STOP(L_lloc.beg);
+				return T_LEFT_INTERPOL_RE;
+			}
+	\\.		{
+				if ((yytext[1] == re_end_delim) ||
+				    (yytext[1] == re_start_delim)) {
+					STRBUF_ADD(yytext+1, 1);
+				} else {
+					STRBUF_ADD(yytext, yyleng);
+				}
+			}
+	\n		{
+				--L->line;  // since \n already scanned
+				L_err("run-away regular expression");
+				++L->line;
+				STRBUF_ADD(yytext, yyleng);
+				yy_pop_state();
+				if (YYSTATE == re_modifier) yy_pop_state();
+				return T_RE;
+			}
+	"$"[0-9]	{
+				// Convert $3 to \3 (regexp capture reference).
+				STRBUF_ADD("\\", 1);
+				STRBUF_ADD(yytext+1, yyleng-1);
+			}
+	.		{
+				if (*yytext == re_end_delim) {
+					L_lval.s = ckstrdup(STRBUF_STRING());
+					STRBUF_STOP(L_lloc.end);
+					if (YYSTATE == subst_re) {
+						yy_pop_state();
+						return T_SUBST;
+					} else {
+						yy_pop_state();
+						if (YYSTATE == subst_re) {
+							STRBUF_START(L_lloc.end);
+							if (re_start_delim !=
+							    re_end_delim) {
+								yy_push_state(
+								    re_delim);
+							}
+						}
+						return T_RE;
+					}
+				} else if (*yytext == re_start_delim) {
+					L_err("regexp delimiter must be quoted "
+					      "inside the regexp");
+					STRBUF_ADD(yytext+1, 1);
+				} else {
+					STRBUF_ADD(yytext, yyleng);
+				}
+			}
+
+}
+
+<re_delim>{
+	\n		{
+				--L->line;  // since \n already scanned
+				L_err("run-away regular expression");
+				++L->line;
+				STRBUF_ADD(yytext, yyleng);
+				yy_pop_state();
+			}
+	.		{
+				extract_re_delims(*yytext);
+				yy_pop_state();
+			}
+}
+
+<re_modifier>{
+	[iglt]+		{
+				L_lval.s = ckstrdup(yytext);
+				yy_pop_state();
+				return T_RE_MODIFIER;
+			}
+	.|\n		{
+				unput(yytext[0]);
+				undo_yy_user_action();
+				yy_pop_state();
+				L_lval.s = ckstrdup("");
+				return T_RE_MODIFIER;
+			}
+}
+
+<eat_through_eol>{
+	.
+	\n	yy_pop_state();
+}
+
+	.		{
+				/* This rule matches a char if no other does. */
+				L_synerr("illegal character");
+				yyterminate();
+			}
+	<<EOF>>		{
+				if (in_lhtml) {
+					yy_user_action();  // for line #s
+					L_synerr("premature EOF");
+				}
+				unless (include_pop()) yyterminate();
+			}
+%%
+void
+L_lex_start()
+{
+	include_top = -1;
+	if (in_lhtml) {
+		STRBUF_START(0);
+		BEGIN(lhtml);
+	} else {
+		BEGIN(INITIAL);
+	}
+}
+
+void
+L_lex_begReArg(int kind)
+{
+	switch (kind) {
+	    case 0:
+		yy_push_state(re_arg_split);
+		break;
+	    case 1:
+		yy_push_state(re_arg_case);
+		break;
+	    default:
+		break;
+	}
+}
+
+private void
+extract_re_delims(char c)
+{
+	re_start_delim = c;
+	if (c == '{') {
+		re_end_delim = '}';
+	} else {
+		re_end_delim = c;
+	}
+}
+
+void
+L_lex_begLhtml()
+{
+	in_lhtml = 1;
+}
+
+void
+L_lex_endLhtml()
+{
+	in_lhtml = 0;
+}
+
+/*
+ * These functions are declared down here because they reference
+ * things that flex has not yet declared in the prelogue (like
+ * unput() or yyterminate() etc).
+ */
+
+/*
+ * Unput a single character.  This function is declared down here
+ * because it calls flex's unput() which is not declared before
+ * the prelogue code earlier.
+ */
+private void
+put_back(char c)
+{
+	unput(c);
+	--L_lloc.end;
+	--L->prev_token_len;
+	tally_newlines(&c, 1, -1);
+	--L->script_len;
+	Tcl_SetObjLength(L->script, L->script_len);
+}
+
+/*
+ * API for scanning string interpolations:
+ * interpol_push()	- call when starting an interpolation; returns 1
+ *			  on interpolation stack overflow
+ * interpol_pop()	- call when finishing an interpolation
+ * interpol_lbrace()	- call when "{" seen
+ * interpol_rbrace()	- call when "}" seen; returns non-0 if this brace
+ *			  ends the current interpolation
+ */
+
+private int
+interpol_push()
+{
+	if (interpol_top >= INTERPOL_STACK_SZ) {
+		L_err("string interpolation nesting too deep -- aborting");
+		interpol_top = -1;
+		return (1);
+	}
+	interpol_stk[++interpol_top] = 0;
+	yy_push_state(interpol);
+	return (0);
+}
+
+private void
+interpol_pop()
+{
+	ASSERT((interpol_top >= 0) && (interpol_top <= INTERPOL_STACK_SZ));
+	--interpol_top;
+	yy_pop_state();
+}
+
+private void
+interpol_lbrace()
+{
+	if (interpol_top >= 0) {
+		ASSERT(interpol_top <= INTERPOL_STACK_SZ);
+		++interpol_stk[interpol_top];
+	}
+}
+
+private int
+interpol_rbrace()
+{
+	if (interpol_top >= 0) {
+		ASSERT(interpol_top <= INTERPOL_STACK_SZ);
+		return (interpol_stk[interpol_top]-- == 0);
+	} else {
+		return (0);
+	}
+}