summaryrefslogtreecommitdiffstats
path: root/generic/Lscanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'generic/Lscanner.l')
-rw-r--r--generic/Lscanner.l1334
1 files changed, 1334 insertions, 0 deletions
diff --git a/generic/Lscanner.l b/generic/Lscanner.l
new file mode 100644
index 0000000..126b019
--- /dev/null
+++ b/generic/Lscanner.l
@@ -0,0 +1,1334 @@
+%option noyywrap
+%option noyy_top_state
+%option stack
+%option noinput
+%x re_delim
+%x re_modifier
+%x re_arg_split
+%x re_arg_case
+%x glob_re
+%x subst_re
+%x comment
+%x str_double
+%x str_single
+%x str_backtick
+%x interpol
+%x here_doc_interp
+%x here_doc_nointerp
+%x eat_through_eol
+%x lhtml
+%x lhtml_expr_start
+ID ([a-zA-Z_]|::)([0-9a-zA-Z_]|::)*
+HEX [a-fA-F0-9]
+%{
+/*
+ * Copyright (c) 2006-2008 BitMover, Inc.
+ */
+#include <string.h>
+#define _PWD_H // Some solaris9 conflict, we don't need pwd.h
+#include "tclInt.h"
+#include "Lcompile.h"
+#include "Lgrammar.h"
+#include "tommath.h"
+
+private void extract_re_delims(char c);
+private int include_pop();
+private int include_push(Tcl_Channel chan, char *name);
+private Tcl_Channel include_search(char *file, char **path, int cwdOnly);
+private Tcl_Channel include_try(Tcl_Obj *fileObj, int *found);
+private void inject(char *s);
+private void interpol_lbrace();
+private void interpol_pop();
+private int interpol_push();
+private int interpol_rbrace();
+private void put_back(char c);
+private void tally_newlines(char *s, int len, int tally);
+
+// Max nesting depth of string interpolations.
+#define INTERPOL_STACK_SZ 10
+
+// Stack for tracking include() statements.
+#define INCLUDE_STACK_SZ 10
+typedef struct {
+ char *name;
+ char *dir;
+ int line;
+ YY_BUFFER_STATE buf;
+} Include;
+
+private char re_start_delim; // delimiters for m|regexp| form
+private char re_end_delim;
+private Tcl_Obj *str; // string collection buffer
+private int str_beg; // source offset of string
+private char *here_delim = NULL;
+private char *here_pfx = NULL;
+private int include_top;
+private Include include_stk[INCLUDE_STACK_SZ+1];
+private Tcl_HashTable *include_table = NULL;
+private int interpol_top = -1;
+private int interpol_stk[INTERPOL_STACK_SZ+1];
+private int in_lhtml = 0; // Lhtml mode
+
+#define STRBUF_START(beg) \
+ do { \
+ str = Tcl_NewObj(); \
+ Tcl_IncrRefCount(str); \
+ str_beg = (beg); \
+ } while (0)
+
+
+#define STRBUF_STRING() Tcl_GetString(str)
+
+#define STRBUF_STARTED() (str != NULL)
+
+#define STRBUF_ADD(s, len) Tcl_AppendToObj(str, s, len)
+
+#define STRBUF_STOP(e) \
+ do { \
+ Tcl_DecrRefCount(str); \
+ str = NULL; \
+ L_lloc.beg = str_beg; \
+ L_lloc.end = (e); \
+ } while (0)
+
+/*
+ * Keep track of the current offset in the input string.
+ * YY_USER_ACTION is run before each action. Note that some actions
+ * further modify L_lloc.
+ */
+
+#define YY_USER_ACTION yy_user_action();
+
+private void
+yy_user_action()
+{
+ L->prev_token_off = L->token_off;
+ L->token_off += L->prev_token_len;
+ L->prev_token_len = yyleng;
+
+ L_lloc.beg = L->token_off;
+ L_lloc.end = L->token_off + yyleng;
+
+ tally_newlines(yytext, yyleng, 1);
+ L_lloc.line = L->line;
+
+ L_lloc.file = L->file;
+
+ /*
+ * Build up in L->script the text that the scanner scans.
+ * The compiler later passes this on to tcl as the script
+ * source. This allows include() stmts to be handled properly.
+ */
+ Tcl_AppendToObj(L->script, yytext, yyleng);
+ L->script_len += yyleng;
+}
+
+/*
+ * Un-do the effects of the YY_USER_ACTION on the token offset
+ * tracking. This is useful in include() processing where the
+ * characters in the '#include "file"' must be ignored.
+ */
+private void
+undo_yy_user_action()
+{
+ L->prev_token_len = L->token_off - L->prev_token_off;
+ L->token_off = L->prev_token_off;
+
+ L_lloc.beg = L->prev_token_off;
+ L_lloc.end = L->prev_token_off + L->prev_token_len;
+
+ tally_newlines(yytext, yyleng, -1);
+ L_lloc.line = L->line;
+
+ L->script_len -= yyleng;
+ Tcl_SetObjLength(L->script, L->script_len);
+}
+
+/*
+ * Inject the given string into the L script text, but do not give it
+ * to the scanner. This is useful for inserting #line directives (for
+ * #include's) which need to remain in the script so Tcl can see them
+ * but which aren't parsed.
+ */
+private void
+inject(char *s)
+{
+ int len = strlen(s);
+
+ L->prev_token_len += len;
+
+ Tcl_AppendToObj(L->script, s, len);
+ L->script_len += len;
+}
+
+/*
+ * Count the newlines in a string and add the number to L->line. Pass
+ * in tally == 1 to count them and tally == -1 to undo it.
+ */
+private void
+tally_newlines(char *s, int len, int tally)
+{
+ char *end, *p;
+
+ for (p = s, end = p + len; p < end; p++) {
+ if (*p == '\n') {
+ L->line += tally;
+ } else if ((*p == '\r') && ((p+1) < end) && (*(p+1) != '\n')) {
+ /* Mac line endings. */
+ L->line += tally;
+ }
+ }
+}
+
+private Tcl_Channel
+include_try(Tcl_Obj *fileObj, int *found)
+{
+ int new;
+ Tcl_Channel chan;
+ char *file = Tcl_GetString(fileObj);
+ char *path;
+ Tcl_Obj *pathObj;
+
+ /*
+ * See if the normalized path has been included before. If the path
+ * isn't absolute, consider it to be relative to where L->file is.
+ */
+ if (Tcl_FSGetPathType(fileObj) == TCL_PATH_ABSOLUTE) {
+ if ((pathObj = Tcl_FSGetNormalizedPath(NULL, fileObj)) == NULL){
+ L_err("unable to normalize include file %s", file);
+ return (NULL);
+ }
+ } else {
+ pathObj = Tcl_ObjPrintf("%s/%s", L->dir, file);
+ }
+ Tcl_IncrRefCount(pathObj);
+
+ path = Tcl_GetString(pathObj);
+ Tcl_CreateHashEntry(include_table, path, &new);
+ if (new) {
+ chan = Tcl_FSOpenFileChannel(L->interp, pathObj, "r", 0666);
+ *found = (chan != NULL);
+ return (chan);
+ } else {
+ *found = 1; // already included
+ return (NULL);
+ }
+ Tcl_DecrRefCount(pathObj);
+}
+
+/*
+ * Search for an include file. If the path is absolute, use it.
+ * Else, for #include <file> (cwdOnly == 0) try
+ * $BIN/include (where BIN is where the running tclsh lives)
+ * /usr/local/include/L
+ * /usr/include/L
+ * For #include "file" (cwdOnly == 1) look only in the directory
+ * where the script doing the #include resides.
+ */
+private Tcl_Channel
+include_search(char *file, char **path, int cwdOnly)
+{
+ int found, len;
+ Tcl_Channel chan;
+ Tcl_Obj *binObj = NULL;
+ Tcl_Obj *fileObj;
+
+ unless (include_table) {
+ include_table = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable));
+ Tcl_InitHashTable(include_table, TCL_STRING_KEYS);
+ }
+
+ fileObj = Tcl_NewStringObj(file, -1);
+ Tcl_IncrRefCount(fileObj);
+ if ((Tcl_FSGetPathType(fileObj) == TCL_PATH_ABSOLUTE) || cwdOnly) {
+ chan = include_try(fileObj, &found);
+ } else {
+ /* Try $BIN/include */
+ binObj = TclGetObjNameOfExecutable();
+ Tcl_GetStringFromObj(binObj, &len);
+ if (len > 0) {
+ Tcl_DecrRefCount(fileObj);
+ /* TclPathPart bumps the ref count. */
+ fileObj = TclPathPart(L->interp, binObj,
+ TCL_PATH_DIRNAME);
+ Tcl_AppendPrintfToObj(fileObj, "/include/%s", file);
+ chan = include_try(fileObj, &found);
+ if (found) goto done;
+ }
+ /* Try /usr/local/include/L */
+ Tcl_DecrRefCount(fileObj);
+ fileObj = Tcl_ObjPrintf("/usr/local/include/L/%s", file);
+ Tcl_IncrRefCount(fileObj);
+ chan = include_try(fileObj, &found);
+ if (found) goto done;
+ /* Try /usr/include/L */
+ Tcl_DecrRefCount(fileObj);
+ fileObj = Tcl_ObjPrintf("/usr/include/L/%s", file);
+ Tcl_IncrRefCount(fileObj);
+ chan = include_try(fileObj, &found);
+ }
+ done:
+ unless (found) {
+ L_err("cannot find include file %s", file);
+ }
+ if (path) *path = ckstrdup(Tcl_GetString(fileObj));
+ Tcl_DecrRefCount(fileObj);
+ return (chan);
+}
+
+private int
+include_push(Tcl_Channel chan, char *name)
+{
+ YY_BUFFER_STATE buf;
+ Tcl_Obj *objPtr;
+ char *dec = NULL, *script;
+ int len, ret;
+
+ /* Read the file into memory. */
+ objPtr = Tcl_NewObj();
+ Tcl_IncrRefCount(objPtr);
+ if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) {
+ Tcl_Close(L->interp, chan);
+ L_err("error reading include file %s", name);
+ return (0);
+ }
+ Tcl_Close(L->interp, chan);
+
+ /* If it is encrypted, decrypt it. */
+ script = Tcl_GetStringFromObj(objPtr, &len);
+
+ /* Create a new flex buffer with the file contents. */
+ if (include_top >= INCLUDE_STACK_SZ) {
+ L_err("include file nesting too deep -- aborting");
+ while (include_pop()) ;
+ ret = 0;
+ } else {
+ ++include_top;
+ include_stk[include_top].name = L->file;
+ include_stk[include_top].dir = L->dir;
+ include_stk[include_top].line = L->line;
+ include_stk[include_top].buf = YY_CURRENT_BUFFER;
+ buf = yy_scan_bytes(script, len);
+ L->file = name;
+ L->dir = L_dirname(L->file);
+ L->line = 1;
+ inject("#line 1\n");
+ ret = 1;
+ }
+ Tcl_DecrRefCount(objPtr);
+ if (dec) ckfree(dec);
+ return (ret);
+}
+
+private int
+include_pop()
+{
+ char *s;
+
+ if (include_top >= 0) {
+ L->file = include_stk[include_top].name;
+ L->dir = include_stk[include_top].dir;
+ L->line = include_stk[include_top].line;
+ yy_delete_buffer(YY_CURRENT_BUFFER);
+ yy_switch_to_buffer(include_stk[include_top].buf);
+ --include_top;
+ s = cksprintf("#line %d\n", L->line);
+ inject(s);
+ ckfree(s);
+ return (1);
+ } else {
+ return (0);
+ }
+}
+
+/*
+ * Given a decimal, hex, or octal integer constant of arbitrary
+ * precision, return a canonical string representation. This is done
+ * by converting it to a bignum and then taking its string rep.
+ */
+private char *
+canonical_num(char *num)
+{
+ char *ret;
+ Tcl_Obj *obj;
+ mp_int big;
+
+ obj = Tcl_NewStringObj(num, -1);
+ Tcl_IncrRefCount(obj);
+ Tcl_TakeBignumFromObj(NULL, obj, &big);
+ Tcl_SetBignumObj(obj, &big);
+ ret = ckstrdup(Tcl_GetString(obj));
+ Tcl_DecrRefCount(obj);
+ return (ret);
+}
+
+/*
+ * Work around a Windows problem where our getopt type conficts
+ * with the system's.
+ */
+#undef getopt
+#undef optarg
+#undef optind
+
+%}
+%%
+<INITIAL,interpol>{
+ "(" return T_LPAREN;
+ ")" return T_RPAREN;
+ "{" interpol_lbrace(); return T_LBRACE;
+ "[" return T_LBRACKET;
+ "]" return T_RBRACKET;
+ "," return T_COMMA;
+ "!" return T_BANG;
+ "+" return T_PLUS;
+ "-" return T_MINUS;
+ "*" return T_STAR;
+ "/" return T_SLASH;
+ "%" return T_PERC;
+ "+=" return T_EQPLUS;
+ "-=" return T_EQMINUS;
+ "*=" return T_EQSTAR;
+ "/=" return T_EQSLASH;
+ "%=" return T_EQPERC;
+ "&=" return T_EQBITAND;
+ "|=" return T_EQBITOR;
+ "^=" return T_EQBITXOR;
+ "<<=" return T_EQLSHIFT;
+ ">>=" return T_EQRSHIFT;
+ ".=" return T_EQDOT;
+ "++" return T_PLUSPLUS;
+ "--" return T_MINUSMINUS;
+ "&&" return T_ANDAND;
+ "||" return T_OROR;
+ "&" return T_BITAND;
+ "|" return T_BITOR;
+ "^" return T_BITXOR;
+ "~" return T_BITNOT;
+ "<<" return T_LSHIFT;
+ ">>" return T_RSHIFT;
+ "=" return T_EQUALS;
+ ";" return T_SEMI;
+ "." return T_DOT;
+ [ \t\n\r]+"."[ \t\n\r]+ return T_STRCAT;
+ ".." return T_DOTDOT;
+ "..." return T_ELLIPSIS;
+ "class" return T_CLASS;
+ "extern" return T_EXTERN;
+ "return" return T_RETURN;
+ "void" return T_VOID;
+ "string" return T_STRING;
+ "widget" return T_WIDGET;
+ "int" return T_INT;
+ "float" return T_FLOAT;
+ "poly" return T_POLY;
+ "split" return T_SPLIT;
+ "if" return T_IF;
+ "else" return T_ELSE;
+ "unless" return T_UNLESS;
+ "while" return T_WHILE;
+ "do" return T_DO;
+ "for" return T_FOR;
+ "struct" return T_STRUCT;
+ "typedef" return T_TYPEDEF;
+ "defined" return T_DEFINED;
+ "foreach" return T_FOREACH;
+ "break" return T_BREAK;
+ "continue" return T_CONTINUE;
+ "instance" return T_INSTANCE;
+ "private" return T_PRIVATE;
+ "public" return T_PUBLIC;
+ "constructor" return T_CONSTRUCTOR;
+ "destructor" return T_DESTRUCTOR;
+ "expand" return T_EXPAND;
+ "_argused" return T_ARGUSED;
+ "_attribute" return T_ATTRIBUTE;
+ "_attributes" return T_ATTRIBUTE;
+ "_optional" return T_OPTIONAL;
+ "_mustbetype" return T_MUSTBETYPE;
+ "goto" return T_GOTO;
+ "switch" return T_SWITCH;
+ "case" return T_CASE;
+ "default" return T_DEFAULT;
+ "try" return T_TRY;
+ "=>" return T_ARROW;
+ "eq" return T_EQ;
+ "ne" return T_NE;
+ "lt" return T_LT;
+ "le" return T_LE;
+ "gt" return T_GT;
+ "ge" return T_GE;
+ "==" return T_EQUALEQUAL;
+ "!=" return T_NOTEQUAL;
+ ">" return T_GREATER;
+ ">=" return T_GREATEREQ;
+ "<" return T_LESSTHAN;
+ "<=" return T_LESSTHANEQ;
+ "->" return T_POINTS;
+ ":" return T_COLON;
+ "?" return T_QUESTION;
+ "?>" {
+ /*
+ * ?> marks the end of a script or expr
+ * inside of an lhtml document but is a
+ * syntax error otherwise.
+ */
+ unless (in_lhtml) {
+ undo_yy_user_action();
+ REJECT;
+ }
+ yy_pop_state();
+ STRBUF_START(L_lloc.end);
+ if (YYSTATE == lhtml_expr_start) {
+ yy_pop_state(); // pop back to lhtml
+ ASSERT(YYSTATE == lhtml);
+ return T_LHTML_EXPR_END;
+ }
+ }
+ "and" {
+ L_err("'and','or','xor','not' are "
+ "unimplemented reserved words");
+ return T_ANDAND;
+ }
+ "not" {
+ L_err("'and','or','xor','not' are "
+ "unimplemented reserved words");
+ return T_BANG;
+ }
+ "or" {
+ L_err("'and','or','xor','not' are "
+ "unimplemented reserved words");
+ return T_OROR;
+ }
+ "xor" {
+ L_err("'and','or','xor','not' are "
+ "unimplemented reserved words");
+ return T_BITXOR;
+ }
+ {ID} {
+ Type *t = L_typedef_lookup(yytext);
+ if (t) {
+ L_lval.Typename.s = ckstrdup(yytext);
+ L_lval.Typename.t = t;
+ return T_TYPE;
+ } else {
+ L_lval.s = ckstrdup(yytext);
+ return T_ID;
+ }
+ }
+ {ID}: {
+ /*
+ * Push back the : and return a T_ID
+ * unless it's "default". The grammar relies
+ * on this to avoid a nasty conflict.
+ */
+ put_back(':');
+ if (!strncmp(yytext, "default", 7)) {
+ return T_DEFAULT;
+ }
+ L_lval.s = ckstrdup(yytext);
+ L_lval.s[yyleng-1] = 0;
+ return T_ID;
+ }
+ ([A-Z]|::)([0-9a-zA-Z]|::)*_\* {
+ L_lval.s = ckstrdup(yytext);
+ return T_PATTERN;
+ }
+ $[0-9]+ {
+ /* Regular expression submatches */
+ L_lval.s = ckstrdup(yytext);
+ return T_ID;
+ }
+ [0-9]+ {
+ /*
+ * Skip any leading 0's which would
+ * make it look like octal to Tcl.
+ */
+ size_t z = strspn(yytext, "0");
+ if (z == yyleng) z = 0; // number is all 0's
+ L_lval.s = canonical_num(yytext+z);
+ return T_INT_LITERAL;
+ }
+ 0o[0-7]+ {
+ /*
+ * Create a leading 0 so it looks like
+ * octal to Tcl.
+ */
+ yytext[1] = '0';
+ L_lval.s = canonical_num(yytext+1);
+ return T_INT_LITERAL;
+ }
+ 0x[0-9a-fA-F]+ {
+ L_lval.s = canonical_num(yytext);
+ return T_INT_LITERAL;
+ }
+ [0-9]*\.[0-9]+ {
+ L_lval.s = ckstrdup(yytext);
+ return T_FLOAT_LITERAL;
+ }
+ ^#line[ \t]+[0-9]+\n {
+ int line = strtoul(yytext+5, NULL, 10);
+
+ if (line <= 0) {
+ --L->line; // since \n already scanned
+ L_err("malformed #line");
+ ++L->line;
+ } else {
+ L->line = line;
+ }
+ }
+ ^#line[ \t]+[0-9]+[ \t]+\"[^\"\n]*\"\n {
+ int line = strtoul(yytext+5, NULL, 10);
+ char *beg = strchr(yytext, '"') + 1;
+ char *end = strrchr(yytext, '"');
+ char *name = ckstrndup(beg, end-beg);
+
+ if (line <= 0) {
+ --L->line; // since \n already scanned
+ L_err("malformed #line");
+ ++L->line;
+ } else {
+ L->file = name;
+ L->line = line;
+ }
+ }
+ ^#line.*\n {
+ --L->line; // since \n already scanned
+ L_err("malformed #line");
+ ++L->line;
+ }
+ ^#include[ \t]*\"[^\"\n]+\" {
+ char *beg = strchr(yytext, '"') + 1;
+ char *end = strrchr(yytext, '"');
+ char *name = ckstrndup(beg, end-beg);
+ Tcl_Channel chan;
+
+ chan = include_search(name, NULL, 1);
+
+ undo_yy_user_action();
+ if (chan && !include_push(chan, name)) {
+ /* Bail if includes nest too deeply. */
+ yyterminate();
+ }
+ }
+ ^#include[ \t]*<[^>\n]+> {
+ char *beg = strchr(yytext, '<') + 1;
+ char *end = strrchr(yytext, '>');
+ char *name = ckstrndup(beg, end-beg);
+ char *path = NULL;
+ Tcl_Channel chan;
+
+ chan = include_search(name, &path, 0);
+ ckfree(name);
+
+ undo_yy_user_action();
+ if (chan && !include_push(chan, path)) {
+ /* Bail if includes nest too deeply. */
+ yyterminate();
+ }
+ }
+ ^#include {
+ L_err("malformed #include");
+ yy_push_state(eat_through_eol);
+ }
+ ^#pragma[ \t]+ return T_PRAGMA;
+ ^#.*("\r"|"\n"|"\r\n") {
+ /*
+ * Rather than using a start condition
+ * to separate out all the ^# patterns
+ * that don't end in \n, this is
+ * simpler. If it's not a comment,
+ * REJECT it so that flex then takes
+ * the second best rule (those above).
+ */
+ if (!strncmp(yytext, "#pragma ", 8) ||
+ !strncmp(yytext, "#pragma\t", 8)) {
+ undo_yy_user_action();
+ REJECT;
+ } else if (!strncmp(yytext, "#include", 8)) {
+ undo_yy_user_action();
+ REJECT;
+ } else unless (L->line == 2) {
+ --L->line; // since \n already scanned
+ L_err("# comment valid only on line 1");
+ ++L->line;
+ }
+ }
+ [ \t]+#.*("\r"|"\n"|"\r\n") {
+ --L->line; // since \n already scanned
+ unless (L->line == 1) {
+ L_err("# comment valid only on line 1");
+ } else {
+ L_err("# comment must start at "
+ "first column");
+ }
+ ++L->line;
+ }
+ "//".*("\r"|"\n"|"\r\n")
+ [ \t]+
+ \n|\r|\f
+ \" yy_push_state(str_double); STRBUF_START(L->token_off);
+ \' yy_push_state(str_single); STRBUF_START(L->token_off);
+ \` yy_push_state(str_backtick); STRBUF_START(L->token_off);
+ "/*" yy_push_state(comment);
+ [!=]~[ \t\r\n]*"m". {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 2); // next token starts at the "m"
+ extract_re_delims(yytext[yyleng-1]);
+ L_lloc.end = L_lloc.beg + 2; // this token spans the "=~"
+ return ((yytext[0] == '=') ? T_EQTWID : T_BANGTWID);
+ }
+ /* if / is used to delimit the regexp, the m can be omitted */
+ [!=]~[ \t\r\n]*"/" {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 1); // next token starts at the "/"
+ extract_re_delims('/');
+ L_lloc.end = L_lloc.beg + 2; // this token spans the "=~"
+ return ((yytext[0] == '=') ? T_EQTWID : T_BANGTWID);
+ }
+ /* a substitution pattern */
+ "=~"[ \t\r\n]*"s". {
+ yy_push_state(re_modifier);
+ yy_push_state(subst_re);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 2); // next token starts at the "s"
+ extract_re_delims(yytext[yyleng-1]);
+ L_lloc.end = L_lloc.beg + 2; // this token spans the "=~"
+ return T_EQTWID;
+ }
+ /* here document (interpolated), valid only on rhs of an assignment */
+ =[ \t\r\n]*<<[a-zA-Z_][a-zA-Z_0-9]*\n {
+ char *p, *q;
+
+ if (here_delim) {
+ L_err("nested here documents illegal");
+ }
+ p = strchr(yytext, '<') + 2; // the < is guaranteed to exist
+ for (q = p; (q > yytext) && (*q != '\n'); --q) ;
+ if ((q > yytext) && (*q == '\n')) {
+ // \n then <<; the in-between whitespace is the here_pfx
+ here_pfx = ckstrndup(q+1, p-q-3);
+ } else {
+ // non-indented here document
+ here_pfx = ckstrdup("");
+ }
+ here_delim = ckstrndup(p, yyleng - (p-yytext) - 1);
+ STRBUF_START(L->token_off);
+ L_lloc.end = L_lloc.beg + 1;
+ yy_push_state(here_doc_interp);
+ return T_EQUALS;
+ }
+ /* here document (uninterpolated), valid only on rhs of an assignment */
+ =[ \t\r\n]*<<\'[a-zA-Z_][a-zA-Z_0-9]*\'\n {
+ char *p, *q;
+
+ if (here_delim) {
+ L_err("nested here documents illegal");
+ }
+ p = strchr(yytext, '<') + 2; // the < is guaranteed to exist
+ for (q = p; (q > yytext) && (*q != '\n'); --q) ;
+ if ((q > yytext) && (*q == '\n')) {
+ // \n then <<; the in-between whitespace is the here_pfx
+ here_pfx = ckstrndup(q+1, p-q-3);
+ } else {
+ // non-indented here document
+ here_pfx = ckstrdup("");
+ }
+ here_delim = ckstrndup(p+1, yyleng - (p-yytext) - 3);
+ STRBUF_START(L->token_off);
+ L_lloc.end = L_lloc.beg + 1;
+ yy_push_state(here_doc_nointerp);
+ return T_EQUALS;
+ }
+ /* illegal here documents (bad stuff before or after the delim) */
+ =[ \t\r\n]*<<-[a-zA-Z_][a-zA-Z_0-9]* |
+ =[ \t\r\n]*<<-\'[a-zA-Z_][a-zA-Z_0-9]*\' {
+ L_synerr("<<- unsupported, use =\\n\\t<<END to strip one "
+ "leading tab");
+ }
+ =[ \t\r\n]*<<[a-zA-Z_][a-zA-Z_0-9]*[^\n] {
+ L_synerr("illegal characters after here-document delimeter");
+ }
+ =[ \t\r\n]*<<[^a-zA-Z_][a-zA-Z_][a-zA-Z_0-9]* {
+ L_synerr("illegal characters before here-document delimeter");
+ }
+ =[ \t\r\n]*<<\'[a-zA-Z_][a-zA-Z_0-9]*\'[^\n] {
+ L_synerr("illegal characters after here-document delimeter");
+ }
+ =[ \t\r\n]*<<\'[^a-zA-Z_][a-zA-Z_][a-zA-Z_0-9]*\' {
+ L_synerr("illegal characters before here-document delimeter");
+ }
+}
+
+<lhtml>{
+ /*
+ * The compiler prepends a #line directive to Lhtml source.
+ * This communicates the correct line number to the Tcl
+ * code that prints run-time error messages.
+ */
+ ^#line[ \t]+[0-9]+\n {
+ int line = strtoul(yytext+5, NULL, 10);
+
+ if (line <= 0) {
+ --L->line; // since \n already scanned
+ L_err("malformed #line");
+ ++L->line;
+ } else {
+ L->line = line;
+ }
+ }
+ "<?"=? {
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ if (yyleng == 2) {
+ yy_push_state(INITIAL);
+ } else {
+ yy_push_state(lhtml_expr_start);
+ }
+ return T_HTML;
+ }
+ .|\n STRBUF_ADD(yytext, yyleng);
+ <<EOF>> {
+ unless (STRBUF_STARTED()) yyterminate();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ return T_HTML;
+ }
+}
+
+<lhtml_expr_start>{
+ /*
+ * This start condition is here only so the rule for ?> can
+ * know whether we previously scanned <? or <?=.
+ */
+ .|\n {
+ unput(yytext[0]);
+ undo_yy_user_action();
+ yy_push_state(INITIAL);
+ return T_LHTML_EXPR_START;
+ }
+}
+
+<re_arg_split>{
+ /*
+ * A regexp in the context of the first arg to split(). If
+ * it's not an RE, pop the start-condition stack and push it
+ * back, so we can continue as normal.
+ */
+ [ \t\r\n]*
+ /* / starts an RE */
+ "/" {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 1); // next token starts at the "/"
+ extract_re_delims('/');
+ }
+ /*
+ * m<punctuation> starts an RE, except for "m)" so that
+ * "split(m)" works.
+ */
+ "m"[^a-zA-Z() \t\r\n] {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 1); // next token starts at the delim
+ extract_re_delims(yytext[yyleng-1]);
+ }
+ /* nothing else starts an RE */
+ . {
+ unput(yytext[0]);
+ undo_yy_user_action();
+ yy_pop_state();
+ }
+}
+
+<re_arg_case>{
+ /*
+ * A regexp in the context of a case statement. If it's not
+ * an RE, pop the start-condition stack and push it back, so
+ * we can continue as normal.
+ */
+ [ \t\r\n]*
+ /* / starts an RE */
+ "/" {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 1); // next token starts at the "/"
+ extract_re_delims('/');
+ }
+ /*
+ * m<punctuation> starts an RE except for "m:" which we scan
+ * as the variable m (so that "case m:" works) or "m(" which
+ * is the start of a call to the function m (so that "case m():"
+ * or "case m(arg):" etc work).
+ */
+ m[^a-zA-Z:( \t\r\n] {
+ yy_push_state(re_modifier);
+ yy_push_state(glob_re);
+ STRBUF_START(L_lloc.end - 1); // next token starts at the delim
+ extract_re_delims(yytext[yyleng-1]);
+ }
+ /* nothing else starts an RE */
+ . {
+ unput(yytext[0]);
+ undo_yy_user_action();
+ yy_pop_state();
+ }
+}
+
+<INITIAL>{
+ "}" return T_RBRACE;
+}
+
+<interpol>{
+ "}" {
+ if (interpol_rbrace()) {
+ STRBUF_START(L_lloc.end);
+ interpol_pop();
+ if ((YYSTATE == glob_re) ||
+ (YYSTATE == subst_re)) {
+ return T_RIGHT_INTERPOL_RE;
+ } else {
+ return T_RIGHT_INTERPOL;
+ }
+ } else {
+ return T_RBRACE;
+ }
+ }
+ . {
+ L_synerr("illegal character");
+ }
+}
+
+<str_double>{
+ \\r STRBUF_ADD("\r", 1);
+ \\n STRBUF_ADD("\n", 1);
+ \\t STRBUF_ADD("\t", 1);
+ \\u{HEX} |
+ \\u{HEX}{HEX} |
+ \\u{HEX}{HEX}{HEX} |
+ \\u{HEX}{HEX}{HEX}{HEX} {
+ char buf[TCL_UTF_MAX];
+ int ch;
+ TclParseHex(yytext+2, 4, &ch);
+ STRBUF_ADD(buf, Tcl_UniCharToUtf(ch, buf));
+ }
+ \\(.|\n) STRBUF_ADD(yytext+1, 1);
+ "$" STRBUF_ADD("$", 1);
+ \n {
+ L_err("missing string terminator \"");
+ STRBUF_ADD("\n", 1);
+ }
+ [^\\\"$\n]+ STRBUF_ADD(yytext, yyleng);
+ "${" {
+ if (interpol_push()) yyterminate();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ return T_LEFT_INTERPOL;
+ }
+ \"[ \t\r\n]*\"
+ \" {
+ yy_pop_state();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ return T_STR_LITERAL;
+ }
+}
+
+<str_single>{
+ \\\\ STRBUF_ADD("\\", 1);
+ \\\' STRBUF_ADD("'", 1);
+ \\\n STRBUF_ADD("\n", 1);
+ \n {
+ L_err("missing string terminator \'");
+ STRBUF_ADD("\n", 1);
+ }
+ \\. |
+ [^\\\'\n]+ STRBUF_ADD(yytext, yyleng);
+ \'[ \t\r\n]*\'
+ \' {
+ yy_pop_state();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ return T_STR_LITERAL;
+ }
+}
+
+<str_backtick>{
+ \\("$"|`|\\) STRBUF_ADD(yytext+1, 1);
+ \\\n /* ignore \<newline> */
+ \\. |
+ "$" |
+ [^\\`$\n]+ STRBUF_ADD(yytext, yyleng);
+ \n {
+ L_err("missing string terminator `");
+ STRBUF_ADD("\n", 1);
+ }
+ "${" {
+ if (interpol_push()) yyterminate();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ return T_LEFT_INTERPOL;
+ }
+ ` {
+ yy_pop_state();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ if (YYSTATE == here_doc_interp) {
+ STRBUF_START(L_lloc.end);
+ }
+ return T_STR_BACKTICK;
+ }
+}
+
+<here_doc_nointerp>{
+ ^[ \t]*[a-zA-Z_][a-zA-Z_0-9]*;?$ {
+ int len;
+ char *p = yytext;
+
+ /*
+ * Look for whitespace-prefixed here_delim.
+ * Any amount of white space is allowed.
+ */
+ while (isspace(*p)) ++p;
+ len = yyleng - (p - yytext);
+ if (p[len-1] == ';') --len;
+ if ((len == strlen(here_delim)) &&
+ !strncmp(p, here_delim, len)) {
+ yy_pop_state();
+ unput(';'); // for the parser
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ ckfree(here_delim);
+ ckfree(here_pfx);
+ here_delim = NULL;
+ here_pfx = NULL;
+ return T_STR_LITERAL;
+ }
+ /*
+ * It's a data line. It must begin with
+ * here_pfx or else it's an error.
+ */
+ p = strstr(yytext, here_pfx);
+ if (p == yytext) {
+ p += strlen(here_pfx);
+ } else {
+ L_err("bad here-document prefix");
+ p = yytext;
+ }
+ STRBUF_ADD(p, yyleng - (p - yytext));
+ }
+ ^[ \t]+ {
+ char *p = strstr(yytext, here_pfx);
+ if (p == yytext) {
+ p += strlen(here_pfx);
+ STRBUF_ADD(p, yyleng - (p - yytext));
+ } else {
+ L_err("bad here-document prefix");
+ p = yytext;
+ }
+ }
+ .|\n STRBUF_ADD(yytext, 1);
+}
+
+<here_doc_interp>{
+ \\\\ STRBUF_ADD("\\", 1);
+ \\\$ STRBUF_ADD("$", 1);
+ \\` STRBUF_ADD("`", 1);
+ \\\n // ignore \<newline>
+ "${" {
+ if (interpol_push()) yyterminate();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ return T_LEFT_INTERPOL;
+ }
+ ` {
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ yy_push_state(str_backtick);
+ STRBUF_START(L->token_off);
+ return T_START_BACKTICK;
+ }
+ ^[ \t]*[a-zA-Z_][a-zA-Z_0-9]*;?$ {
+ int len;
+ char *p = yytext;
+
+ /*
+ * Look for whitespace-prefixed here_delim.
+ * Any amount of white space is allowed.
+ */
+ while (isspace(*p)) ++p;
+ len = yyleng - (p - yytext);
+ if (p[len-1] == ';') --len;
+ if ((len == strlen(here_delim)) &&
+ !strncmp(p, here_delim, len)) {
+ yy_pop_state();
+ unput(';'); // for the parser
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ ckfree(here_delim);
+ ckfree(here_pfx);
+ here_delim = NULL;
+ here_pfx = NULL;
+ return T_STR_LITERAL;
+ }
+ /*
+ * It's a data line. It must begin with
+ * here_pfx or else it's an error.
+ */
+ p = strstr(yytext, here_pfx);
+ if (p == yytext) {
+ p += strlen(here_pfx);
+ } else {
+ L_err("bad here-document prefix");
+ p = yytext;
+ }
+ STRBUF_ADD(p, yyleng - (p - yytext));
+ }
+ ^[ \t]+ {
+ char *p = strstr(yytext, here_pfx);
+ if (p == yytext) {
+ p += strlen(here_pfx);
+ STRBUF_ADD(p, yyleng - (p - yytext));
+ } else {
+ L_err("bad here-document prefix");
+ p = yytext;
+ }
+ }
+ .|\n STRBUF_ADD(yytext, 1);
+}
+
+<comment>{
+ [^*]+
+ "*"
+ "*/" yy_pop_state();
+}
+
+<glob_re,subst_re>{
+ "${" {
+ if (interpol_push()) yyterminate();
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.beg);
+ return T_LEFT_INTERPOL_RE;
+ }
+ \\. {
+ if ((yytext[1] == re_end_delim) ||
+ (yytext[1] == re_start_delim)) {
+ STRBUF_ADD(yytext+1, 1);
+ } else {
+ STRBUF_ADD(yytext, yyleng);
+ }
+ }
+ \n {
+ --L->line; // since \n already scanned
+ L_err("run-away regular expression");
+ ++L->line;
+ STRBUF_ADD(yytext, yyleng);
+ yy_pop_state();
+ if (YYSTATE == re_modifier) yy_pop_state();
+ return T_RE;
+ }
+ "$"[0-9] {
+ // Convert $3 to \3 (regexp capture reference).
+ STRBUF_ADD("\\", 1);
+ STRBUF_ADD(yytext+1, yyleng-1);
+ }
+ . {
+ if (*yytext == re_end_delim) {
+ L_lval.s = ckstrdup(STRBUF_STRING());
+ STRBUF_STOP(L_lloc.end);
+ if (YYSTATE == subst_re) {
+ yy_pop_state();
+ return T_SUBST;
+ } else {
+ yy_pop_state();
+ if (YYSTATE == subst_re) {
+ STRBUF_START(L_lloc.end);
+ if (re_start_delim !=
+ re_end_delim) {
+ yy_push_state(
+ re_delim);
+ }
+ }
+ return T_RE;
+ }
+ } else if (*yytext == re_start_delim) {
+ L_err("regexp delimiter must be quoted "
+ "inside the regexp");
+ STRBUF_ADD(yytext+1, 1);
+ } else {
+ STRBUF_ADD(yytext, yyleng);
+ }
+ }
+
+}
+
+<re_delim>{
+ \n {
+ --L->line; // since \n already scanned
+ L_err("run-away regular expression");
+ ++L->line;
+ STRBUF_ADD(yytext, yyleng);
+ yy_pop_state();
+ }
+ . {
+ extract_re_delims(*yytext);
+ yy_pop_state();
+ }
+}
+
+<re_modifier>{
+ [iglt]+ {
+ L_lval.s = ckstrdup(yytext);
+ yy_pop_state();
+ return T_RE_MODIFIER;
+ }
+ .|\n {
+ unput(yytext[0]);
+ undo_yy_user_action();
+ yy_pop_state();
+ L_lval.s = ckstrdup("");
+ return T_RE_MODIFIER;
+ }
+}
+
+<eat_through_eol>{
+ .
+ \n yy_pop_state();
+}
+
+ . {
+ /* This rule matches a char if no other does. */
+ L_synerr("illegal character");
+ yyterminate();
+ }
+ <<EOF>> {
+ if (in_lhtml) {
+ yy_user_action(); // for line #s
+ L_synerr("premature EOF");
+ }
+ unless (include_pop()) yyterminate();
+ }
+%%
+void
+L_lex_start()
+{
+ include_top = -1;
+ if (in_lhtml) {
+ STRBUF_START(0);
+ BEGIN(lhtml);
+ } else {
+ BEGIN(INITIAL);
+ }
+}
+
+void
+L_lex_begReArg(int kind)
+{
+ switch (kind) {
+ case 0:
+ yy_push_state(re_arg_split);
+ break;
+ case 1:
+ yy_push_state(re_arg_case);
+ break;
+ default:
+ break;
+ }
+}
+
+private void
+extract_re_delims(char c)
+{
+ re_start_delim = c;
+ if (c == '{') {
+ re_end_delim = '}';
+ } else {
+ re_end_delim = c;
+ }
+}
+
+void
+L_lex_begLhtml()
+{
+ in_lhtml = 1;
+}
+
+void
+L_lex_endLhtml()
+{
+ in_lhtml = 0;
+}
+
+/*
+ * These functions are declared down here because they reference
+ * things that flex has not yet declared in the prelogue (like
+ * unput() or yyterminate() etc).
+ */
+
+/*
+ * Unput a single character. This function is declared down here
+ * because it calls flex's unput() which is not declared before
+ * the prelogue code earlier.
+ */
+private void
+put_back(char c)
+{
+ unput(c);
+ --L_lloc.end;
+ --L->prev_token_len;
+ tally_newlines(&c, 1, -1);
+ --L->script_len;
+ Tcl_SetObjLength(L->script, L->script_len);
+}
+
+/*
+ * API for scanning string interpolations:
+ * interpol_push() - call when starting an interpolation; returns 1
+ * on interpolation stack overflow
+ * interpol_pop() - call when finishing an interpolation
+ * interpol_lbrace() - call when "{" seen
+ * interpol_rbrace() - call when "}" seen; returns non-0 if this brace
+ * ends the current interpolation
+ */
+
+private int
+interpol_push()
+{
+ if (interpol_top >= INTERPOL_STACK_SZ) {
+ L_err("string interpolation nesting too deep -- aborting");
+ interpol_top = -1;
+ return (1);
+ }
+ interpol_stk[++interpol_top] = 0;
+ yy_push_state(interpol);
+ return (0);
+}
+
+private void
+interpol_pop()
+{
+ ASSERT((interpol_top >= 0) && (interpol_top <= INTERPOL_STACK_SZ));
+ --interpol_top;
+ yy_pop_state();
+}
+
+private void
+interpol_lbrace()
+{
+ if (interpol_top >= 0) {
+ ASSERT(interpol_top <= INTERPOL_STACK_SZ);
+ ++interpol_stk[interpol_top];
+ }
+}
+
+private int
+interpol_rbrace()
+{
+ if (interpol_top >= 0) {
+ ASSERT(interpol_top <= INTERPOL_STACK_SZ);
+ return (interpol_stk[interpol_top]-- == 0);
+ } else {
+ return (0);
+ }
+}