summaryrefslogtreecommitdiffstats
path: root/libmscgen/mscgen_lexer.l
diff options
context:
space:
mode:
Diffstat (limited to 'libmscgen/mscgen_lexer.l')
-rw-r--r--libmscgen/mscgen_lexer.l236
1 files changed, 236 insertions, 0 deletions
diff --git a/libmscgen/mscgen_lexer.l b/libmscgen/mscgen_lexer.l
new file mode 100644
index 0000000..29d6aea
--- /dev/null
+++ b/libmscgen/mscgen_lexer.l
@@ -0,0 +1,236 @@
+%{
+/***************************************************************************
+ *
+ * $Id: lexer.l 184 2011-02-28 21:38:28Z Michael.McTernan $
+ *
+ * Mscgen language lexer definition.
+ * Copyright (C) 2009 Michael C McTernan, Michael.McTernan.2001@cs.bris.ac.uk
+ *
+ * This file is part of msclib.
+ *
+ * Msc is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * Msclib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with msclib; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ ***************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "mscgen_config.h"
+#include "mscgen_msc.h"
+#include "mscgen_bool.h"
+#include "mscgen_safe.h"
+#include "mscgen_lexer.h"
+#include "mscgen_language.h" /* Token definitions from Yacc/Bison */
+/* Scanner state kept for error reporting and encoding detection. */
+
+/* Number of the input line currently being scanned.  Starts at 1 and is
+ * incremented by newline() each time a line break is consumed. */
+static unsigned long lex_linenum = 1;
+/* Heap-allocated copy of the current line's text, made by newline().
+ * NULL until the first line break has been seen, and again after
+ * lex_destroy() has released it. */
+static char *lex_line = NULL;
+/* Set to TRUE when the input begins with a UTF-8 byte order mark. */
+static Boolean lex_utf8 = FALSE;
+
+/* Local function prototypes */
+static void newline(const char *text, unsigned int n);
+static char *trimQstring(char *s);
+
+%}
+
+/* Scanner options:
+ *  never-interactive  the input is never treated as an interactive terminal
+ *  noinput            input() is not used, so it is not generated; this
+ *                     prevents an unused-function compiler warning
+ *  noyywrap           no yywrap() is supplied; scanning ends at end of input
+ */
+%option never-interactive
+%option noinput
+%option noyywrap
+
+/* Exclusive start conditions: IN_COMMENT is entered by the block-comment
+ * opener and left by its closer; BODY holds the ordinary token rules and is
+ * entered once the INITIAL rules have dealt with the start of the input. */
+%x IN_COMMENT BODY
+%%
+
+ /* INITIAL: active only at the very start of the input.
+  * A UTF-8 byte order mark is consumed and recorded in lex_utf8.  A leading
+  * line break is counted through newline().  Any other first character is
+  * pushed back with unput() so that it is rescanned by the BODY rules.
+  * Every rule here switches to BODY.
+  * (This comment is indented so that flex does not read it as a pattern.) */
+<INITIAL>{
+\xef\xbb\xbf lex_utf8 = TRUE; BEGIN(BODY);
+(\r\n).* newline(yytext, 2); BEGIN(BODY);
+(\r|\n).* newline(yytext, 1); BEGIN(BODY);
+. unput(yytext[0]); BEGIN(BODY);
+}
+
+ /* IN_COMMENT: inside a block comment.
+  * The closing delimiter switches back to BODY.  Runs of other text and
+  * lone asterisks have empty actions, so they are discarded.  Line breaks
+  * still go through newline() so that the line count and the saved line
+  * text stay up to date while the comment is skipped.
+  * (This comment is indented so that flex does not read it as a pattern.) */
+<IN_COMMENT>{
+"*/" BEGIN(BODY);
+[^*\n]+
+"*"
+(\r\n).* newline(yytext, 2);
+(\r|\n).* newline(yytext, 1);
+}
+
+ /* BODY: the main token rules.
+  * Line breaks are counted through newline(), which also pushes the rest
+  * of the line back so that it is scanned as tokens.  Keywords and arc
+  * symbols are listed ahead of the generic identifier rule, so they win
+  * when both match the same length of input.  Option, attribute and arc
+  * tokens store their kind in yylval before returning; identifier and
+  * quoted-string tokens store a freshly duplicated string.
+  * The two line-comment rules deliberately carry no trailing '$': the
+  * any-character repetition already stops at the end of the line, whereas
+  * '$' requires a following newline and so would fail to match a comment
+  * on a final line that has no newline.
+  * (This comment is indented so that flex does not read it as a pattern.) */
+<BODY>{
+
+"/*" BEGIN(IN_COMMENT);
+
+(\r\n).* newline(yytext, 2);
+(\r|\n).* newline(yytext, 1);
+
+#.* /* Ignore lines after '#' */
+\/\/.* /* Ignore lines after '//' */
+
+msc return TOK_MSC;
+HSCALE|hscale yylval.optType = MSC_OPT_HSCALE; return TOK_OPT_HSCALE;
+WIDTH|width yylval.optType = MSC_OPT_WIDTH; return TOK_OPT_WIDTH;
+ARCGRADIENT|arcgradient yylval.optType = MSC_OPT_ARCGRADIENT; return TOK_OPT_ARCGRADIENT;
+WORDWRAPARCS|wordwraparcs yylval.optType = MSC_OPT_WORDWRAPARCS; return TOK_OPT_WORDWRAPARCS;
+URL|url yylval.attribType = MSC_ATTR_URL; return TOK_ATTR_URL;
+LABEL|label yylval.attribType = MSC_ATTR_LABEL; return TOK_ATTR_LABEL;
+IDURL|idurl yylval.attribType = MSC_ATTR_IDURL; return TOK_ATTR_IDURL;
+ID|id yylval.attribType = MSC_ATTR_ID; return TOK_ATTR_ID;
+LINECOLO(U?)R|linecolo(u?)r yylval.attribType = MSC_ATTR_LINE_COLOUR; return TOK_ATTR_LINE_COLOUR;
+TEXTCOLO(U?)R|textcolo(u?)r yylval.attribType = MSC_ATTR_TEXT_COLOUR; return TOK_ATTR_TEXT_COLOUR;
+TEXTBGCOLO(U?)R|textbgcolo(u?)r yylval.attribType = MSC_ATTR_TEXT_BGCOLOUR; return TOK_ATTR_TEXT_BGCOLOUR;
+ARCLINECOLO(U?)R|arclinecolo(u?)r yylval.attribType = MSC_ATTR_ARC_LINE_COLOUR; return TOK_ATTR_ARC_LINE_COLOUR;
+ARCTEXTCOLO(U?)R|arctextcolo(u?)r yylval.attribType = MSC_ATTR_ARC_TEXT_COLOUR; return TOK_ATTR_ARC_TEXT_COLOUR;
+ARCTEXTBGCOLO(U?)R|arctextbgcolo(u?)r yylval.attribType = MSC_ATTR_ARC_TEXT_BGCOLOUR; return TOK_ATTR_ARC_TEXT_BGCOLOUR;
+ARCSKIP|arcskip yylval.attribType = MSC_ATTR_ARC_SKIP; return TOK_ATTR_ARC_SKIP;
+\.\.\. yylval.arctype = MSC_ARC_DISCO; return TOK_SPECIAL_ARC; /* ... */
+--- yylval.arctype = MSC_ARC_DIVIDER; return TOK_SPECIAL_ARC; /* --- */
+\|\|\| yylval.arctype = MSC_ARC_SPACE; return TOK_SPECIAL_ARC; /* ||| */
+\<-\> yylval.arctype = MSC_ARC_SIGNAL; return TOK_REL_SIG_BI; /* <-> */
+-\> yylval.arctype = MSC_ARC_SIGNAL; return TOK_REL_SIG_TO; /* -> */
+\<- yylval.arctype = MSC_ARC_SIGNAL; return TOK_REL_SIG_FROM; /* <- */
+-- yylval.arctype = MSC_ARC_SIGNAL; return TOK_REL_SIG; /* -- */
+-[Xx] yylval.arctype = MSC_ARC_LOSS; return TOK_REL_LOSS_TO; /* -x */
+[Xx]- yylval.arctype = MSC_ARC_LOSS; return TOK_REL_LOSS_FROM; /* x- */
+\<=\> yylval.arctype = MSC_ARC_METHOD; return TOK_REL_METHOD_BI; /* <=> */
+=\> yylval.arctype = MSC_ARC_METHOD; return TOK_REL_METHOD_TO; /* => */
+\<= yylval.arctype = MSC_ARC_METHOD; return TOK_REL_METHOD_FROM; /* <= */
+== yylval.arctype = MSC_ARC_METHOD; return TOK_REL_METHOD; /* == */
+\<\<\>\> yylval.arctype = MSC_ARC_RETVAL; return TOK_REL_RETVAL_BI; /* <<>> */
+\>\> yylval.arctype = MSC_ARC_RETVAL; return TOK_REL_RETVAL_TO; /* >> */
+\<\< yylval.arctype = MSC_ARC_RETVAL; return TOK_REL_RETVAL_FROM; /* << */
+\.\. yylval.arctype = MSC_ARC_RETVAL; return TOK_REL_RETVAL; /* .. */
+\<:\> yylval.arctype = MSC_ARC_DOUBLE; return TOK_REL_DOUBLE_BI; /* <:> */
+:\> yylval.arctype = MSC_ARC_DOUBLE; return TOK_REL_DOUBLE_TO; /* :> */
+\<: yylval.arctype = MSC_ARC_DOUBLE; return TOK_REL_DOUBLE_FROM; /* <: */
+:: yylval.arctype = MSC_ARC_DOUBLE; return TOK_REL_DOUBLE; /* :: */
+\<\<=\>\> yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_BI; /* <<=>> */
+=\>\> yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_TO; /* =>> */
+\<\<= yylval.arctype = MSC_ARC_CALLBACK; return TOK_REL_CALLBACK_FROM; /* <<= */
+BOX|box yylval.arctype = MSC_ARC_BOX; return TOK_REL_BOX; /* box */
+ABOX|abox yylval.arctype = MSC_ARC_ABOX; return TOK_REL_ABOX; /* abox */
+RBOX|rbox yylval.arctype = MSC_ARC_RBOX; return TOK_REL_RBOX; /* rbox */
+NOTE|note yylval.arctype = MSC_ARC_NOTE; return TOK_REL_NOTE; /* note */
+[A-Za-z0-9_]+ yylval.string = strdup_s(yytext); return TOK_STRING;
+\"(\\\"|[^\"])*\" yylval.string = trimQstring(strdup_s(yytext)); return TOK_QSTRING;
+= return TOK_EQUAL;
+, return TOK_COMMA;
+\; return TOK_SEMICOLON;
+\{ return TOK_OCBRACKET;
+\} return TOK_CCBRACKET;
+\[ return TOK_OSBRACKET;
+\] return TOK_CSBRACKET;
+\* return TOK_ASTERISK;
+[ \t]+ /* ignore whitespace */;
+
+}
+
+
+ /* Fallback for every start condition: any single character that no rule
+  * above matched is reported to the parser as TOK_UNKNOWN.
+  * (This comment is indented so that flex does not read it as a pattern.) */
+<*>.|\n|\r return TOK_UNKNOWN;
+
+%%
+
+/* Account for a line break in the input.
+ *
+ * text points at the matched input: n line-break characters followed by the
+ * remainder of that line.  The line counter is advanced and a private copy
+ * of the remainder replaces the previously saved line, in case it is needed
+ * for error reporting.  Everything after the first n characters is then
+ * handed back to the scanner so the line's contents are still tokenised.
+ */
+static void newline(const char *text, unsigned int n)
+{
+    ++lex_linenum;
+
+    /* Drop the previous line's copy; free(NULL) is a harmless no-op. */
+    free(lex_line);
+
+    /* Take the copy before yyless(): afterwards only the first n characters
+     * remain part of the current token. */
+    lex_line = strdup(text + n);
+
+    yyless(n);
+}
+
+
+/* Trim a multi-line quoted string, in place.
+ *
+ * Quoted strings in the input may span several lines, e.g.
+ *    a->b [label="line 1
+ *                 line 1 too"];
+ * which is scanned as "line 1\n                 line 1 too" (quotes included).
+ * This function removes the leading and trailing double quote and replaces
+ * each line break (CR, LF or FF), together with any whitespace that follows
+ * it, by a single space, giving "line 1 line 1 too".
+ *
+ * s       NUL-terminated string to rewrite; may be NULL.
+ * returns s itself (the result is never longer than the input), or NULL
+ *         if s was NULL.
+ */
+static char *trimQstring(char *const s)
+{
+    size_t in = 0, out = 0;
+    int    afterBreak = 0;   /* non-zero while skipping past a line break */
+
+    if(s == NULL)
+    {
+        return NULL;
+    }
+
+    /* Strip leading " */
+    if(s[in] == '\"')
+    {
+        in++;
+    }
+
+    /* Copy body, compacting whitespace after newline sequences */
+    while(s[in] != '\0')
+    {
+        /* <ctype.h> functions require a value representable as unsigned
+         * char; bytes of UTF-8 sequences are negative as plain char on
+         * platforms where char is signed. */
+        const unsigned char c = (unsigned char)s[in];
+
+        if(c == '\r' || c == '\n' || c == '\f')
+        {
+            afterBreak = 1;
+        }
+        else if(!afterBreak || !isspace(c))
+        {
+            if(afterBreak)
+            {
+                /* The whole break-plus-indentation run becomes one space */
+                s[out] = ' ';
+                out++;
+                afterBreak = 0;
+            }
+
+            s[out] = s[in];
+            out++;
+        }
+
+        in++;
+    }
+
+    /* Null terminate */
+    s[out] = '\0';
+
+    /* Remove trailing " */
+    if(out >= 1 && s[out - 1] == '\"')
+    {
+        s[out - 1] = '\0';
+    }
+
+    return s;
+}
+
+/* Report the number of the input line currently being scanned.
+ * Counting starts at 1 and advances once for every line break consumed. */
+unsigned long lex_getlinenum(void)
+{
+    const unsigned long current = lex_linenum;
+
+    return current;
+}
+
+/* Return the saved text of the line currently being scanned.
+ * The buffer belongs to the lexer: it is freed and replaced when the next
+ * line break is scanned and released by lex_destroy().  The result is NULL
+ * when no line has been saved. */
+char *lex_getline(void)
+{
+    char *const current = lex_line;
+
+    return current;
+}
+
+/* Release the saved copy of the current line.
+ * Safe to call when nothing is saved and safe to call repeatedly. */
+void lex_destroy(void)
+{
+    /* free(NULL) is a no-op, so no guard is needed. */
+    free(lex_line);
+    lex_line = NULL;
+}
+
+/* Report whether the input began with a UTF-8 byte order mark.
+ * The flag starts out FALSE and is set by the scanner when it matches the
+ * mark at the start of the input. */
+Boolean lex_getutf8(void)
+{
+    const Boolean sawBom = lex_utf8;
+
+    return sawBom;
+}
+
+/* END OF FILE */