summary refs log tree commit diff stats
path: root/src/doctokenizer.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/doctokenizer.l')
-rw-r--r-- src/doctokenizer.l 577
1 files changed, 577 insertions, 0 deletions
diff --git a/src/doctokenizer.l b/src/doctokenizer.l
new file mode 100644
index 0000000..31d6d7f
--- /dev/null
+++ b/src/doctokenizer.l
@@ -0,0 +1,577 @@
+/******************************************************************************
+ *
+ *
+ *
+ *
+ * Copyright (C) 1997-2002 by Dimitri van Heesch.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation under the terms of the GNU General Public License is hereby
+ * granted. No representations are made about the suitability of this software
+ * for any purpose. It is provided "as is" without express or implied warranty.
+ * See the GNU General Public License for more details.
+ *
+ * Documents produced by Doxygen are derivative works derived from the
+ * input used in their production; they are not affected by this license.
+ *
+ */
+
+%{
+
+#include <qfile.h>
+#include <qcstring.h>
+#include <qstack.h>
+#include <qdict.h>
+
+#include "doctokenizer.h"
+#include "cmdmapper.h"
+#include "config.h"
+
+#define YY_NEVER_INTERACTIVE 1
+
+//--------------------------------------------------------------------------
+
+// Start condition that was active when an HTML comment ("<!--") was entered;
+// the lexer switches back to it when the closing "-->" is seen.
+static int g_commentState;
+// Token record that the lexer rules fill in (name, chars, verb, id, ...).
+// Declared without `static`, so it has external linkage.
+TokenInfo *g_token = 0;
+// Offset into g_inputString of the next character yyread() hands to flex.
+static int g_inputPos = 0;
+// Text that is being tokenized; set by doctokenizerYYinit().
+static const char *g_inputString;
+
+// Snapshot of the lexer state. It is filled in by doctokenizerYYpushContext();
+// doctokenizerYYpopContext() restores rule, inputPos, inputString and state
+// from it.
+struct DocLexerContext
+{
+ TokenInfo *token; // value of g_token at the time of the push
+ int rule; // start condition that was active (YY_START)
+ int inputPos; // value of g_inputPos
+ const char *inputString; // value of g_inputString
+ YY_BUFFER_STATE state; // flex buffer that was current (YY_CURRENT_BUFFER)
+};
+
+// Stack of saved lexer contexts, used by the push/pop context functions.
+static QStack<DocLexerContext> g_lexerStack;
+
+//--------------------------------------------------------------------------
+
+/*! Saves the current lexer state (start condition, token pointer, input
+ *  string, read position and flex buffer) on g_lexerStack and makes a newly
+ *  created flex buffer the current one.
+ */
+void doctokenizerYYpushContext()
+{
+  DocLexerContext *saved = new DocLexerContext;
+  saved->token       = g_token;
+  saved->rule        = YY_START;
+  saved->inputString = g_inputString;
+  saved->inputPos    = g_inputPos;
+  saved->state       = YY_CURRENT_BUFFER; // must be read before switching buffers
+  g_lexerStack.push(saved);
+
+  YY_BUFFER_STATE freshBuffer = yy_create_buffer(doctokenizerYYin, YY_BUF_SIZE);
+  yy_switch_to_buffer(freshBuffer);
+}
+
+/*! Restores the lexer state most recently saved with
+ *  doctokenizerYYpushContext(): the buffer that is current now is deleted
+ *  and the saved buffer, start condition, input string and read position
+ *  become active again.
+ *
+ *  \returns TRUE when a context was restored, FALSE when the stack was empty
+ *           (in which case nothing is changed).
+ */
+bool doctokenizerYYpopContext()
+{
+  if (!g_lexerStack.isEmpty())
+  {
+    DocLexerContext *saved = g_lexerStack.pop();
+    // NOTE(review): saved->token is recorded by the push but is not copied
+    // back into g_token here -- confirm that this is intentional.
+    g_inputPos    = saved->inputPos;
+    g_inputString = saved->inputString;
+    yy_delete_buffer(YY_CURRENT_BUFFER); // drop the buffer created by the push
+    yy_switch_to_buffer(saved->state);
+    BEGIN(saved->rule);
+    delete saved;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+//--------------------------------------------------------------------------
+
+/*! Returns the symbolic name of the token id \a token as a string literal.
+ *  0 is reported as "TK_EOF"; any id that is not listed yields "ERROR".
+ */
+const char *tokToString(int token)
+{
+  if (token==0)             return "TK_EOF";
+  if (token==TK_WORD)       return "TK_WORD";
+  if (token==TK_WHITESPACE) return "TK_WHITESPACE";
+  if (token==TK_LISTITEM)   return "TK_LISTITEM";
+  if (token==TK_ENDLIST)    return "TK_ENDLIST";
+  if (token==TK_COMMAND)    return "TK_COMMAND";
+  if (token==TK_HTMLTAG)    return "TK_HTMLTAG";
+  if (token==TK_SYMBOL)     return "TK_SYMBOL";
+  if (token==TK_NEWPARA)    return "TK_NEWPARA";
+  if (token==TK_RCSTAG)     return "TK_RCSTAG";
+  if (token==TK_URL)        return "TK_URL";
+  return "ERROR";
+}
+
+/*! Computes the column reached after the first \a length characters of
+ *  \a str. A tab advances to the next multiple of the TAB_SIZE
+ *  configuration value, a newline resets the column to 0 and every other
+ *  character counts as one column.
+ */
+static int computeIndent(const char *str,int length)
+{
+  const int tabWidth = Config_getInt("TAB_SIZE");
+  int column = 0;
+  for (int pos=0; pos<length; pos++)
+  {
+    switch (str[pos])
+    {
+      case '\t': // jump to the next tab stop
+        column += tabWidth - (column%tabWidth);
+        break;
+      case '\n': // a new line starts at column 0 again
+        column = 0;
+        break;
+      default:
+        column++;
+        break;
+    }
+  }
+  return column;
+}
+
+/*! Returns TRUE when \a ch is one of the characters the lexer treats as a
+ *  blank between HTML attributes (space, tab or carriage return; the same
+ *  set as the BLANK definition).
+ */
+static bool isOptionBlank(char ch)
+{
+  return ch==' ' || ch=='\t' || ch=='\r';
+}
+
+/*! Converts the input string \a opt into a list of Options. Each
+ *  option is a name, value pair. The result is stored in g_token->options;
+ *  whatever that list held before is cleared first.
+ *
+ *  The accepted forms are: name, name=value, name='value' and name="value".
+ *  Options are separated by blanks, and blanks may also surround the '='.
+ *  Names are converted to lower case; the value of an option without '='
+ *  is left untouched (as default constructed). A quoted value that lacks
+ *  its closing quote extends to the end of the input.
+ *
+ *  \param opt the attribute part of an HTML tag, i.e. without the tag name
+ *             and without the closing '>'.
+ */
+static void parseOptions(const QCString &opt)
+{
+  QCString options=opt;
+  g_token->options.clear();
+  const int len = options.length();
+  int i=0;
+  // Every test below looks at options.at(i) for the current i. The previous
+  // version cached the character in a variable that was not refreshed after
+  // stepping over '=' or an opening quote, so blanks after '=' were kept and
+  // an empty quoted value ran past its closing quote.
+  while (i<len)
+  {
+    // skip blanks in front of the name
+    while (i<len && isOptionBlank(options.at(i))) i++;
+    if (i>=len) break; // only trailing blanks were left
+
+    // search for end of name
+    int startName=i;
+    while (i<len && !isOptionBlank(options.at(i)) && options.at(i)!='=') i++;
+    int endName=i;
+
+    Option *option = new Option;
+    option->name = options.mid(startName,endName-startName).lower();
+
+    // skip blanks between the name and a possible '='
+    while (i<len && isOptionBlank(options.at(i))) i++;
+
+    if (i<len && options.at(i)=='=') // option has value
+    {
+      i++;
+      // skip blanks between '=' and the value
+      while (i<len && isOptionBlank(options.at(i))) i++;
+
+      int startOption,endOption;
+      char quote = i<len ? options.at(i) : '\0';
+      if (quote=='\'' || quote=='"') // option '...' or "..."
+      {
+        i++;
+        startOption=i;
+        // search for matching quote
+        while (i<len && options.at(i)!=quote) i++;
+        endOption=i;
+        if (i<len) i++; // step over the closing quote
+      }
+      else // value without any quotes
+      {
+        startOption=i;
+        // search for separator
+        while (i<len && !isOptionBlank(options.at(i))) i++;
+        endOption=i;
+      }
+      option->value = options.mid(startOption,endOption-startOption);
+    }
+    // NOTE(review): the list receives the pointer; presumably it owns and
+    // deletes its items -- confirm against the TokenInfo declaration.
+    g_token->options.append(option);
+  }
+}
+
+//--------------------------------------------------------------------------
+
+// Let flex obtain its input from the in-memory string instead of a file.
+#undef YY_INPUT
+#define YY_INPUT(buf,result,max_size) result=yyread(buf,max_size);
+
+/*! Copies at most \a max_size characters from the unread part of
+ *  g_inputString into \a buf, stopping early at the terminating zero, and
+ *  advances g_inputPos by the number of characters copied.
+ *
+ *  \returns the number of characters copied; 0 once the input is exhausted.
+ */
+static int yyread(char *buf,int max_size)
+{
+  const char *cursor = g_inputString + g_inputPos;
+  int copied = 0;
+  for (; copied<max_size && *cursor; copied++)
+  {
+    *buf++ = *cursor++;
+  }
+  g_inputPos += copied;
+  return copied;
+}
+
+//--------------------------------------------------------------------------
+
+%}
+
+/* Basic building blocks: the command prefix (backslash or at-sign),
+   whitespace classes (BLANK is WS without the newline) and identifiers. */
+CMD ("\\"|"@")
+WS [ \t\r\n]
+NONWS [^ \t\r\n]
+BLANK [ \t\r]
+ID [a-z_A-Z][a-z_A-Z0-9]*
+/* List markup: an optional "//" and run of stars, then "-" or "-#" for an
+   item, or a "." alone on the line for the end of a list. */
+OPTSTARS ("//"{BLANK}*)?"*"*{BLANK}*
+LISTITEM {BLANK}*{OPTSTARS}"-"("#")?{WS}
+ENDLIST {BLANK}*{OPTSTARS}"."{BLANK}*\n
+/* One HTML attribute: a name, optionally followed by "=" and a double
+   quoted, single quoted or unquoted value. */
+ATTRIB {ID}("="(("\""[^\"]*"\"")|("'"[^\']*"'")|[^ \t\r\n'"><]+))?
+/* The part of a URL that follows the scheme prefix. */
+URLCHAR [a-z_A-Z0-9\!\~\:\;\'\$\?\@\&\%\#\.\-\+\/\=]
+URLMASK (([a-z_A-Z][^\>\"\n]*{URLCHAR})|({URLCHAR}+))([({]{URLCHAR}*[)}])?
+/* File names: FILESCHAR may appear anywhere, FILEECHAR is the stricter set
+   required at the end of each dot separated part. */
+FILESCHAR [a-z_A-Z0-9\\:\\\/\-\+]
+FILEECHAR [a-z_A-Z0-9\-\+]
+FILEMASK {FILESCHAR}*{FILEECHAR}+("."{FILESCHAR}*{FILEECHAR}+)*
+/* Link target: a word, optionally with an argument list in parentheses and
+   a trailing const/volatile. */
+LINKMASK [^ \t\n\r\\@<&$]+("("[^\n)]*")")?({BLANK}*("const"|"volatile"))?
+/* Special commands: a named command, a single escaped character and the
+   form#<number> command. */
+SPCMD1 {CMD}[a-z_A-Z0-9]+
+SPCMD2 {CMD}[\\@<>&$#%~]
+SPCMD3 {CMD}form#[0-9]+
+/* Words: WORD1 is a plain word, WORD2 is a word followed by an argument
+   list in parentheses and an optional const/volatile. */
+WORD1 [^ \t\n\r\\@<&$]+
+WORD2 [^ \t\n\r\\@<&$]+"("[^\n)]*")"({BLANK}*("const"|"volatile"))?
+
+%option noyywrap
+%option yylineno
+
+/* Exclusive start conditions. St_Comment and St_Ref2 are entered from
+   within the rules; the others are selected through doctokenizerYYinit()
+   and the doctokenizerYYsetState...() functions. */
+%x St_Para
+%x St_Comment
+%x St_Title
+%x St_Code
+%x St_HtmlOnly
+%x St_LatexOnly
+%x St_Verbatim
+%x St_Param
+%x St_XRefItem
+%x St_File
+%x St_Pattern
+%x St_Link
+%x St_Ref
+%x St_Ref2
+
+%%
+ /* TODO: \~lang_id */
+<St_Para>\r /* skip carriage return */
+<St_Para>^{LISTITEM} { /* list item */
+ // The last '-' of the match is the list marker; a '#' directly after
+ // it marks an enumerated list. The indent is the tab-expanded width
+ // of the text in front of that '-'.
+ QCString text=yytext;
+ int dashPos = text.findRev('-');
+ g_token->isEnumList = text.at(dashPos+1)=='#';
+ g_token->indent = computeIndent(yytext,dashPos);
+ return TK_LISTITEM;
+ }
+<St_Para>{BLANK}*\n{LISTITEM} { /* list item on next line */
+ // Drop everything up to and including the first newline, so only
+ // the line holding the list marker is measured.
+ QCString text=yytext;
+ text=text.right(text.length()-text.find('\n')-1);
+ int dashPos = text.findRev('-');
+ g_token->isEnumList = text.at(dashPos+1)=='#';
+ g_token->indent = computeIndent(text,dashPos);
+ return TK_LISTITEM;
+ }
+<St_Para>^{ENDLIST} { /* end list */
+ // The indent is the tab-expanded width of the text in front of the
+ // last '.' of the match.
+ int dotPos = QCString(yytext).findRev('.');
+ g_token->indent = computeIndent(yytext,dotPos);
+ return TK_ENDLIST;
+ }
+<St_Para>{BLANK}*\n{ENDLIST} { /* end list on next line */
+ // As above, after removing the text up to the first newline.
+ QCString text=yytext;
+ text=text.right(text.length()-text.find('\n')-1);
+ int dotPos = text.findRev('.');
+ g_token->indent = computeIndent(text,dotPos);
+ return TK_ENDLIST;
+ }
+<St_Para>"{"{BLANK}*"@link" {
+ // "{@link" is reported as a command with the fixed name "javalink"
+ g_token->name = "javalink";
+ return TK_COMMAND;
+ }
+<St_Para>{SPCMD3} {
+ // The match is the command character followed by "form#" and a
+ // number; the 6 character prefix is cut off and the rest is stored
+ // as the numeric id.
+ g_token->name = "form";
+ bool ok;
+ g_token->id = QCString(yytext).right(yyleng-6).toInt(&ok);
+ ASSERT(ok);
+ return TK_COMMAND;
+ }
+<St_Para>{SPCMD1} |
+<St_Para>{SPCMD2} { /* special command */
+ // the name is the match without the leading command character
+ g_token->name = yytext+1;
+ return TK_COMMAND;
+ }
+<St_Para>("http:"|"https:"|"ftp:"|"file:"|"news:"){URLMASK} {
+ // URL with an explicit scheme; the complete match is stored
+ g_token->name=yytext;
+ return TK_URL;
+ }
+<St_Para>[a-z_A-Z0-9.-]+"@"[a-z_A-Z0-9.-]+ {
+ // text of the form x@y is reported as a URL token as well
+ g_token->name=yytext;
+ return TK_URL;
+ }
+<St_Para>"$"{ID}":"[^\n$]+"$" { /* RCS tag */
+ // tagName starts after the opening '$'. name is the part before the
+ // first ':'; text is what lies between that ':' and the closing '$'.
+ QCString tagName(yytext+1);
+ int i=tagName.find(':');
+ g_token->name = tagName.left(i);
+ g_token->text = tagName.mid(i+1,tagName.length()-i-2);
+ return TK_RCSTAG;
+ }
+<St_Para>"$("{ID}")" { /* environment variable */
+ // The variable name is the match without the leading "$(" and the
+ // trailing ")". Its value is pushed back onto the input, last
+ // character first, so it is scanned next; this rule itself returns
+ // no token.
+ // NOTE(review): getenv() yields a null pointer for an unset
+ // variable; this presumably relies on QCString accepting one --
+ // confirm.
+ QCString name = &yytext[2];
+ name = name.left(name.length()-1);
+ QCString value = getenv(name);
+ for (int i=value.length()-1;i>=0;i--) unput(value.at(i));
+ }
+<St_Para>"<"(("/")?){ID}({BLANK}+{ATTRIB})*">" { /* html tag */
+                         // yytext holds the complete tag, "<name ...>" or
+                         // "</name ...>". The token gets the lower-cased tag
+                         // name, the parsed attributes (if any) and a flag
+                         // telling whether it is an end tag.
+                         g_token->name = yytext;
+                         int startNamePos=1;
+                         if (g_token->name.at(1)=='/') startNamePos++;
+                         int tagLen = (int)g_token->name.length();
+                         // The tag name consists of identifier characters only,
+                         // so the first blank after it (space, tab or carriage
+                         // return, see BLANK) separates the name from the
+                         // options. Looking for a space alone would miss tags
+                         // that use a tab there.
+                         int optSep = -1;
+                         for (int p=startNamePos;p<tagLen && optSep==-1;p++)
+                         {
+                           char ch = g_token->name.at(p);
+                           if (ch==' ' || ch=='\t' || ch=='\r') optSep=p;
+                         }
+                         if (optSep!=-1) // tag has one or more options
+                         {
+                           // options run from just after the separator up to,
+                           // but not including, the closing '>'
+                           parseOptions(g_token->name.mid(optSep+1,tagLen-optSep-2));
+                           // The name length has to be measured from
+                           // startNamePos; using optSep-1 left the separator
+                           // attached to the name of an end tag.
+                           g_token->name=g_token->name.mid(startNamePos,optSep-startNamePos).lower();
+                         }
+                         else // tag without options, strip brackets
+                         {
+                           g_token->name=g_token->name.mid(startNamePos,tagLen-startNamePos-1).lower();
+                         }
+                         g_token->endTag = startNamePos==2;
+                         return TK_HTMLTAG;
+                       }
+<St_Para>"&"{ID}";" { /* special symbol */
+ // the complete entity, including '&' and ';', is stored
+ g_token->name = yytext;
+ return TK_SYMBOL;
+ }
+<St_Para>{WORD1} | /* word, #word, or %word */
+<St_Para>{WORD2} { /* function call */
+ g_token->name = yytext;
+ return TK_WORD;
+ /* dummy code to please the compiler, removing this
+ results in a warning on my machine */ goto find_rule;
+ }
+<St_Para>{BLANK}+ |
+<St_Para>{BLANK}*\n{BLANK}* { /* white space */
+ // blanks containing at most one newline; the matched text is kept
+ g_token->chars=yytext;
+ return TK_WHITESPACE;
+ }
+<St_Para>({BLANK}*\n)+{BLANK}*\n {
+ /* start of a new paragraph */
+ // two or more line ends separated only by blanks
+ return TK_NEWPARA;
+ }
+<St_Code>{CMD}"endcode" {
+ // end of the block; the collected text is in g_token->verb
+ return RetVal_OK;
+ }
+<St_Code>[^\\@\n]+ |
+<St_Code>\n |
+<St_Code>. {
+ // everything else is appended unchanged to g_token->verb
+ g_token->verb+=yytext;
+ }
+<St_HtmlOnly>{CMD}"endhtmlonly" {
+ // end of the block; the collected text is in g_token->verb
+ return RetVal_OK;
+ }
+<St_HtmlOnly>[^\\@\n]+ |
+<St_HtmlOnly>\n |
+<St_HtmlOnly>. {
+ // everything else is appended unchanged to g_token->verb
+ g_token->verb+=yytext;
+ }
+<St_LatexOnly>{CMD}"endlatexonly" {
+ // end of the block; the collected text is in g_token->verb
+ return RetVal_OK;
+ }
+<St_LatexOnly>[^\\@\n]+ |
+<St_LatexOnly>\n |
+<St_LatexOnly>. {
+ // everything else is appended unchanged to g_token->verb
+ g_token->verb+=yytext;
+ }
+<St_Verbatim>{CMD}"endverbatim" {
+ // end of the block; the collected text is in g_token->verb
+ return RetVal_OK;
+ }
+<St_Verbatim>[^\\@\n]+ |
+<St_Verbatim>\n |
+<St_Verbatim>. { /* Verbatim text */
+ g_token->verb+=yytext;
+ }
+<St_Title>"&"{ID}";" { /* symbol */
+ g_token->name = yytext;
+ return TK_SYMBOL;
+ }
+<St_Title>{SPCMD1} |
+<St_Title>{SPCMD2} { /* special command */
+ // the name is the match without the leading command character
+ g_token->name = yytext+1;
+ return TK_COMMAND;
+ }
+<St_Title>{WORD1} |
+<St_Title>{WORD2} { /* word */
+ g_token->name = yytext;
+ return TK_WORD;
+ }
+<St_Title>[ \t]+ {
+ // spaces and tabs within the title; the matched text is kept
+ g_token->chars=yytext;
+ return TK_WHITESPACE;
+ }
+<St_Title>\n { /* new line => end of title */
+ // 0 is the value tokToString() reports as TK_EOF
+ return 0;
+ }
+<St_Ref>{ID} {
+ // the identifier is returned as a word
+ g_token->name=yytext;
+ return TK_WORD;
+ }
+<St_Ref>{BLANK}+ {
+ // blanks that are not followed by a double quote end the scan
+ return 0;
+ }
+<St_Ref>{BLANK}+"\"" {
+ // blanks followed by a double quote: continue in St_Ref2, which
+ // tokenizes the quoted text
+ BEGIN(St_Ref2);
+ }
+<St_Ref>\n {
+ // put the newline back so it is scanned again later, then stop
+ unput(*yytext);
+ return 0;
+ }
+<St_Ref>. {
+ // any other character is put back as well before stopping
+ unput(*yytext);
+ return 0;
+ }
+<St_Ref2>"&"{ID}";" { /* symbol */
+ g_token->name = yytext;
+ return TK_SYMBOL;
+ }
+<St_Ref2>{SPCMD1} |
+<St_Ref2>{SPCMD2} { /* special command */
+ // the name is the match without the leading command character
+ g_token->name = yytext+1;
+ return TK_COMMAND;
+ }
+<St_Ref2>[^ \t\n\r\\@<&$"]+ |
+<St_Ref2>[^ \t\n\r\\@<&$"]+"("[^\n")]*")"({BLANK}*("const"|"volatile"))? {
+ /* word */
+ // same shape as WORD1/WORD2, except that a double quote also
+ // ends the word
+ g_token->name = yytext;
+ return TK_WORD;
+ }
+<St_Ref2>[ \t]+ {
+ g_token->chars=yytext;
+ return TK_WHITESPACE;
+ }
+<St_Ref2>"\""|\n { /* " or \n => end of title */
+ return 0;
+ }
+<St_XRefItem>[0-9]+\n {
+ // a number followed by a newline; the newline is cut off and the
+ // number is stored as the token id
+ QCString numStr=yytext;
+ numStr=numStr.left(yyleng-1);
+ g_token->id=numStr.toInt();
+ return RetVal_OK;
+ }
+<St_Para,St_Title,St_Ref2>"<!--" { /* html style comment block */
+ // remember the current start condition so that the end of the
+ // comment can switch back to it
+ g_commentState = YY_START;
+ BEGIN(St_Comment);
+ }
+<St_Param>"\""[^\n\"]+"\"" {
+ // quoted parameter: the surrounding double quotes are removed
+ g_token->name = yytext+1;
+ g_token->name = g_token->name.left(yyleng-2);
+ return TK_WORD;
+ }
+<St_Param>[^ \t\n,]+ {
+ // unquoted parameter: runs up to a space, tab, newline or comma
+ g_token->name = yytext;
+ return TK_WORD;
+ }
+<St_Param>{WS}*","{WS}* /* param separator */
+<St_Param>{WS} {
+ g_token->chars=yytext;
+ return TK_WHITESPACE;
+ }
+<St_File>{FILEMASK} {
+ // unquoted file name
+ g_token->name = yytext;
+ return TK_WORD;
+ }
+<St_File>"\""[^\n\"]+"\"" {
+ // quoted file name: the surrounding double quotes are removed
+ QCString text=yytext;
+ g_token->name = text.mid(1,text.length()-2);
+ return TK_WORD;
+ }
+<St_Pattern>[^\r\n]+ {
+ // the rest of the line, with surrounding white space stripped
+ g_token->name = yytext;
+ g_token->name = g_token->name.stripWhiteSpace();
+ return TK_WORD;
+ }
+<St_Link>{LINKMASK} {
+ g_token->name = yytext;
+ return TK_WORD;
+ }
+<St_Comment>"-->" { /* end of html comment */
+                         // return to the start condition that was active
+                         // when the comment was opened
+                         BEGIN(g_commentState);
+                       }
+<St_Comment>[^-\n]+ /* inside html comment */
+<St_Comment>\n /* line break inside html comment */
+<St_Comment>. /* inside html comment */
+<*>\n {
+                         // Reached only when the active start condition has
+                         // no rule of its own for a newline. St_Comment has
+                         // one (above): '.' does not match a newline, so
+                         // without it every line break inside a multi-line
+                         // html comment was reported as an error here.
+                         printf("Error: Unexpected new line character at line %d\n",yylineno);
+                       }
+<*>. {
+                         // catch-all for characters that no other rule of
+                         // the active start condition accepts
+                         printf("Error: Unexpected character `%s' at line %d\n",yytext,yylineno);
+                       }
+%%
+
+//--------------------------------------------------------------------------
+
+/*! Makes \a input the text to be tokenized, rewinds the read position to
+ *  its first character and selects the St_Para start condition.
+ *  The pointer is stored, not copied.
+ */
+void doctokenizerYYinit(const char *input)
+{
+  g_inputPos    = 0;
+  g_inputString = input;
+  BEGIN(St_Para);
+}
+
+// Selects the St_Para start condition (the one doctokenizerYYinit() starts in).
+void doctokenizerYYsetStatePara()
+{
+ BEGIN(St_Para);
+}
+
+// Selects St_Title, in which a newline ends the scan.
+void doctokenizerYYsetStateTitle()
+{
+ BEGIN(St_Title);
+}
+
+// Clears g_token->verb and selects St_Code, in which all text up to the
+// endcode command is collected in g_token->verb.
+void doctokenizerYYsetStateCode()
+{
+ g_token->verb.resize(0);
+ BEGIN(St_Code);
+}
+
+// Clears g_token->verb and selects St_HtmlOnly, in which all text up to the
+// endhtmlonly command is collected in g_token->verb.
+void doctokenizerYYsetStateHtmlOnly()
+{
+ g_token->verb.resize(0);
+ BEGIN(St_HtmlOnly);
+}
+
+// Clears g_token->verb and selects St_LatexOnly, in which all text up to the
+// endlatexonly command is collected in g_token->verb.
+void doctokenizerYYsetStateLatexOnly()
+{
+ g_token->verb.resize(0);
+ BEGIN(St_LatexOnly);
+}
+
+// Clears g_token->verb and selects St_Verbatim, in which all text up to the
+// endverbatim command is collected in g_token->verb.
+void doctokenizerYYsetStateVerbatim()
+{
+ g_token->verb.resize(0);
+ BEGIN(St_Verbatim);
+}
+
+// Selects St_Param, which yields comma separated (optionally quoted) words.
+void doctokenizerYYsetStateParam()
+{
+ BEGIN(St_Param);
+}
+
+// Selects St_XRefItem, which reads a number terminated by a newline into
+// g_token->id.
+void doctokenizerYYsetStateXRefItem()
+{
+ BEGIN(St_XRefItem);
+}
+
+// Selects St_File, which yields a (optionally quoted) file name as a word.
+void doctokenizerYYsetStateFile()
+{
+ BEGIN(St_File);
+}
+
+// Selects St_Pattern, which yields the rest of the line as a single word.
+void doctokenizerYYsetStatePattern()
+{
+ BEGIN(St_Pattern);
+}
+
+// Selects St_Link, which yields a link target matching LINKMASK as a word.
+void doctokenizerYYsetStateLink()
+{
+ BEGIN(St_Link);
+}
+
+// Selects St_Ref, which yields an identifier and switches to St_Ref2 when
+// blanks followed by a double quote are found.
+void doctokenizerYYsetStateRef()
+{
+ BEGIN(St_Ref);
+}
+
+// Deletes the flex buffer that is current at the time of the call.
+void doctokenizerYYcleanup()
+{
+ yy_delete_buffer( YY_CURRENT_BUFFER );
+}
+
+// NOTE(review): the only purpose visible here is to reference
+// yy_flex_realloc(); presumably this silences an "unused function" warning
+// for the generated scanner -- confirm before removing.
+extern "C" { // some bogus code to keep the compiler happy
+ void doctokenizerYYdummy() { yy_flex_realloc(0,0); }
+}