diff options
Diffstat (limited to 'src/doctokenizer.l')
-rw-r--r-- | src/doctokenizer.l | 577 |
1 files changed, 577 insertions, 0 deletions
/******************************************************************************
 *
 * 
 *
 *
 * Copyright (C) 1997-2002 by Dimitri van Heesch.
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation under the terms of the GNU General Public License is hereby 
 * granted. No representations are made about the suitability of this software 
 * for any purpose. It is provided "as is" without express or implied warranty.
 * See the GNU General Public License for more details.
 *
 * Documents produced by Doxygen are derivative works derived from the
 * input used in their production; they are not affected by this license.
 *
 */

%{

#include <qfile.h>
#include <qcstring.h>
#include <qstack.h>
#include <qdict.h>

#include "doctokenizer.h"
#include "cmdmapper.h"
#include "config.h"

#define YY_NEVER_INTERACTIVE 1
  
//--------------------------------------------------------------------------

/*! start condition to return to once an html style comment has ended */
static int g_commentState;
/*! token record that the scanner rules fill in */
TokenInfo *g_token = 0;
/*! offset into g_inputString of the next character yyread() hands out */
static int g_inputPos = 0;
/*! zero terminated string that is being scanned */
static const char *g_inputString;

/*! Snapshot of the scanner state as stored by doctokenizerYYpushContext(). */
struct DocLexerContext
{
  TokenInfo *token;        //!< value of g_token at the time of the push
  int rule;                //!< start condition (YY_START) at the time of the push
  int inputPos;            //!< value of g_inputPos at the time of the push
  const char *inputString; //!< value of g_inputString at the time of the push
  YY_BUFFER_STATE state;   //!< flex buffer that was current at the time of the push
};

static QStack<DocLexerContext> g_lexerStack;

//--------------------------------------------------------------------------

/*! Saves the current start condition, token pointer, input string/position
 *  and flex buffer on g_lexerStack and makes a newly created buffer current.
 */
void doctokenizerYYpushContext()
{
  DocLexerContext *ctx = new DocLexerContext;
  ctx->rule = YY_START;
  ctx->token = g_token;
  ctx->inputPos = g_inputPos;
  ctx->inputString = g_inputString;
  ctx->state = YY_CURRENT_BUFFER;
  g_lexerStack.push(ctx);
  yy_switch_to_buffer(yy_create_buffer(doctokenizerYYin, YY_BUF_SIZE));
}

/*! Restores the context saved by the most recent doctokenizerYYpushContext():
 *  the input string/position, the flex buffer (the current one is deleted)
 *  and the start condition.
 *  \returns FALSE if there was no saved context, TRUE otherwise.
 *  \note The token pointer stored in the context is not written back to
 *        g_token here.
 */
bool doctokenizerYYpopContext()
{
  if (g_lexerStack.isEmpty()) return FALSE;
  DocLexerContext *ctx = g_lexerStack.pop();
  g_inputPos = ctx->inputPos;
  g_inputString = ctx->inputString;
  yy_delete_buffer(YY_CURRENT_BUFFER);
  yy_switch_to_buffer(ctx->state);
  BEGIN(ctx->rule);
  delete ctx;
  return TRUE;
}


//--------------------------------------------------------------------------

/*! Returns the name of token id \a token as a string literal, or "ERROR"
 *  if the id is not one of the ids listed below.
 */
const char *tokToString(int token)
{
  switch (token)
  {
    case 0:              return "TK_EOF";
    case TK_WORD:        return "TK_WORD";
    case TK_WHITESPACE:  return "TK_WHITESPACE";
    case TK_LISTITEM:    return "TK_LISTITEM";
    case TK_ENDLIST:     return "TK_ENDLIST";
    case TK_COMMAND:     return "TK_COMMAND";
    case TK_HTMLTAG:     return "TK_HTMLTAG";
    case TK_SYMBOL:      return "TK_SYMBOL";
    case TK_NEWPARA:     return "TK_NEWPARA";
    case TK_RCSTAG:      return "TK_RCSTAG";
    case TK_URL:         return "TK_URL";
  }
  return "ERROR";
}

/*! Computes the column reached after the first \a length characters of
 *  \a str: a tab advances to the next multiple of the TAB_SIZE setting,
 *  a newline resets the column to 0 and any other character counts as one.
 */
static int computeIndent(const char *str,int length)
{
  int i;
  int indent=0;
  int tabSize=Config_getInt("TAB_SIZE");
  for (i=0;i<length;i++)
  {
    if (str[i]=='\t')
    {
      indent+=tabSize - (indent%tabSize);
    }
    else if (str[i]=='\n')
    {
      indent=0;
    }
    else
    {
      indent++;
    }
  }
  return indent;
}

/*! Returns TRUE iff \a c is one of the characters matched by the BLANK
 *  pattern (space, tab or carriage return), i.e. a character that can 
 *  separate the attributes of a html tag.
 */
static bool isBlankChar(char c)
{
  return c==' ' || c=='\t' || c=='\r';
}

/*! converts input string \a opt into a list of Options. Each
 *  option is a name, value pair. The result is stored in g_token->options
 *
 *  The name is converted to lower case. A value is optional and follows 
 *  a '='; it is either enclosed in single or double quotes (the quotes are
 *  not part of the stored value) or runs until the next blank.
 *  Any options previously stored in g_token->options are removed first.
 */
static void parseOptions(const QCString &opt)
{
  //printf("parseOptions(%s)\n",opt.data());
  QCString options=opt;
  g_token->options.clear();
  int len = options.length();
  int i=0;
  // Every character is read via options.at(i) right where it is tested, so
  // a test can never be made against a character from an earlier position.
  while (i<len)
  {
    // skip blanks in front of the name
    while (i<len && isBlankChar(options.at(i))) i++;
    if (i>=len) break; // only blanks were left: do not add an empty option

    // search for end of name
    int startName=i;
    while (i<len && !isBlankChar(options.at(i)) && options.at(i)!='=') i++;
    int endName=i;

    Option *option = new Option;
    option->name = options.mid(startName,endName-startName).lower();

    // skip blanks between the name and a possible '='
    while (i<len && isBlankChar(options.at(i))) i++;

    if (i<len && options.at(i)=='=') // option has value
    {
      i++;
      // skip blanks between '=' and the value
      while (i<len && isBlankChar(options.at(i))) i++;

      int startOption,endOption;
      if (i<len && (options.at(i)=='\'' || options.at(i)=='"')) // option '...' or "..."
      {
        char quote=options.at(i);
        i++;
        startOption=i;
        // search for matching quote; an empty value ('' or "") ends at once
        while (i<len && options.at(i)!=quote) i++;
        endOption=i;
        if (i<len) i++; // step over the closing quote
      }
      else // value without any quotes
      {
        startOption=i;
        // search for separator
        while (i<len && !isBlankChar(options.at(i))) i++;
        endOption=i;
      }
      option->value = options.mid(startOption,endOption-startOption); 
    }
    //printf("=====> Adding option name=<%s> value=<%s>\n",
    //    option->name.data(),option->value.data());
    g_token->options.append(option);
  }
}

//--------------------------------------------------------------------------

#undef  YY_INPUT
#define YY_INPUT(buf,result,max_size) result=yyread(buf,max_size);

/*! Input routine used by the scanner (see YY_INPUT above): copies at most
 *  \a max_size characters from g_inputString, starting at g_inputPos, into
 *  \a buf, stopping at the terminating zero. Advances g_inputPos by the
 *  number of characters copied and returns that number.
 */
static int yyread(char *buf,int max_size)
{
  int c=0;
  const char *src=g_inputString+g_inputPos;
  while ( c < max_size && *src ) *buf++ = *src++, c++;
  g_inputPos+=c;
  return c;
}

//--------------------------------------------------------------------------

%}

/* named patterns used by the rules */
CMD       ("\\"|"@")
WS        [ \t\r\n]
NONWS     [^ \t\r\n]
BLANK     [ \t\r]
ID        [a-z_A-Z][a-z_A-Z0-9]*
OPTSTARS  ("//"{BLANK}*)?"*"*{BLANK}*
LISTITEM  {BLANK}*{OPTSTARS}"-"("#")?{WS}
ENDLIST   {BLANK}*{OPTSTARS}"."{BLANK}*\n
ATTRIB    {ID}("="(("\""[^\"]*"\"")|("'"[^\']*"'")|[^ \t\r\n'"><]+))?
URLCHAR   [a-z_A-Z0-9\!\~\:\;\'\$\?\@\&\%\#\.\-\+\/\=]
URLMASK   (([a-z_A-Z][^\>\"\n]*{URLCHAR})|({URLCHAR}+))([({]{URLCHAR}*[)}])?
FILESCHAR [a-z_A-Z0-9\\:\\\/\-\+]
FILEECHAR [a-z_A-Z0-9\-\+]
FILEMASK  {FILESCHAR}*{FILEECHAR}+("."{FILESCHAR}*{FILEECHAR}+)*
LINKMASK  [^ \t\n\r\\@<&$]+("("[^\n)]*")")?({BLANK}*("const"|"volatile"))?
SPCMD1    {CMD}[a-z_A-Z0-9]+
SPCMD2    {CMD}[\\@<>&$#%~]
SPCMD3    {CMD}form#[0-9]+
WORD1     [^ \t\n\r\\@<&$]+
WORD2     [^ \t\n\r\\@<&$]+"("[^\n)]*")"({BLANK}*("const"|"volatile"))?
%option noyywrap
%option yylineno

%x St_Para
%x St_Comment
%x St_Title
%x St_Code
%x St_HtmlOnly
%x St_LatexOnly
%x St_Verbatim
%x St_Param
%x St_XRefItem
%x St_File
%x St_Pattern
%x St_Link
%x St_Ref
%x St_Ref2

%%
  /* TODO: \~lang_id */
<St_Para>\r            /* skip carriage return */
<St_Para>^{LISTITEM}   { /* list item */ 
                         QCString text=yytext;
                         int dashPos = text.findRev('-');
                         /* a '#' directly after the dash marks an enumerated list */
                         g_token->isEnumList = text.at(dashPos+1)=='#';
                         g_token->indent     = computeIndent(yytext,dashPos);
                         return TK_LISTITEM;
                       }
<St_Para>{BLANK}*\n{LISTITEM}     { /* list item on next line */ 
                         QCString text=yytext;
                         /* only look at the part after the first newline */
                         text=text.right(text.length()-text.find('\n')-1);
                         int dashPos = text.findRev('-');
                         g_token->isEnumList = text.at(dashPos+1)=='#';
                         g_token->indent     = computeIndent(text,dashPos);
                         return TK_LISTITEM;
                       }
<St_Para>^{ENDLIST}    { /* end list */ 
                         int dotPos = QCString(yytext).findRev('.');
                         g_token->indent = computeIndent(yytext,dotPos);
                         return TK_ENDLIST;
                       }
<St_Para>{BLANK}*\n{ENDLIST}      { /* end list on next line */ 
                         QCString text=yytext;
                         /* only look at the part after the first newline */
                         text=text.right(text.length()-text.find('\n')-1);
                         int dotPos = text.findRev('.');
                         g_token->indent = computeIndent(text,dotPos);
                         return TK_ENDLIST;
                       }
<St_Para>"{"{BLANK}*"@link" {
                         g_token->name = "javalink";
                         return TK_COMMAND;
                       }
<St_Para>{SPCMD3}      {
                         g_token->name = "form";
                         bool ok;
                         /* the number follows the 6 characters of "\form#" / "@form#" */
                         g_token->id = QCString(yytext).right(yyleng-6).toInt(&ok);
                         ASSERT(ok);
                         return TK_COMMAND;
                       }
<St_Para>{SPCMD1}      |
<St_Para>{SPCMD2}      { /* special command */
                         g_token->name = yytext+1;
                         return TK_COMMAND;
                       }
<St_Para>("http:"|"https:"|"ftp:"|"file:"|"news:"){URLMASK} {
                         g_token->name=yytext;
                         return TK_URL;
                       }
<St_Para>[a-z_A-Z0-9.-]+"@"[a-z_A-Z0-9.-]+ {
                         g_token->name=yytext;
                         return TK_URL;
                       }
<St_Para>"$"{ID}":"[^\n$]+"$" { /* RCS tag */
                         QCString tagName(yytext+1);
                         int i=tagName.find(':');
                         g_token->name = tagName.left(i);
                         /* text between the ':' and the closing '$' */
                         g_token->text = tagName.mid(i+1,tagName.length()-i-2);
                         return TK_RCSTAG;
                       }
<St_Para>"$("{ID}")"   { /* environment variable */
                         QCString name = &yytext[2];
                         name = name.left(name.length()-1);
                         QCString value = getenv(name);
                         /* push the value back (last character first) so it is scanned next */
                         for (int i=value.length()-1;i>=0;i--) unput(value.at(i));
                       }
<St_Para>"<"(("/")?){ID}({BLANK}+{ATTRIB})*">"  { /* html tag */
                         QCString tag = yytext;
                         int tagLen = (int)tag.length();
                         /* the name starts after "<" or after "</" */
                         int startNamePos=1;
                         if (tag.at(1)=='/') startNamePos++;
                         /* The attributes are separated from the name by {BLANK}+,
                          * which is not necessarily a space, so search for the first
                          * space, tab or carriage return. 
                          */
                         int optSep = -1;
                         for (int i=startNamePos;i<tagLen;i++)
                         {
                           char c=tag.at(i);
                           if (c==' ' || c=='\t' || c=='\r') { optSep=i; break; }
                         }
                         if (optSep!=-1) // tag has one or more options
                         {
                           /* pass everything between the separator and the closing '>' */
                           parseOptions(tag.mid(optSep+1,tagLen-optSep-2));
                           /* the name ends right before the separator, both for 
                            * start tags and for end tags 
                            */
                           g_token->name=tag.mid(startNamePos,optSep-startNamePos).lower();
                         }
                         else // tag without options, strip brackets
                         {
                           /* do not keep the options of a previously scanned tag */
                           g_token->options.clear();
                           g_token->name=tag.mid(startNamePos,tagLen-startNamePos-1).lower();
                         }
                         g_token->endTag = startNamePos==2;
                         return TK_HTMLTAG;
                       }
<St_Para>"&"{ID}";"    { /* special symbol */
                         g_token->name = yytext;
                         return TK_SYMBOL;
                       }
<St_Para>{WORD1}       | /* word, #word, or %word */
<St_Para>{WORD2}       { /* function call */ 
                         g_token->name = yytext;
                         return TK_WORD;
                         /* dummy code to please the compiler, removing this
                            results in a warning on my machine */ 
                         goto find_rule;
                       }
<St_Para>{BLANK}+      |
<St_Para>{BLANK}*\n{BLANK}* { /* white space */ 
                         g_token->chars=yytext;
                         return TK_WHITESPACE;
                       }
<St_Para>({BLANK}*\n)+{BLANK}*\n {
                         /* start of a new paragraph */
                         return TK_NEWPARA;
                       }
<St_Code>{CMD}"endcode" {
                         return RetVal_OK;
                       }
<St_Code>[^\\@\n]+     |
<St_Code>\n            |
<St_Code>.             { /* text of the code block */
                         g_token->verb+=yytext;
                       }
<St_HtmlOnly>{CMD}"endhtmlonly" {
                         return RetVal_OK;
                       }
<St_HtmlOnly>[^\\@\n]+ |
<St_HtmlOnly>\n        |
<St_HtmlOnly>.         { /* text of the htmlonly block */
                         g_token->verb+=yytext;
                       }
<St_LatexOnly>{CMD}"endlatexonly" {
                         return RetVal_OK;
                       }
<St_LatexOnly>[^\\@\n]+ |
<St_LatexOnly>\n       |
<St_LatexOnly>.        { /* text of the latexonly block */
                         g_token->verb+=yytext;
                       }
<St_Verbatim>{CMD}"endverbatim" {
                         return RetVal_OK;
                       }
<St_Verbatim>[^\\@\n]+ |
<St_Verbatim>\n        |
<St_Verbatim>.         { /* Verbatim text */
                         g_token->verb+=yytext;
                       }
<St_Title>"&"{ID}";"   { /* symbol */
                         g_token->name = yytext;
                         return TK_SYMBOL;
                       }
<St_Title>{SPCMD1}     |
<St_Title>{SPCMD2}     { /* special command */ 
                         g_token->name = yytext+1;
                         return TK_COMMAND;
                       }
<St_Title>{WORD1}      |
<St_Title>{WORD2}      { /* word */
                         g_token->name = yytext;
                         return TK_WORD;
                       }
<St_Title>[ \t]+       {
                         g_token->chars=yytext;
                         return TK_WHITESPACE;
                       }
<St_Title>\n           { /* new line => end of title */
                         return 0;
                       }
<St_Ref>{ID}           {
                         g_token->name=yytext;
                         return TK_WORD;
                       }
<St_Ref>{BLANK}+       {
                         return 0;
                       }
<St_Ref>{BLANK}+"\""   { /* a quoted text follows */
                         BEGIN(St_Ref2);
                       }
<St_Ref>\n             { /* put the newline back before ending */
                         unput(*yytext);
                         return 0;
                       }
<St_Ref>.              { /* put the character back before ending */
                         unput(*yytext);
                         return 0;
                       }
<St_Ref2>"&"{ID}";"    { /* symbol */
                         g_token->name = yytext;
                         return TK_SYMBOL;
                       }
<St_Ref2>{SPCMD1}      |
<St_Ref2>{SPCMD2}      { /* special command */ 
                         g_token->name = yytext+1;
                         return TK_COMMAND;
                       }
<St_Ref2>[^ \t\n\r\\@<&$"]+ |
<St_Ref2>[^ \t\n\r\\@<&$"]+"("[^\n")]*")"({BLANK}*("const"|"volatile"))? {
                         /* word */
                         g_token->name = yytext;
                         return TK_WORD;
                       }
<St_Ref2>[ \t]+        {
                         g_token->chars=yytext;
                         return TK_WHITESPACE;
                       }
<St_Ref2>"\""|\n       { /* " or \n => end of title */
                         return 0;
                       }
<St_XRefItem>[0-9]+\n  {
                         QCString numStr=yytext;
                         /* strip the trailing newline */
                         numStr=numStr.left(yyleng-1);
                         g_token->id=numStr.toInt();
                         return RetVal_OK;
                       }
<St_Para,St_Title,St_Ref2>"<!--"     { /* html style comment block */
                         /* remember where to continue after the comment */
                         g_commentState = YY_START;
                         BEGIN(St_Comment);
                       }
<St_Param>"\""[^\n\"]+"\"" { /* quoted parameter: strip the quotes */
                         g_token->name = yytext+1;
                         g_token->name = g_token->name.left(yyleng-2);
                         return TK_WORD;
                       }
<St_Param>[^ \t\n,]+   {
                         g_token->name = yytext;
                         return TK_WORD;
                       }
<St_Param>{WS}*","{WS}* /* param separator */
<St_Param>{WS}         {
                         g_token->chars=yytext;
                         return TK_WHITESPACE;
                       }
<St_File>{FILEMASK}    {
                         g_token->name = yytext;
                         return TK_WORD;  
                       }
<St_File>"\""[^\n\"]+"\"" { /* quoted file name: strip the quotes */
                         QCString text=yytext;
                         g_token->name = text.mid(1,text.length()-2);
                         return TK_WORD;
                       }
<St_Pattern>[^\r\n]+   {
                         g_token->name = yytext;
                         g_token->name = g_token->name.stripWhiteSpace();
                         return TK_WORD;
                       }
<St_Link>{LINKMASK}    {
                         g_token->name = yytext;
                         return TK_WORD;
                       }
<St_Comment>"-->"      { /* end of html comment */
                         BEGIN(g_commentState);
                       }
<St_Comment>[^-\n]+    /* inside html comment */
<St_Comment>.          /* inside html comment */
<*>\n                  { 
                         printf("Error: Unexpected new line character at line %d\n",yylineno);
                       }
<*>.                   { 
                         printf("Error: Unexpected character `%s' at line %d\n",yytext,yylineno);
                       }
%%

//--------------------------------------------------------------------------

/*! Makes \a input the string to scan, starting at its first character,
 *  and puts the scanner in the St_Para state.
 */
void doctokenizerYYinit(const char *input)
{
  g_inputString = input;
  g_inputPos = 0;
  BEGIN(St_Para);
}

/*! Puts the scanner in the St_Para state. */
void doctokenizerYYsetStatePara()
{
  BEGIN(St_Para);
}

/*! Puts the scanner in the St_Title state. */
void doctokenizerYYsetStateTitle()
{
  BEGIN(St_Title);
}

/*! Empties g_token->verb and puts the scanner in the St_Code state. */
void doctokenizerYYsetStateCode()
{
  g_token->verb.resize(0);
  BEGIN(St_Code);
}

/*! Empties g_token->verb and puts the scanner in the St_HtmlOnly state. */
void doctokenizerYYsetStateHtmlOnly()
{
  g_token->verb.resize(0);
  BEGIN(St_HtmlOnly);
}

/*! Empties g_token->verb and puts the scanner in the St_LatexOnly state. */
void doctokenizerYYsetStateLatexOnly()
{
  g_token->verb.resize(0);
  BEGIN(St_LatexOnly);
}

/*! Empties g_token->verb and puts the scanner in the St_Verbatim state. */
void doctokenizerYYsetStateVerbatim()
{
  g_token->verb.resize(0);
  BEGIN(St_Verbatim);
}

/*! Puts the scanner in the St_Param state. */
void doctokenizerYYsetStateParam()
{
  BEGIN(St_Param);
}

/*! Puts the scanner in the St_XRefItem state. */
void doctokenizerYYsetStateXRefItem()
{
  BEGIN(St_XRefItem);
}

/*! Puts the scanner in the St_File state. */
void doctokenizerYYsetStateFile()
{
  BEGIN(St_File);
}

/*! Puts the scanner in the St_Pattern state. */
void doctokenizerYYsetStatePattern()
{
  BEGIN(St_Pattern);
}

/*! Puts the scanner in the St_Link state. */
void doctokenizerYYsetStateLink()
{
  BEGIN(St_Link);
}

/*! Puts the scanner in the St_Ref state. */
void doctokenizerYYsetStateRef()
{
  BEGIN(St_Ref);
}

/*! Deletes the flex buffer that is currently in use. */
void doctokenizerYYcleanup()
{
  yy_delete_buffer( YY_CURRENT_BUFFER );
}

extern "C" { // some bogus code to keep the compiler happy
  void doctokenizerYYdummy() { yy_flex_realloc(0,0); } 
}