/****************************************************************************** * * * * * Copyright (C) 1997-2004 by Dimitri van Heesch. * * Permission to use, copy, modify, and distribute this software and its * documentation under the terms of the GNU General Public License is hereby * granted. No representations are made about the suitability of this software * for any purpose. It is provided "as is" without express or implied warranty. * See the GNU General Public License for more details. * * Documents produced by Doxygen are derivative works derived from the * input used in their production; they are not affected by this license. * */ %{ #include #include #include #include #include "doctokenizer.h" #include "cmdmapper.h" #include "config.h" #include "message.h" #include "section.h" #include "membergroup.h" #include "definition.h" #include "doxygen.h" #define YY_NEVER_INTERACTIVE 1 //-------------------------------------------------------------------------- // context for tokenizer phase static int g_commentState; TokenInfo *g_token = 0; static int g_inputPos = 0; static const char *g_inputString; static QString g_fileName; static bool g_insidePre; // context for section finding phase static Definition *g_definition; static MemberGroup *g_memberGroup; static QCString g_secLabel; static QCString g_secTitle; static SectionInfo::SectionType g_secType; static QCString g_endMarker; struct DocLexerContext { TokenInfo *token; int rule; int inputPos; const char *inputString; YY_BUFFER_STATE state; }; static QStack g_lexerStack; //-------------------------------------------------------------------------- void doctokenizerYYpushContext() { DocLexerContext *ctx = new DocLexerContext; ctx->rule = YY_START; ctx->token = g_token; ctx->inputPos = g_inputPos; ctx->inputString = g_inputString; ctx->state = YY_CURRENT_BUFFER; g_lexerStack.push(ctx); yy_switch_to_buffer(yy_create_buffer(doctokenizerYYin, YY_BUF_SIZE)); } bool doctokenizerYYpopContext() { if 
(g_lexerStack.isEmpty()) return FALSE; DocLexerContext *ctx = g_lexerStack.pop(); g_inputPos = ctx->inputPos; g_inputString = ctx->inputString; yy_delete_buffer(YY_CURRENT_BUFFER); yy_switch_to_buffer(ctx->state); BEGIN(ctx->rule); delete ctx; return TRUE; } //-------------------------------------------------------------------------- const char *tokToString(int token) { switch (token) { case 0: return "TK_EOF"; case TK_WORD: return "TK_WORD"; case TK_LNKWORD: return "TK_LNKWORD"; case TK_WHITESPACE: return "TK_WHITESPACE"; case TK_LISTITEM: return "TK_LISTITEM"; case TK_ENDLIST: return "TK_ENDLIST"; case TK_COMMAND: return "TK_COMMAND"; case TK_HTMLTAG: return "TK_HTMLTAG"; case TK_SYMBOL: return "TK_SYMBOL"; case TK_NEWPARA: return "TK_NEWPARA"; case TK_RCSTAG: return "TK_RCSTAG"; case TK_URL: return "TK_URL"; } return "ERROR"; } static int computeIndent(const char *str,int length) { int i; int indent=0; int tabSize=Config_getInt("TAB_SIZE"); for (i=0;iattribs */ static void parseHtmlAttribs(const char *att) { //printf("parseHtmlAttribs(%s)\n",att); QCString attribs=att; int len = attribs.length(); char c; int i=0,startName,endName,startAttrib,endAttrib; while (i Adding option name=<%s> value=<%s>\n", // opt.name.data(),opt.value.data()); g_token->attribs.append(&opt); } } //-------------------------------------------------------------------------- static void processSection() { //printf("%s: found section/anchor with name `%s'\n",g_fileName.data(),g_secLabel.data()); QCString file; if (g_memberGroup) { file = g_memberGroup->parent()->getOutputFileBase(); } else if (g_definition) { file = g_definition->getOutputFileBase(); } else { warn(g_fileName,yylineno,"Found section/anchor %s without context\n",g_secLabel.data()); } SectionInfo *si=0; if ((si=Doxygen::sectionDict.find(g_secLabel))) { si->fileName = file; //si = new SectionInfo(file,g_secLabel,g_secTitle,g_secType); //Doxygen::sectionDict.insert(g_secLabel,si); } } static void handleHtmlTag() { g_token->name = 
yytext; g_token->attribs.clear(); int startNamePos=1; if (g_token->name.at(1)=='/') startNamePos++; int attSep=0; while (attSepname.mid(attSep+1,g_token->name.length()-attSep-2)); g_token->name=g_token->name.mid(startNamePos,attSep-1).lower(); } else // tag without options, strip brackets { g_token->name=g_token->name.mid(startNamePos,g_token->name.length()-startNamePos-1).lower(); } g_token->endTag = startNamePos==2; } //-------------------------------------------------------------------------- #undef YY_INPUT #define YY_INPUT(buf,result,max_size) result=yyread(buf,max_size); static int yyread(char *buf,int max_size) { int c=0; const char *src=g_inputString+g_inputPos; while ( c < max_size && *src ) *buf++ = *src++, c++; g_inputPos+=c; return c; } //-------------------------------------------------------------------------- %} CMD ("\\"|"@") WS [ \t\r\n] NONWS [^ \t\r\n] BLANK [ \t\r] ID [a-z_A-Z][a-z_A-Z0-9]* OPTSTARS ("//"{BLANK}*)?"*"*{BLANK}* LISTITEM {BLANK}*{OPTSTARS}"-"("#")?{WS} ENDLIST {BLANK}*{OPTSTARS}"."{BLANK}*\n ATTRIB {ID}{WS}*("="{WS}*(("\""[^\"]*"\"")|("'"[^\']*"'")|[^ \t\r\n'"><]+))? URLCHAR [a-z_A-Z0-9\!\~\:\;\'\$\?\@\&\%\#\.\-\+\/\=] URLMASK (([a-z_A-Z][^\>\"\n]*{URLCHAR})|({URLCHAR}+))([({]{URLCHAR}*[)}])? FILESCHAR [a-z_A-Z0-9\\:\\\/\-\+] FILEECHAR [a-z_A-Z0-9\-\+] FILEMASK {FILESCHAR}*{FILEECHAR}+("."{FILESCHAR}*{FILEECHAR}+)* LINKMASK [^ \t\n\r\\@<&${}]+("("[^\n)]*")")?({BLANK}*("const"|"volatile"))? SPCMD1 {CMD}[a-z_A-Z0-9]+ SPCMD2 {CMD}[\\@<>&$#%~] SPCMD3 {CMD}form#[0-9]+ INOUT "in"|"out"|("in"{BLANK}*","{BLANK}*"out")|("out"{BLANK}*","{BLANK}*"in") PARAMIO {CMD}param{BLANK}*"["{BLANK}*{INOUT}{BLANK}*"]" TEMPCHAR [a-z_A-Z0-9,: \t\*\&] FUNCCHAR [a-z_A-Z0-9,:\<\> \t\*\&] SCOPESEP "::"|"#"|"." SCOPEPRE {ID}("<"{TEMPCHAR}*">")?{SCOPESEP} SCOPEKEYS ":"({ID}":")* SCOPEMASK {SCOPEPRE}*(~)?{ID}{SCOPEKEYS}?("<"{TEMPCHAR}*">")? FUNCARG "("{FUNCCHAR}*")" OPNEW {BLANK}+"new"({BLANK}*"[]")? OPDEL {BLANK}+"delete"({BLANK}*"[]")? 
OPNORM {OPNEW}|{OPDEL}|"+"|"-"|"*"|"/"|"%"|"^"|"&"|"|"|"~"|"!"|"="|"<"|">"|"+="|"-="|"*="|"/="|"%="|"^="|"&="|"|="|"<<"|">>"|"<<="|">>="|"=="|"!="|"<="|">="|"&&"|"||"|"++"|"--"|","|"->*"|"->"|"[]"|"()" OPCAST {BLANK}+[^(\r\n.,]+ OPMASK ({BLANK}*{OPNORM}{FUNCARG})|({OPCAST}{FUNCARG}) LNKWORD1 ("::"|"#")?{SCOPEMASK} CVSPEC {BLANK}*("const"|"volatile") LNKWORD2 {SCOPEPRE}*"operator"{OPMASK} WORD1 [^ \t\n\r\\@<>()\[\]:;\?{}&$#,.]+|"{"|"}"|("\""[^"\n]*"\"") WORD2 "."|","|"("|")"|"["|"]"|":"|";"|"\?" WORD1NQ [^ \t\n\r\\@<>()\[\]:;\?{}&$#,."]+ WORD2NQ "."|","|"("|")"|"["|"]"|":"|";"|"\?" HTMLTAG "<"(("/")?){ID}({WS}+{ATTRIB})*{WS}*">" HTMLKEYL "strong"|"center"|"table"|"caption"|"small"|"code"|"dfn"|"var"|"img"|"pre"|"sub"|"sup"|"tr"|"td"|"th"|"ol"|"ul"|"li"|"tt"|"kbd"|"em"|"hr"|"dl"|"dt"|"dd"|"br"|"i"|"a"|"b"|"p" HTMLKEYU "STRONG"|"CENTER"|"TABLE"|"CAPTION"|"SMALL"|"CODE"|"DFN"|"VAR"|"IMG"|"PRE"|"SUB"|"SUP"|"TR"|"TD"|"TH"|"OL"|"UL"|"LI"|"TT"|"KBD"|"EM"|"HR"|"DL"|"DT"|"DD"|"BR"|"I"|"A"|"B"|"P" HTMLKEYW {HTMLKEYL}|{HTMLKEYU} LABELID [a-z_A-Z][a-z_A-Z0-9\-]* %option noyywrap %option yylineno %x St_Para %x St_Comment %x St_Title %x St_TitleN %x St_TitleQ %x St_TitleA %x St_TitleV %x St_Code %x St_HtmlOnly %x St_ManOnly %x St_LatexOnly %x St_XmlOnly %x St_Verbatim %x St_Dot %x St_Param %x St_XRefItem %x St_XRefItem2 %x St_File %x St_Pattern %x St_Link %x St_Ref %x St_Ref2 %x St_IntRef %x St_Text %x St_SkipTitle %x St_Sections %s St_SecLabel1 %s St_SecLabel2 %s St_SecTitle %x St_SecSkip %% \r /* skip carriage return */ ^{LISTITEM} { /* list item */ QString text=yytext; int dashPos = text.findRev('-'); g_token->isEnumList = text.at(dashPos+1)=='#'; g_token->indent = computeIndent(yytext,dashPos); return TK_LISTITEM; } {BLANK}*\n{LISTITEM} { /* list item on next line */ QString text=yytext; text=text.right(text.length()-text.find('\n')-1); int dashPos = text.findRev('-'); g_token->isEnumList = text.at(dashPos+1)=='#'; g_token->indent = computeIndent(text,dashPos); return 
TK_LISTITEM; } ^{ENDLIST} { /* end list */ int dotPos = QString(yytext).findRev('.'); g_token->indent = computeIndent(yytext,dotPos); return TK_ENDLIST; } {BLANK}*\n{ENDLIST} { /* end list on next line */ QString text=yytext; text=text.right(text.length()-text.find('\n')-1); int dotPos = text.findRev('.'); g_token->indent = computeIndent(text,dotPos); return TK_ENDLIST; } "{"{BLANK}*"@link" { g_token->name = "javalink"; return TK_COMMAND; } {SPCMD3} { g_token->name = "form"; bool ok; g_token->id = QString(yytext).right(yyleng-6).toInt(&ok); ASSERT(ok); return TK_COMMAND; } {SPCMD1} | {SPCMD2} { /* special command */ g_token->name = yytext+1; g_token->paramDir=TokenInfo::Unspecified; return TK_COMMAND; } {PARAMIO} { /* param [in,out] command */ g_token->name = "param"; QString s(yytext); bool isIn = s.find("in")!=-1; bool isOut = s.find("out")!=-1; if (isIn) { if (isOut) { g_token->paramDir=TokenInfo::InOut; } else { g_token->paramDir=TokenInfo::In; } } else if (isOut) { g_token->paramDir=TokenInfo::Out; } else { g_token->paramDir=TokenInfo::Unspecified; } return TK_COMMAND; } ("http:"|"https:"|"ftp:"|"file:"|"news:"){URLMASK} { // URL g_token->name=yytext; g_token->isEMailAddr=FALSE; return TK_URL; } [a-z_A-Z0-9.+-]+"@"[a-z_A-Z0-9-]+("."[a-z_A-Z0-9\-]+)+[a-z_A-Z0-9\-]+ { // Mail address g_token->name=yytext; g_token->isEMailAddr=TRUE; return TK_URL; } "$"{ID}":"[^\n$]+"$" { /* RCS tag */ QString tagName(yytext+1); int index=tagName.find(':'); g_token->name = tagName.left(index); g_token->text = tagName.mid(index+1,tagName.length()-index-2); return TK_RCSTAG; } "$("{ID}")" { /* environment variable */ QCString name = &yytext[2]; name = name.left(name.length()-1); QCString value = getenv(name); for (int i=value.length()-1;i>=0;i--) unput(value.at(i)); } {HTMLTAG} { /* html tag */ handleHtmlTag(); return TK_HTMLTAG; } "&"{ID}";" { /* special symbol */ g_token->name = yytext; return TK_SYMBOL; } /********* patterns for linkable words ******************/ 
{ID}/"<"{HTMLKEYW}">" { /* this rule is to prevent opening html * tag to be recognized as a templated classes */ g_token->name = yytext; return TK_LNKWORD; } {LNKWORD1} | {LNKWORD1}{FUNCARG} | {LNKWORD2} { g_token->name = yytext; return TK_LNKWORD; } {LNKWORD1}{FUNCARG}{CVSPEC}[^a-z_A-Z0-9] { g_token->name = yytext; g_token->name = g_token->name.left(g_token->name.length()-1); unput(yytext[yyleng-1]); return TK_LNKWORD; } /********* patterns for normal words ******************/ {WORD1} | {WORD2} { /* function call */ if (yytext[0]=='%') // strip % if present g_token->name = &yytext[1]; else g_token->name = yytext; return TK_WORD; /* the following is dummy code to please the * compiler, removing this results in a warning * on my machine */ goto find_rule; } /*******************************************************/ {BLANK}+ | {BLANK}*\n{BLANK}* { /* white space */ g_token->chars=yytext; return TK_WHITESPACE; } [\\@<>&$#%~] { g_token->name = yytext; return TK_COMMAND; } ({BLANK}*\n)+{BLANK}*\n { if (g_insidePre) { /* Inside a
..
blank lines are treated * as whitespace. */ g_token->chars=yytext; return TK_WHITESPACE; } else { /* start of a new paragraph */ return TK_NEWPARA; } } {WS}*{CMD}"endcode" { return RetVal_OK; } [^\\@\n]+ | \n | . { g_token->verb+=yytext; } {CMD}"endhtmlonly" { return RetVal_OK; } [^\\@\n$]+ | \n | . { g_token->verb+=yytext; } {CMD}"endmanonly" { return RetVal_OK; } [^\\@\n$]+ | \n | . { g_token->verb+=yytext; } {CMD}"endlatexonly" { return RetVal_OK; } [^\\@\n]+ | \n | . { g_token->verb+=yytext; } {CMD}"endxmlonly" { return RetVal_OK; } [^\\@\n]+ | \n | . { g_token->verb+=yytext; } {CMD}"endverbatim" { return RetVal_OK; } [^\\@\n]+ | \n | . { /* Verbatim text */ g_token->verb+=yytext; } {CMD}"enddot" { return RetVal_OK; } [^\\@\n]+ | \n | . { /* dot text */ g_token->verb+=yytext; } "\"" { // quoted title BEGIN(St_TitleQ); } [ \t]+ { g_token->chars=yytext; return TK_WHITESPACE; } . { // non-quoted title unput(*yytext); BEGIN(St_TitleN); } \n { return 0; } "&"{ID}";" { /* symbol */ g_token->name = yytext; return TK_SYMBOL; } {HTMLTAG} { } {SPCMD1} | {SPCMD2} { /* special command */ g_token->name = yytext+1; g_token->paramDir=TokenInfo::Unspecified; return TK_COMMAND; } {WORD1} | {WORD2} { /* word */ if (yytext[0]=='%') // strip % if present g_token->name = &yytext[1]; else g_token->name = yytext; return TK_WORD; } [ \t]+ { g_token->chars=yytext; return TK_WHITESPACE; } \n { /* new line => end of title */ unput(*yytext); return 0; } "&"{ID}";" { /* symbol */ g_token->name = yytext; return TK_SYMBOL; } {SPCMD1} | {SPCMD2} { /* special command */ g_token->name = yytext+1; g_token->paramDir=TokenInfo::Unspecified; return TK_COMMAND; } {WORD1NQ} | {WORD2NQ} { /* word */ g_token->name = yytext; return TK_WORD; } [ \t]+ { g_token->chars=yytext; return TK_WHITESPACE; } "\"" { /* closing quote => end of title */ BEGIN(St_TitleA); return 0; } \n { /* new line => end of title */ unput(*yytext); return 0; } {BLANK}*{ID}{BLANK}*"="{BLANK}* { // title attribute g_token->name = 
yytext; g_token->name = g_token->name.left( g_token->name.find('=')).stripWhiteSpace(); BEGIN(St_TitleV); } [^ \t\r\n]+ { // attribute value g_token->chars = yytext; BEGIN(St_TitleN); return TK_WORD; } . { unput(*yytext); return 0; } \n { return 0; } ("#"|"::")?({ID}("."|"#"|"::"|"-"))*{ID} { g_token->name=yytext; return TK_WORD; } {BLANK}+ { unput(' '); return 0; } {BLANK}+"\"" { BEGIN(St_Ref2); } \n { unput(*yytext); return 0; } . { unput(*yytext); return 0; } [A-Z_a-z0-9.:#\-\+]+ { g_token->name = yytext; return TK_WORD; } {BLANK}+"\"" { BEGIN(St_Ref2); } "&"{ID}";" { /* symbol */ g_token->name = yytext; return TK_SYMBOL; } {SPCMD1} | {SPCMD2} { /* special command */ g_token->name = yytext+1; g_token->paramDir=TokenInfo::Unspecified; return TK_COMMAND; } {WORD1NQ} | {WORD2NQ} { /* word */ g_token->name = yytext; return TK_WORD; } [ \t]+ { g_token->chars=yytext; return TK_WHITESPACE; } "\""|\n { /* " or \n => end of title */ return 0; } {ID} { g_token->name=yytext; } " " { BEGIN(St_XRefItem2); } [0-9]+\n { QString numStr=yytext; numStr=numStr.left(yyleng-1); g_token->id=numStr.toInt(); return RetVal_OK; } "" { /* end of html comment */ BEGIN(g_commentState); } [^-\n]+ /* inside html comment */ . /* inside html comment */ /* State for skipping title (all chars until the end of the line) */ . 
\n { return 0; } /* State for the pass used to find the anchors and sections */ [^\n@\\]+ "@@"|"\\\\" {CMD}"anchor"{BLANK}+ { g_secType = SectionInfo::Anchor; BEGIN(St_SecLabel1); } {CMD}"section"{BLANK}+ { g_secType = SectionInfo::Section; BEGIN(St_SecLabel2); } {CMD}"subsection"{BLANK}+ { g_secType = SectionInfo::Subsection; BEGIN(St_SecLabel2); } {CMD}"subsubsection"{BLANK}+ { g_secType = SectionInfo::Subsubsection; BEGIN(St_SecLabel2); } {CMD}"paragraph"{BLANK}+ { g_secType = SectionInfo::Paragraph; BEGIN(St_SecLabel2); } {CMD}"verbatim" { g_endMarker="endverbatim"; BEGIN(St_SecSkip); } {CMD}"dot" { g_endMarker="enddot"; BEGIN(St_SecSkip); } {CMD}"htmlonly" { g_endMarker="endhtmlonly"; BEGIN(St_SecSkip); } {CMD}"latexonly" { g_endMarker="endlatexonly"; BEGIN(St_SecSkip); } {CMD}"xmlonly" { g_endMarker="endxmlonly"; BEGIN(St_SecSkip); } {CMD}"code" { g_endMarker="endcode"; BEGIN(St_SecSkip); } ""; BEGIN(St_SecSkip); } {CMD}{ID} { if (strcmp(yytext+1,g_endMarker)==0) { BEGIN(St_Sections); } } "-->" { if (strcmp(yytext,g_endMarker)==0) { BEGIN(St_Sections); } } [^a-z_A-Z0-9\-\\\@]+ . \n . \n {LABELID} { g_secLabel = yytext; processSection(); BEGIN(St_Sections); } {LABELID}{BLANK}+ | {LABELID} { g_secLabel = yytext; g_secLabel = g_secLabel.stripWhiteSpace(); BEGIN(St_SecTitle); } [^\n]+ | [^\n]*\n { g_secTitle = yytext; g_secTitle = g_secTitle.stripWhiteSpace(); processSection(); BEGIN(St_Sections); } . { warn(g_fileName,yylineno,"Error: Unexpected character `%s' while looking for section label or title",yytext); } /* Generic rules that work for all states */ <*>\n { warn(g_fileName,yylineno,"Error: Unexpected new line character"); } <*>[\\@<>&$#%~] { /* unescaped special character */ //warn(g_fileName,yylineno,"Warning: Unexpected character `%s', assuming command \\%s was meant.",yytext,yytext); g_token->name = yytext; return TK_COMMAND; } <*>. 
{ warn(g_fileName,yylineno,"Error: Unexpected character `%s'",yytext); }
%%

//--------------------------------------------------------------------------

/*! Runs the scanner once over \a input in the St_Sections start condition,
 *  the state used by the pass that looks for anchors and sections.
 *
 *  Does nothing when \a input is 0. Otherwise the input position is reset
 *  to 0, \a d, \a mg and \a fileName are stored in the file-level globals
 *  read by processSection(), the line counter is reset to 1 and
 *  doctokenizerYYlex() is called a single time.
 */
void doctokenizerYYFindSections(const char *input,Definition *d,
                                MemberGroup *mg,const char *fileName)
{
  if (input==0) return;
  g_inputString = input;
  //printf("parsing --->`%s'<---\n",input);
  g_inputPos = 0;
  g_definition = d;
  g_memberGroup = mg;
  g_fileName = fileName;
  BEGIN(St_Sections);
  doctokenizerYYlineno = 1;
  doctokenizerYYlex();
}

/*! Prepares the scanner for tokenizing \a input: stores the input string
 *  and \a fileName, resets the input position to 0, clears the
 *  "inside pre" flag and selects the St_Para start condition.
 *
 *  A null \a input is treated as an empty string. yyread() dereferences
 *  g_inputString, so storing a null pointer here would lead to a null
 *  pointer read on the first scan; with the empty string the scanner sees
 *  an immediate end of input instead. This mirrors the null check in
 *  doctokenizerYYFindSections().
 */
void doctokenizerYYinit(const char *input,const char *fileName)
{
  g_inputString = input ? input : "";
  g_inputPos = 0;
  g_fileName = fileName;
  g_insidePre = FALSE;
  BEGIN(St_Para);
}

/*! Switches the scanner to the St_Para start condition. */
void doctokenizerYYsetStatePara()
{
  BEGIN(St_Para);
}

/*! Switches the scanner to the St_Title start condition. */
void doctokenizerYYsetStateTitle()
{
  BEGIN(St_Title);
}

/* The next seven setters clear g_token->verb before switching state.
 * NOTE(review): presumably the rules of these states are the ones that
 * append the matched text to g_token->verb - confirm against the rules
 * section.
 */

/*! Clears g_token->verb and switches to the St_Code start condition. */
void doctokenizerYYsetStateCode()
{
  g_token->verb="";
  BEGIN(St_Code);
}

/*! Clears g_token->verb and switches to the St_HtmlOnly start condition. */
void doctokenizerYYsetStateHtmlOnly()
{
  g_token->verb="";
  BEGIN(St_HtmlOnly);
}

/*! Clears g_token->verb and switches to the St_ManOnly start condition. */
void doctokenizerYYsetStateManOnly()
{
  g_token->verb="";
  BEGIN(St_ManOnly);
}

/*! Clears g_token->verb and switches to the St_XmlOnly start condition. */
void doctokenizerYYsetStateXmlOnly()
{
  g_token->verb="";
  BEGIN(St_XmlOnly);
}

/*! Clears g_token->verb and switches to the St_LatexOnly start condition. */
void doctokenizerYYsetStateLatexOnly()
{
  g_token->verb="";
  BEGIN(St_LatexOnly);
}

/*! Clears g_token->verb and switches to the St_Verbatim start condition. */
void doctokenizerYYsetStateVerbatim()
{
  g_token->verb="";
  BEGIN(St_Verbatim);
}

/*! Clears g_token->verb and switches to the St_Dot start condition. */
void doctokenizerYYsetStateDot()
{
  g_token->verb="";
  BEGIN(St_Dot);
}

/*! Switches the scanner to the St_Param start condition. */
void doctokenizerYYsetStateParam()
{
  BEGIN(St_Param);
}

/*! Switches the scanner to the St_XRefItem start condition. */
void doctokenizerYYsetStateXRefItem()
{
  BEGIN(St_XRefItem);
}

/*! Switches the scanner to the St_File start condition. */
void doctokenizerYYsetStateFile()
{
  BEGIN(St_File);
}

/*! Switches the scanner to the St_Pattern start condition. */
void doctokenizerYYsetStatePattern()
{
  BEGIN(St_Pattern);
}

/*! Switches the scanner to the St_Link start condition. */
void doctokenizerYYsetStateLink()
{
  BEGIN(St_Link);
}

/*! Switches the scanner to the St_Ref start condition. */
void doctokenizerYYsetStateRef()
{
  BEGIN(St_Ref);
}

/*! Switches the scanner to the St_IntRef start condition. */
void doctokenizerYYsetStateInternalRef()
{
  BEGIN(St_IntRef);
}

/*! Switches the scanner to the St_Text start condition. */
void doctokenizerYYsetStateText()
{
  BEGIN(St_Text);
}

/*! Switches the scanner to the St_SkipTitle start condition. */
void doctokenizerYYsetStateSkipTitle()
{
  BEGIN(St_SkipTitle);
}

/*! Deletes the scanner's current input buffer. */
void doctokenizerYYcleanup()
{
  yy_delete_buffer( YY_CURRENT_BUFFER );
}

/*! Sets the "inside pre" flag. While it is set, the blank-line rule
 *  returns TK_WHITESPACE instead of TK_NEWPARA.
 */
void doctokenizerYYsetInsidePre(bool b)
{
  g_insidePre = b;
}

#if !defined(YY_FLEX_SUBMINOR_VERSION)
extern "C" { // some bogus code to keep the compiler happy
  /*! Only compiled when YY_FLEX_SUBMINOR_VERSION is not defined; its single
   *  statement references yy_flex_realloc.
   *  NOTE(review): presumably this silences an unused-function warning from
   *  older flex skeletons - confirm before removing.
   */
  void doctokenizerYYdummy() { yy_flex_realloc(0,0); }
}
#endif