From 747fc768476aef8b8b70fdd78749702a410dcd29 Mon Sep 17 00:00:00 2001 From: albert-github Date: Wed, 23 Aug 2017 19:31:28 +0200 Subject: Bug 733705 - parser misinterpreting fortran Corrected handling of (local) variables as functions as well as handling of non Fortran variables used in Fortran code. --- src/fortrancode.l | 145 ++++++++++++++++++++++++++++----------------------- src/fortranscanner.l | 31 +++++++++-- src/util.cpp | 14 +++-- 3 files changed, 115 insertions(+), 75 deletions(-) diff --git a/src/fortrancode.l b/src/fortrancode.l index e002b57..1f22700 100644 --- a/src/fortrancode.l +++ b/src/fortrancode.l @@ -67,7 +67,7 @@ * For fixed formatted code position 6 is of importance (continuation character). * The following variables and macros keep track of the column number * YY_USER_ACTION is always called for each scan action - * YY_FTN_REST is used to handle end of lines and reset the column counter + * YY_FTN_RESET is used to handle end of lines and reset the column counter * YY_FTN_REJECT resets the column counters when a pattern is rejected and thus rescanned. 
*/ int yy_old_start = 0; @@ -135,6 +135,8 @@ static const char * g_inputString; //!< the code fragment as text static int g_inputPosition; //!< read offset during parsing static int g_inputLines; //!< number of line in the code fragment static int g_yyLineNr; //!< current line number +static int g_contLineNr; //!< current, local, line number for continuation determination +static int *g_hasContLine = NULL; //!< signals whether or not a line has a continuation line (fixed source form) static bool g_needsTermination; static Definition *g_searchCtx; static bool g_collectXRefs; @@ -159,51 +161,6 @@ static int bracketCount = 0; static bool g_endComment; -// simplified way to know if this is fixed form -// duplicate in fortranscanner.l -static bool recognizeFixedForm(const char* contents, FortranFormat format) -{ - int column=0; - bool skipLine=FALSE; - - if (format == FortranFormat_Fixed) return TRUE; - if (format == FortranFormat_Free) return FALSE; - for (int i=0;;i++) - { - column++; - - switch(contents[i]) - { - case '\n': - column=0; - skipLine=FALSE; - break; - case ' ': - break; - case '#': - skipLine=TRUE; - break; - case '\000': - return FALSE; - case 'C': - case 'c': - case '*': - if(column==1) return TRUE; - if(skipLine) break; - return FALSE; - case '!': - if(column>1 && column<7) return FALSE; - skipLine=TRUE; - break; - default: - if(skipLine) break; - if(column==7) return TRUE; - return FALSE; - } - } - return FALSE; -} - static void endFontClass() { if (g_currentFontClass) @@ -567,7 +524,8 @@ static bool getLink(UseSDict *usedict, // dictonary with used modules if (getFortranDefs(memberName, currentModule, md, usedict) && md->isLinkable()) { - //if (md->isVariable()) return FALSE; // variables aren't handled yet + if (md->isVariable() && (md->getLanguage()!=SrcLangExt_Fortran)) return FALSE; // Non Fortran variables aren't handled yet, + // see also linkifyText in util.cpp Definition *d = md->getOuterScope()==Doxygen::globalScope ? 
md->getBodyDef() : md->getOuterScope(); @@ -840,7 +798,9 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I BEGIN(UseOnly); } {BS},{BS} { codifyLines(yytext); } -{BS}&{BS}"\n" { codifyLines(yytext); YY_FTN_RESET} +{BS}&{BS}"\n" { codifyLines(yytext); + g_contLineNr++; + YY_FTN_RESET} {ID} { QCString tmp = yytext; tmp = tmp.lower(); @@ -867,7 +827,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I } /*-------- fortran module -----------------------------------------*/ ("block"{BS}"data"|"program"|"module"|"interface")/{BS_}|({COMMA}{ACCESS_SPEC})|\n { // - startScope(); + startScope(); startFontClass("keyword"); codifyLines(yytext); endFontClass(); @@ -937,6 +897,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I codifyLines(yytext); } "\n" { codifyLines(yytext); + g_contLineNr++; yy_pop_state(); YY_FTN_RESET } @@ -979,15 +940,22 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I endFontClass(); } {ID} { // local var - if (g_currentMemberDef && g_currentMemberDef->isFunction() && bracketCount==0) + if (g_isFixedForm && yy_my_start == 1) { - g_code->codify(yytext); - addLocalVar(yytext); + startFontClass("comment"); + g_code->codify(yytext); + endFontClass(); } - else + else if (g_currentMemberDef && ((g_currentMemberDef->isFunction() && (g_currentMemberDef->typeString() != QCString("subroutine"))) || + g_currentMemberDef->isVariable())) { generateLink(*g_code, yytext); } + else + { + g_code->codify(yytext); + addLocalVar(yytext); + } } {BS}("=>"|"="){BS} { // Procedure binding BEGIN(DeclarationBinding); @@ -1007,28 +975,36 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I g_code->codify(yytext); } -"&" { // continuation line +"&" { // continuation line g_code->codify(yytext); - yy_push_state(YY_START); - BEGIN(DeclContLine); + if (!g_isFixedForm) + { + yy_push_state(YY_START); + BEGIN(DeclContLine); + } } "\n" { 
// declaration not yet finished + g_contLineNr++; codifyLines(yytext); bracketCount = 0; yy_pop_state(); YY_FTN_RESET } -"\n" { // end declaration line +"\n" { // end declaration line (?) if (g_endComment) - { - g_endComment=FALSE; - } - else - { - codifyLines(yytext); - } + { + g_endComment=FALSE; + } + else + { + codifyLines(yytext); + } bracketCount = 0; - yy_pop_state(); + g_contLineNr++; + if (!(g_hasContLine && g_hasContLine[g_contLineNr - 1])) + { + yy_pop_state(); + } YY_FTN_RESET } @@ -1065,6 +1041,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I \n?{BS}"!>"|"!<" { // start comment line or comment block if (yytext[0] == '\n') { + g_contLineNr++; yy_old_start = 0; yy_my_start = 1; yy_end = yyleng; @@ -1085,6 +1062,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I docBlock+=yytext; } "\n"{BS}("!>"|"!<"|"!!") { // comment block (next line is also comment line) + g_contLineNr++; yy_old_start = 0; yy_my_start = 1; yy_end = yyleng; @@ -1094,6 +1072,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I } "\n" { // comment block ends at the end of this line // remove special comment (default config) + g_contLineNr++; if (Config_getBool(STRIP_CODE_COMMENTS)) { g_yyLineNr+=((QCString)docBlock).contains('\n'); @@ -1112,6 +1091,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I endFontClass(); } unput(*yytext); + g_contLineNr--; yy_pop_state(); YY_FTN_RESET } @@ -1145,6 +1125,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I /*------ preprocessor --------------------------------------------*/ "#".*\n { if (g_isFixedForm && yy_my_start == 6) YY_FTN_REJECT; + g_contLineNr++; startFontClass("preprocessor"); codifyLines(yytext); endFontClass(); @@ -1165,6 +1146,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I <*>"\\\""|\\\' { str+=yytext; /* ignore \" */} \n { // string with \n 
inside + g_contLineNr++; str+=yytext; startFontClass("stringliteral"); codifyLines(str); @@ -1201,6 +1183,7 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I { codifyLines(yytext); } + g_contLineNr++; YY_FTN_RESET } <*>^{BS}"type"{BS}"=" { g_code->codify(yytext); } @@ -1243,6 +1226,29 @@ PREFIX (RECURSIVE{BS_}|IMPURE{BS_}|PURE{BS_}|ELEMENTAL{BS_}){0,3}(RECURSIVE|I void resetFortranCodeParserState() {} +bool recognizeFixedForm(const char* contents, FortranFormat format); /* prototype, implementation in fortranscanner.l */ +const char* prepassFixedForm(const char* contents, int *hasContLine); /* prototype, implementation in fortranscanner.l */ +static void checkContLines(const char *s) +{ + int numLines = 0; + int curLine = 0; + int i = 0; + const char *p = s; + + numLines = 2; // one for element 0, one in case no \n at end + while (*p) + { + if (*p == '\n') numLines++; + p++; + } + + g_hasContLine = (int *) malloc((numLines) * sizeof(int)); + for (i = 0; i < numLines; i++) + g_hasContLine[i] = 0; + p = prepassFixedForm(s, g_hasContLine); + g_hasContLine[0] = 0; +} + void parseFortranCode(CodeOutputInterface &od,const char *className,const QCString &s, bool exBlock, const char *exName,FileDef *fd, int startLine,int endLine,bool inlineFragment, @@ -1262,6 +1268,12 @@ void parseFortranCode(CodeOutputInterface &od,const char *className,const QCStri g_inputString = s; g_inputPosition = 0; g_isFixedForm = recognizeFixedForm((const char*)s,format); + g_contLineNr = 1; + g_hasContLine = NULL; + if (g_isFixedForm) + { + checkContLines(g_inputString); + } g_currentFontClass = 0; g_needsTermination = FALSE; g_searchCtx = searchCtx; @@ -1276,7 +1288,6 @@ void parseFortranCode(CodeOutputInterface &od,const char *className,const QCStri else g_inputLines = g_yyLineNr + countLines() - 1; - g_exampleBlock = exBlock; g_exampleName = exName; g_sourceFileDef = fd; @@ -1317,6 +1328,8 @@ void parseFortranCode(CodeOutputInterface &od,const char 
*className,const QCStri delete g_sourceFileDef; g_sourceFileDef=0; } + if (g_hasContLine) free(g_hasContLine); + g_hasContLine = NULL; printlex(yy_flex_debug, FALSE, __FILE__, fd ? fd->fileName().data(): NULL); return; } diff --git a/src/fortranscanner.l b/src/fortranscanner.l index 2f5567a..23c0970 100644 --- a/src/fortranscanner.l +++ b/src/fortranscanner.l @@ -1366,7 +1366,7 @@ void truncatePrepass(int index) // simplified way to know if this is fixed form // duplicate in fortrancode.l -static bool recognizeFixedForm(const char* contents, FortranFormat format) +bool recognizeFixedForm(const char* contents, FortranFormat format) { int column=0; bool skipLine=FALSE; @@ -1419,7 +1419,8 @@ static void insertCharacter(char *contents, int length, int pos, char c) } /* change comments and bring line continuation character to previous line */ -static const char* prepassFixedForm(const char* contents) +/* also used to set continuation marks in case of fortran code usage, done here as it is quite complicated code */ +const char* prepassFixedForm(const char* contents, int *hasContLine) { int column=0; int prevLineLength=0; @@ -1434,6 +1435,7 @@ static const char* prepassFixedForm(const char* contents) bool fullCommentLine=TRUE; int newContentsSize = strlen(contents)+3; // \000, \n (when necessary) and one spare character (to avoid reallocation) char* newContents = (char*)malloc(newContentsSize); + int curLine = 1; for(int i=0, j=0;;i++,j++) { if(j>=newContentsSize-3) { // check for spare characters, which may be eventually used below (by & and '! ') @@ -1454,6 +1456,11 @@ static const char* prepassFixedForm(const char* contents) else { prevLineLength+=column; + /* Even though a full comment line is not really a comment line it can be seen as one. 
An empty line is also seen as a comment line (small bonus) */ + if (hasContLine) + { + hasContLine[curLine - 1] = 1; + } } fullCommentLine=TRUE; column=0; @@ -1461,12 +1468,18 @@ static const char* prepassFixedForm(const char* contents) commented=FALSE; newContents[j]=c; prevQuote = thisQuote; + curLine++; break; case ' ': case '\t': newContents[j]=c; break; case '\000': + if (hasContLine) + { + free(newContents); + return NULL; + } newContents[j]='\000'; newContentsSize = strlen(newContents); if (newContents[newContentsSize - 1] != '\n') @@ -1545,12 +1558,15 @@ static const char* prepassFixedForm(const char* contents) newContents[j]=' '; if(prevLineAmpOrExclIndex==-1) { // add & just before end of previous line - insertCharacter(newContents, j+1, (j+1)-6-1, '&'); + /* first line is not a continuation line in code, just in snippets etc. */ + if (curLine != 1) insertCharacter(newContents, j+1, (j+1)-6-1, '&'); j++; } else { // add & just before end of previous line comment - insertCharacter(newContents, j+1, (j+1)-6-prevLineLength+prevLineAmpOrExclIndex, '&'); + /* first line is not a continuation line in code, just in snippets etc. 
*/ + if (curLine != 1) insertCharacter(newContents, j+1, (j+1)-6-prevLineLength+prevLineAmpOrExclIndex, '&'); j++; } + if (hasContLine) hasContLine[curLine - 1] = 1; } else { newContents[j]=c; // , just handle like space } @@ -1573,6 +1589,11 @@ static const char* prepassFixedForm(const char* contents) } } + if (hasContLine) + { + free(newContents); + return NULL; + } newContentsSize = strlen(newContents); if (newContents[newContentsSize - 1] != '\n') { @@ -2517,7 +2538,7 @@ static void parseMain(const char *fileName,const char *fileBuf,Entry *rt, Fortra //printf("Input fixed form string:\n%s\n", fileBuf); //printf("===========================\n"); - inputString = prepassFixedForm(fileBuf); + inputString = prepassFixedForm(fileBuf, NULL); //printf("Resulting free form string:\n%s\n", inputString); //printf("===========================\n"); diff --git a/src/util.cpp b/src/util.cpp index e44f825..0cda153 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -2161,10 +2161,16 @@ void linkifyText(const TextGeneratorIntf &out,Definition *scope, if (md!=self && (self==0 || md->name()!=self->name())) // name check is needed for overloaded members, where getDefs just returns one { - out.writeLink(md->getReference(),md->getOutputFileBase(), - md->anchor(),word); - //printf("found symbol %s\n",matchWord.data()); - found=TRUE; + /* in case of Fortran scope and the variable is a non Fortran variable: don't link, + * see also getLink in fortrancode.l + */ + if (!(scope && (scope->getLanguage() == SrcLangExt_Fortran) && md->isVariable() && (md->getLanguage() != SrcLangExt_Fortran))) + { + out.writeLink(md->getReference(),md->getOutputFileBase(), + md->anchor(),word); + //printf("found symbol %s\n",matchWord.data()); + found=TRUE; + } } } } -- cgit v0.12