From 608b5c375a3ea265afe96dd8b921b4b81d6b354d Mon Sep 17 00:00:00 2001 From: Dimitri van Heesch Date: Tue, 25 Sep 2018 21:09:01 +0200 Subject: redundant input_filter runs significantly reduce performance when FILTER_SOURCE_FILES and INLINE_SOURCES are both enabled #6395 --- src/definition.cpp | 235 +++++++++++++++++++++++++++++++++++++++-------------- src/doxygen.cpp | 12 +++ src/doxygen.h | 1 + 3 files changed, 189 insertions(+), 59 deletions(-) diff --git a/src/definition.cpp b/src/definition.cpp index ec8c0cc..bd97f6d 100644 --- a/src/definition.cpp +++ b/src/definition.cpp @@ -42,6 +42,7 @@ #include "filedef.h" #include "dirdef.h" #include "pagedef.h" +#include "bufstr.h" #define START_MARKER 0x4445465B // DEF[ #define END_MARKER 0x4445465D // DEF] @@ -716,6 +717,139 @@ void Definition::setInbodyDocumentation(const char *d,const char *inbodyFile,int _setInbodyDocumentation(d,inbodyFile,inbodyLine); } +//--------------------------------------- + +struct FilterCacheItem +{ + portable_off_t filePos; + uint fileSize; +}; + +/*! Cache for storing the result of filtering a file */ +class FilterCache +{ + public: + FilterCache() : m_endPos(0) { m_cache.setAutoDelete(TRUE); } + bool getFileContents(const QCString &fileName,BufStr &str) + { + static bool filterSourceFiles = Config_getBool(FILTER_SOURCE_FILES); + QCString filter = getFileFilter(fileName,TRUE); + bool usePipe = !filter.isEmpty() && filterSourceFiles; + FILE *f=0; + const int blockSize = 4096; + char buf[blockSize]; + FilterCacheItem *item=0; + if (usePipe && (item = m_cache.find(fileName))) // cache hit: reuse stored result + { + //printf("getFileContents(%s): cache hit\n",qPrint(fileName)); + // file already processed, get the results after filtering from the tmp file + Debug::print(Debug::FilterOutput,0,"Reusing filter result for %s from %s at offset=%d size=%d\n", + qPrint(fileName),qPrint(Doxygen::filterDBFileName),(int)item->filePos,(int)item->fileSize); + f = portable_fopen(Doxygen::filterDBFileName,"rb"); + if (f) + { + bool success=TRUE; + str.resize(item->fileSize+1); + if (portable_fseek(f,item->filePos,SEEK_SET)==-1) + { + err("Failed to seek to position %d in filter database file %s\n",(int)item->filePos,qPrint(Doxygen::filterDBFileName)); + success=FALSE; + } + if (success) + { + int numBytes = fread(str.data(),1,item->fileSize,f); + if (numBytes!=item->fileSize) + { + err("Failed to read %d bytes from position %d in filter database file %s: got %d bytes\n", + (int)item->fileSize,(int)item->filePos,qPrint(Doxygen::filterDBFileName),numBytes); + success=FALSE; + } + } + str.addChar('\0'); + fclose(f); + return success; + } + else + { + err("Failed to open filter database file %s\n",qPrint(Doxygen::filterDBFileName)); + return FALSE; + } + } + else if (usePipe) // cache miss: filter active but file not previously processed + { + //printf("getFileContents(%s): cache miss\n",qPrint(fileName)); + // filter file + QCString cmd=filter+" \""+fileName+"\""; + Debug::print(Debug::ExtCmd,0,"Executing popen(`%s`)\n",qPrint(cmd)); + f = portable_popen(cmd,"r"); + FILE *bf = portable_fopen(Doxygen::filterDBFileName,"a+b"); + FilterCacheItem *item = new FilterCacheItem; + item->filePos = m_endPos; + if (bf==0) + { + // handle error + err("Error opening filter database file %s\n",qPrint(Doxygen::filterDBFileName)); + str.addChar('\0'); + delete item; + portable_pclose(f); + return FALSE; + } + // append the filtered output to the database file + int size=0; + while (!feof(f)) + { + int bytesRead = fread(buf,1,blockSize,f); + int bytesWritten = fwrite(buf,1,bytesRead,bf); + if (bytesRead!=bytesWritten) + { + // handle error + err("Failed to write to filter database %s. Wrote %d out of %d bytes\n", + qPrint(Doxygen::filterDBFileName),bytesWritten,bytesRead); + str.addChar('\0'); + delete item; + portable_pclose(f); + fclose(bf); + return FALSE; + } + size+=bytesWritten; + str.addArray(buf,bytesWritten); + } + str.addChar('\0'); + item->fileSize = size; + // add location entry to the dictionary + m_cache.append(fileName,item); + Debug::print(Debug::FilterOutput,0,"Storing new filter result for %s in %s at offset=%d size=%d\n", + qPrint(fileName),qPrint(Doxygen::filterDBFileName),(int)item->filePos,(int)item->fileSize); + // update end of file position + m_endPos += size; + portable_pclose(f); + fclose(bf); + } + else // no filtering + { + // normal file + //printf("getFileContents(%s): no filter\n",qPrint(fileName)); + f = portable_fopen(fileName,"r"); + while (!feof(f)) + { + int bytesRead = fread(buf,1,blockSize,f); + str.addArray(buf,bytesRead); + } + str.addChar('\0'); + fclose(f); + } + return TRUE; + } + private: + SDict m_cache; + portable_off_t m_endPos; +}; + +static FilterCache g_filterCache; + +//----------------------------------------- + + /*! Reads a fragment of code from file \a fileName starting at * line \a startLine and ending at line \a endLine (inclusive). The fragment is * stored in \a result. If FALSE is returned the code fragment could not be @@ -730,67 +864,60 @@ void Definition::setInbodyDocumentation(const char *d,const char *inbodyFile,int bool readCodeFragment(const char *fileName, int &startLine,int &endLine,QCString &result) { + //printf("readCodeFragment(%s,startLine=%d,endLine=%d)\n",fileName,startLine,endLine); static bool filterSourceFiles = Config_getBool(FILTER_SOURCE_FILES); - static int tabSize = Config_getInt(TAB_SIZE); - //printf("readCodeFragment(%s,%d,%d)\n",fileName,startLine,endLine); - if (fileName==0 || fileName[0]==0) return FALSE; // not a valid file name QCString filter = getFileFilter(fileName,TRUE); - FILE *f=0; bool usePipe = !filter.isEmpty() && filterSourceFiles; + int tabSize = Config_getInt(TAB_SIZE); SrcLangExt lang = getLanguageFromFileName(fileName); - if (!usePipe) // no filter given or wanted - { - f = portable_fopen(fileName,"r"); - } - else // use filter - { - QCString cmd=filter+" \""+fileName+"\""; - Debug::print(Debug::ExtCmd,0,"Executing popen(`%s`)\n",qPrint(cmd)); - f = portable_popen(cmd,"r"); - } - bool found = lang==SrcLangExt_VHDL || - lang==SrcLangExt_Tcl || - lang==SrcLangExt_Python || - lang==SrcLangExt_Fortran; + const int blockSize = 4096; + BufStr str(blockSize); + g_filterCache.getFileContents(fileName,str); + + bool found = lang==SrcLangExt_VHDL || + lang==SrcLangExt_Tcl || + lang==SrcLangExt_Python || + lang==SrcLangExt_Fortran; // for VHDL, TCL, Python, and Fortran no bracket search is possible - if (f) + char *p=str.data(); + if (p) { int c=0; int col=0; int lineNr=1; // skip until the startLine has reached - while (lineNr readCodeFragment(%s,%d,%d) lineNr=%d\n",fileName,startLine,endLine,lineNr); - if (found) + if (found) { // For code with more than one line, // fill the line with spaces until we are at the right column @@ -827,57 +954,47 @@ bool readCodeFragment(const char *fileName, // copy until end of line if (c) result+=c; startLine=lineNr; - if (c==':') + if (c==':') { result+=cn; if (cn=='\n') lineNr++; } - const int maxLineLength=4096; - char lineStr[maxLineLength]; - do + char lineStr[blockSize]; + do { //printf("reading line %d in range %d-%d\n",lineNr,startLine,endLine); int size_read; - do + do { // read up to maxLineLength-1 bytes, the last byte being zero - char *p = fgets(lineStr, maxLineLength,f); - //printf(" read %s",p); - if (p) + int i=0; + while ((c=*p++) && i newLineIndex) + if (braceIndex > newLineIndex) { result.truncate(braceIndex+1); } endLine=lineNr-1; } } - if (usePipe) + if (usePipe) { - portable_pclose(f); Debug::print(Debug::FilterOutput, 0, "Filter output\n"); Debug::print(Debug::FilterOutput,0,"-------------\n%s\n-------------\n",qPrint(result)); } - else - { - fclose(f); - } } result = transcodeCharacterStringToUTF8(result); if (!result.isEmpty() && result.at(result.length()-1)!='\n') result += "\n"; diff --git a/src/doxygen.cpp b/src/doxygen.cpp index 1b40db1..9d8a914 100644 --- a/src/doxygen.cpp +++ b/src/doxygen.cpp @@ -167,6 +167,7 @@ bool Doxygen::suppressDocWarnings = FALSE; Store *Doxygen::symbolStorage; QCString Doxygen::objDBFileName; QCString Doxygen::entryDBFileName; +QCString Doxygen::filterDBFileName; bool Doxygen::gatherDefines = TRUE; IndexList *Doxygen::indexList; int Doxygen::subpageNestingLevel = 0; @@ -10679,6 +10680,10 @@ static void stopDoxygen(int) { thisDir.remove(Doxygen::objDBFileName); } + if (!Doxygen::filterDBFileName.isEmpty()) + { + thisDir.remove(Doxygen::filterDBFileName); + } killpg(0,SIGINT); exit(1); } @@ -10779,6 +10784,10 @@ static void exitDoxygen() { thisDir.remove(Doxygen::objDBFileName); } + if (!Doxygen::filterDBFileName.isEmpty()) + { + thisDir.remove(Doxygen::filterDBFileName); + } } } @@ -11018,6 +11027,8 @@ void parseInput() Doxygen::objDBFileName.prepend(outputDirectory+"/"); Doxygen::entryDBFileName.sprintf("doxygen_entrydb_%d.tmp",pid); Doxygen::entryDBFileName.prepend(outputDirectory+"/"); + Doxygen::filterDBFileName.sprintf("doxygen_filterdb_%d.tmp",pid); + Doxygen::filterDBFileName.prepend(outputDirectory+"/"); if (Doxygen::symbolStorage->open(Doxygen::objDBFileName)==-1) { @@ -11867,6 +11878,7 @@ void generateOutput() Doxygen::symbolStorage->close(); QDir thisDir; thisDir.remove(Doxygen::objDBFileName); + thisDir.remove(Doxygen::filterDBFileName); Config::deinit(); QTextCodec::deleteAllCodecs(); delete Doxygen::symbolMap; diff --git a/src/doxygen.h b/src/doxygen.h index 7bd05a4..4ff8a56 100644 --- a/src/doxygen.h +++ b/src/doxygen.h @@ -140,6 +140,7 @@ class Doxygen static Store *symbolStorage; static QCString objDBFileName; static QCString entryDBFileName; + static QCString filterDBFileName; static CiteDict *citeDict; static bool gatherDefines; static bool userComments; -- cgit v0.12