From 94dd6958376b61877d2b62b99d867edaa64618b5 Mon Sep 17 00:00:00 2001 From: Dimitri van Heesch Date: Sat, 6 Feb 2021 16:28:25 +0100 Subject: Refactoring: make doxyindexer independent of qtools --- addon/doxysearch/CMakeLists.txt | 4 +- addon/doxysearch/doxyindexer.cpp | 123 +++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 66 deletions(-) diff --git a/addon/doxysearch/CMakeLists.txt b/addon/doxysearch/CMakeLists.txt index d0c8724..2c1ac08 100644 --- a/addon/doxysearch/CMakeLists.txt +++ b/addon/doxysearch/CMakeLists.txt @@ -7,7 +7,7 @@ endif() include_directories( ${PROJECT_SOURCE_DIR}/libversion - ${PROJECT_SOURCE_DIR}/qtools + ${PROJECT_SOURCE_DIR}/libxml ${XAPIAN_INCLUDE_DIR} ${ZLIB_INCLUDE_DIRS} ) @@ -20,7 +20,7 @@ target_link_libraries(doxyindexer ${WIN_EXTRA_LIBS} ${COVERAGE_LINKER_FLAGS} doxygen_version - qtools + xml ) add_executable(doxysearch.cgi diff --git a/addon/doxysearch/doxyindexer.cpp b/addon/doxysearch/doxyindexer.cpp index ae4e046..8d295cb 100644 --- a/addon/doxysearch/doxyindexer.cpp +++ b/addon/doxysearch/doxyindexer.cpp @@ -20,17 +20,17 @@ #include #include #include +#include +#include +#include -// Qtools includes -#include -#include -#include -#include +#include // Xapian include #include #include "version.h" +#include "xml.h" #define MAX_TERM_LENGTH 245 @@ -106,13 +106,14 @@ static void addWords(const std::string &s,Xapian::Document &doc,int wfd) /** Adds all identifiers in \a s to document \a doc with weight \a wfd */ static void addIdentifiers(const std::string &s,Xapian::Document &doc,int wfd) { - QRegExp re("[A-Z_a-z][A-Z_a-z0-9]*"); - int i,l,p=0; - QCString qs = s.c_str(); - while ((i=re.match(qs,p,&l))!=-1) + std::regex id_re("[A-Z_a-z][A-Z_a-z0-9]*"); + auto id_begin = std::sregex_iterator(s.begin(), s.end(), id_re); + auto id_end = std::sregex_iterator(); + + for (auto i = id_begin; i!=id_end; ++i) { - safeAddTerm(qs.mid(p,i-p).data(),doc,wfd); - p=i+l; + std::smatch match = *i; + safeAddTerm(match.str(),doc,wfd); } } @@ -142,12 +143,12 @@ static std::string unescapeXmlEntities(const std::string &s) /** This class is a wrapper around SAX style XML parser, which * parses the file without first building a DOM tree in memory. */ -class XMLContentHandler : public QXmlDefaultHandler +class XMLContentHandler { public: /** Handler for parsing XML data */ - XMLContentHandler(const QString &path) - : m_db((path+"doxysearch.db").utf8().data(),Xapian::DB_CREATE_OR_OVERWRITE), + XMLContentHandler(const std::string &path) + : m_db(path+"doxysearch.db",Xapian::DB_CREATE_OR_OVERWRITE), m_stemmer("english") { m_curFieldName = UnknownField; @@ -161,7 +162,6 @@ class XMLContentHandler : public QXmlDefaultHandler m_db.commit(); } - private: enum FieldNames { UnknownField = 0, @@ -175,13 +175,12 @@ class XMLContentHandler : public QXmlDefaultHandler }; /** Handler for a start tag. Called for and tags */ - bool startElement(const QString &, const QString &, - const QString &name, const QXmlAttributes &attrib) + void startElement(const std::string &name, const XMLHandlers::Attributes &attrib) { m_data=""; if (name=="field") { - QString fieldName = attrib.value("name"); + std::string fieldName = XMLHandlers::value(attrib,"name"); if (fieldName=="type") m_curFieldName=TypeField; else if (fieldName=="name") m_curFieldName=NameField; else if (fieldName=="args") m_curFieldName=ArgsField; @@ -191,11 +190,10 @@ class XMLContentHandler : public QXmlDefaultHandler else if (fieldName=="text") m_curFieldName=TextField; else m_curFieldName=UnknownField; } - return TRUE; } /** Handler for an end tag. Called for and tags */ - bool endElement(const QString &, const QString &, const QString &name) + void endElement(const std::string &name) { if (name=="doc") // { @@ -260,16 +258,21 @@ class XMLContentHandler : public QXmlDefaultHandler m_curFieldName=UnknownField; } // reset m_data - return TRUE; } /** Handler for inline text */ - bool characters(const QString& ch) + void characters(const std::string& ch) { - m_data += std::string(ch.utf8()); - return TRUE; + m_data += ch; } + void error(const std::string &fileName,int lineNr,const std::string &msg) + { + std::cerr << "Fatal error at " << fileName << ":" << lineNr << ": " << msg << std::endl; + } + + private: + // internal state Xapian::WritableDatabase m_db; Xapian::Document m_doc; @@ -279,38 +282,31 @@ class XMLContentHandler : public QXmlDefaultHandler FieldNames m_curFieldName; }; -/** Class for handling error during XML parsing */ -class XMLErrorHandler : public QXmlErrorHandler -{ - public: - virtual ~XMLErrorHandler() {} - bool warning( const QXmlParseException & ) - { - return FALSE; - } - bool error( const QXmlParseException & ) - { - return FALSE; - } - bool fatalError( const QXmlParseException &exception ) - { - std::cerr << "Fatal error at line " << exception.lineNumber() - << " column " << exception.columnNumber() << ": " - << exception.message().utf8() << std::endl; - return FALSE; - } - QString errorString() { return ""; } - - private: - QString errorMsg; -}; - static void usage(const char *name, int exitVal = 1) { std::cerr << "Usage: " << name << " [-o output_dir] searchdata.xml [searchdata2.xml ...]" << std::endl; exit(exitVal); } +// return the contents of a file as a string +inline std::string fileToString(const std::string &fileName) +{ + std::ifstream t(fileName); + std::string result; + t.seekg(0, std::ios::end); + result.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + result.assign(std::istreambuf_iterator(t), + std::istreambuf_iterator()); + return result; +} + +bool dirExists(const char *path) +{ + struct stat info = {}; + return stat(path,&info)==0 && (info.st_mode&S_IFDIR); +} + /** main function to index data */ int main(int argc,const char **argv) { @@ -318,7 +314,7 @@ int main(int argc,const char **argv) { usage(argv[0]); } - QString outputDir; + std::string outputDir; for (int i=1;i