diff options
-rw-r--r-- | addon/doxysearch/CMakeLists.txt | 4 | ||||
-rw-r--r-- | addon/doxysearch/doxyindexer.cpp | 123 |
2 files changed, 61 insertions, 66 deletions
diff --git a/addon/doxysearch/CMakeLists.txt b/addon/doxysearch/CMakeLists.txt index d0c8724..2c1ac08 100644 --- a/addon/doxysearch/CMakeLists.txt +++ b/addon/doxysearch/CMakeLists.txt @@ -7,7 +7,7 @@ endif() include_directories( ${PROJECT_SOURCE_DIR}/libversion - ${PROJECT_SOURCE_DIR}/qtools + ${PROJECT_SOURCE_DIR}/libxml ${XAPIAN_INCLUDE_DIR} ${ZLIB_INCLUDE_DIRS} ) @@ -20,7 +20,7 @@ target_link_libraries(doxyindexer ${WIN_EXTRA_LIBS} ${COVERAGE_LINKER_FLAGS} doxygen_version - qtools + xml ) add_executable(doxysearch.cgi diff --git a/addon/doxysearch/doxyindexer.cpp b/addon/doxysearch/doxyindexer.cpp index ae4e046..8d295cb 100644 --- a/addon/doxysearch/doxyindexer.cpp +++ b/addon/doxysearch/doxyindexer.cpp @@ -20,17 +20,17 @@ #include <string> #include <algorithm> #include <sstream> +#include <fstream> +#include <iterator> +#include <regex> -// Qtools includes -#include <qregexp.h> -#include <qxml.h> -#include <qfile.h> -#include <qfileinfo.h> +#include <sys/stat.h> // Xapian include #include <xapian.h> #include "version.h" +#include "xml.h" #define MAX_TERM_LENGTH 245 @@ -106,13 +106,14 @@ static void addWords(const std::string &s,Xapian::Document &doc,int wfd) /** Adds all identifiers in \a s to document \a doc with weight \a wfd */ static void addIdentifiers(const std::string &s,Xapian::Document &doc,int wfd) { - QRegExp re("[A-Z_a-z][A-Z_a-z0-9]*"); - int i,l,p=0; - QCString qs = s.c_str(); - while ((i=re.match(qs,p,&l))!=-1) + std::regex id_re("[A-Z_a-z][A-Z_a-z0-9]*"); + auto id_begin = std::sregex_iterator(s.begin(), s.end(), id_re); + auto id_end = std::sregex_iterator(); + + for (auto i = id_begin; i!=id_end; ++i) { - safeAddTerm(qs.mid(p,i-p).data(),doc,wfd); - p=i+l; + std::smatch match = *i; + safeAddTerm(match.str(),doc,wfd); } } @@ -142,12 +143,12 @@ static std::string unescapeXmlEntities(const std::string &s) /** This class is a wrapper around SAX style XML parser, which * parses the file without first building a DOM tree in memory. */ -class XMLContentHandler : public QXmlDefaultHandler +class XMLContentHandler { public: /** Handler for parsing XML data */ - XMLContentHandler(const QString &path) - : m_db((path+"doxysearch.db").utf8().data(),Xapian::DB_CREATE_OR_OVERWRITE), + XMLContentHandler(const std::string &path) + : m_db(path+"doxysearch.db",Xapian::DB_CREATE_OR_OVERWRITE), m_stemmer("english") { m_curFieldName = UnknownField; @@ -161,7 +162,6 @@ class XMLContentHandler : public QXmlDefaultHandler m_db.commit(); } - private: enum FieldNames { UnknownField = 0, @@ -175,13 +175,12 @@ class XMLContentHandler : public QXmlDefaultHandler }; /** Handler for a start tag. Called for <doc> and <field> tags */ - bool startElement(const QString &, const QString &, - const QString &name, const QXmlAttributes &attrib) + void startElement(const std::string &name, const XMLHandlers::Attributes &attrib) { m_data=""; if (name=="field") { - QString fieldName = attrib.value("name"); + std::string fieldName = XMLHandlers::value(attrib,"name"); if (fieldName=="type") m_curFieldName=TypeField; else if (fieldName=="name") m_curFieldName=NameField; else if (fieldName=="args") m_curFieldName=ArgsField; @@ -191,11 +190,10 @@ class XMLContentHandler : public QXmlDefaultHandler else if (fieldName=="text") m_curFieldName=TextField; else m_curFieldName=UnknownField; } - return TRUE; } /** Handler for an end tag. Called for </doc> and </field> tags */ - bool endElement(const QString &, const QString &, const QString &name) + void endElement(const std::string &name) { if (name=="doc") // </doc> { @@ -260,16 +258,21 @@ class XMLContentHandler : public QXmlDefaultHandler m_curFieldName=UnknownField; } // reset m_data - return TRUE; } /** Handler for inline text */ - bool characters(const QString& ch) + void characters(const std::string& ch) { - m_data += std::string(ch.utf8()); - return TRUE; + m_data += ch; } + void error(const std::string &fileName,int lineNr,const std::string &msg) + { + std::cerr << "Fatal error at " << fileName << ":" << lineNr << ": " << msg << std::endl; + } + + private: + // internal state Xapian::WritableDatabase m_db; Xapian::Document m_doc; @@ -279,38 +282,31 @@ class XMLContentHandler : public QXmlDefaultHandler FieldNames m_curFieldName; }; -/** Class for handling error during XML parsing */ -class XMLErrorHandler : public QXmlErrorHandler -{ - public: - virtual ~XMLErrorHandler() {} - bool warning( const QXmlParseException & ) - { - return FALSE; - } - bool error( const QXmlParseException & ) - { - return FALSE; - } - bool fatalError( const QXmlParseException &exception ) - { - std::cerr << "Fatal error at line " << exception.lineNumber() - << " column " << exception.columnNumber() << ": " - << exception.message().utf8() << std::endl; - return FALSE; - } - QString errorString() { return ""; } - - private: - QString errorMsg; -}; - static void usage(const char *name, int exitVal = 1) { std::cerr << "Usage: " << name << " [-o output_dir] searchdata.xml [searchdata2.xml ...]" << std::endl; exit(exitVal); } +// return the contents of a file as a string +inline std::string fileToString(const std::string &fileName) +{ + std::ifstream t(fileName); + std::string result; + t.seekg(0, std::ios::end); + result.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + result.assign(std::istreambuf_iterator<char>(t), + std::istreambuf_iterator<char>()); + return result; +} + +bool dirExists(const char *path) +{ + struct stat info = {}; + return stat(path,&info)==0 && (info.st_mode&S_IFDIR); +} + /** main function to index data */ int main(int argc,const char **argv) { @@ -318,7 +314,7 @@ int main(int argc,const char **argv) { usage(argv[0]); } - QString outputDir; + std::string outputDir; for (int i=1;i<argc;i++) { if (std::string(argv[i])=="-o") @@ -332,8 +328,7 @@ int main(int argc,const char **argv) { i++; outputDir=argv[i]; - QFileInfo fi(outputDir); - if (!fi.exists() || !fi.isDir()) + if (!dirExists(outputDir.c_str())) { std::cerr << "Error: specified output directory does not exist!" << std::endl; usage(argv[0]); @@ -353,12 +348,16 @@ int main(int argc,const char **argv) try { - if (!outputDir.isEmpty() && outputDir.at(outputDir.length()-1)!=pathSep) + if (!outputDir.empty() && outputDir.at(outputDir.length()-1)!=pathSep) { outputDir+=pathSep; } - XMLContentHandler handler(outputDir); - XMLErrorHandler errorHandler; + XMLContentHandler contentHandler(outputDir); + XMLHandlers handlers; + handlers.startElement = [&contentHandler](const std::string &name,const XMLHandlers::Attributes &attrs) { contentHandler.startElement(name,attrs); }; + handlers.endElement = [&contentHandler](const std::string &name) { contentHandler.endElement(name); }; + handlers.characters = [&contentHandler](const std::string &chars) { contentHandler.characters(chars); }; + handlers.error = [&contentHandler](const std::string &fileName,int lineNr,const std::string &msg) { contentHandler.error(fileName,lineNr,msg); }; for (int i=1;i<argc;i++) { if (std::string(argv[i])=="-o") @@ -367,14 +366,10 @@ int main(int argc,const char **argv) } else { - QString xmlFileName = argv[i]; - std::cout << "Processing " << xmlFileName.utf8() << "..." << std::endl; - QFile xmlFile(xmlFileName); - QXmlInputSource source(xmlFile); - QXmlSimpleReader reader; - reader.setContentHandler(&handler); - reader.setErrorHandler(&errorHandler); - reader.parse(source); + std::cout << "Processing " << argv[i] << "..." << std::endl; + std::string inputStr = fileToString(argv[i]); + XMLParser parser(handlers); + parser.parse(argv[i],inputStr.c_str(),false); } } } |