diff options
Diffstat (limited to 'tools/assistant/lib/qhelpsearchindexreader_clucene.cpp')
-rw-r--r-- | tools/assistant/lib/qhelpsearchindexreader_clucene.cpp | 419 |
1 files changed, 254 insertions, 165 deletions
diff --git a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp index b5bec44..ee6dcfb 100644 --- a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp +++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp @@ -39,16 +39,19 @@ ** ****************************************************************************/ -#include "qhelpenginecore.h" -#include "fulltextsearch/qsearchable_p.h" -#include "fulltextsearch/qqueryparser_p.h" #include "fulltextsearch/qindexreader_p.h" +#include "fulltextsearch/qqueryparser_p.h" +#include "fulltextsearch/qsearchable_p.h" +#include "qclucenefieldnames_p.h" +#include "qhelpenginecore.h" + #include "qhelpsearchindexreader_clucene_p.h" #include <QtCore/QDir> #include <QtCore/QSet> #include <QtCore/QString> #include <QtCore/QFileInfo> +#include <QtCore/QSharedPointer> #include <QtCore/QStringList> #include <QtCore/QTextStream> #include <QtCore/QMutexLocker> @@ -108,64 +111,88 @@ void QHelpSearchIndexReaderClucene::run() #if !defined(QT_NO_EXCEPTIONS) try { #endif - QCLuceneBooleanQuery booleanQuery; + QCLuceneBooleanQuery booleanQueryTitle; + QCLuceneBooleanQuery booleanQueryContent; QCLuceneStandardAnalyzer analyzer; - if (!buildQuery(booleanQuery, queryList, analyzer)) { + const QStringList& attribList = + engine.filterAttributes(engine.currentFilter()); + bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField, + attribList, booleanQueryTitle, analyzer); + bool contentQueryIsValid = buildQuery(queryList, ContentField, + attribList, booleanQueryContent, analyzer); + if (!titleQueryIsValid && !contentQueryIsValid) { emit searchingFinished(0); return; } - const QStringList attribList = engine.filterAttributes(engine.currentFilter()); - if (!attribList.isEmpty()) { - QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") - + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer); + QCLuceneIndexSearcher indexSearcher(indexPath); - if (!query) { + // QCLuceneHits object must be allocated on the heap, because + // there is no default constructor. + QSharedPointer<QCLuceneHits> titleHits; + QSharedPointer<QCLuceneHits> contentHits; + if (titleQueryIsValid) { + titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryTitle))); + } + if (contentQueryIsValid) { + contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryContent))); + } + bool boost = true; + if ((titleHits.isNull() || titleHits->length() == 0) + && (contentHits.isNull() || contentHits->length() == 0)) { + booleanQueryTitle = QCLuceneBooleanQuery(); + booleanQueryContent = QCLuceneBooleanQuery(); + titleQueryIsValid = + buildTryHarderQuery(queryList, TitleTokenizedField, + attribList, booleanQueryTitle, analyzer); + contentQueryIsValid = + buildTryHarderQuery(queryList, ContentField, attribList, + booleanQueryContent, analyzer); + if (!titleQueryIsValid && !contentQueryIsValid) { emit searchingFinished(0); return; } - booleanQuery.add(query, true, true, false); - } - - QCLuceneIndexSearcher indexSearcher(indexPath); - QCLuceneHits hits = indexSearcher.search(booleanQuery); - - bool boost = true; - QCLuceneBooleanQuery tryHarderQuery; - if (hits.length() == 0) { - if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) { - if (!attribList.isEmpty()) { - QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") - + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), - analyzer); - tryHarderQuery.add(query, true, true, false); - } - hits = indexSearcher.search(tryHarderQuery); - boost = (hits.length() == 0); + if (titleQueryIsValid) { + titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryTitle))); + } + if (contentQueryIsValid) { + contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryContent))); } + boost = false; } + QList<QSharedPointer<QCLuceneHits> > cluceneHitsList; + if (!titleHits.isNull()) + cluceneHitsList.append(titleHits); + if (!contentHits.isNull()) + cluceneHitsList.append(contentHits); QSet<QString> pathSet; QCLuceneDocument document; const QStringList namespaceList = engine.registeredDocumentations(); - for (qint32 i = 0; i < hits.length(); i++) { - document = hits.document(i); - const QString path = document.get(QLatin1String("path")); - if (!pathSet.contains(path) && namespaceList.contains( - document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) { - pathSet.insert(path); - hitList.append(qMakePair(path, document.get(QLatin1String("title")))); - } - document.clear(); + foreach (QSharedPointer<QCLuceneHits> hits, cluceneHitsList) { + for (qint32 i = 0; i < hits->length(); i++) { + document = hits->document(i); + const QString path = document.get(PathField); + if (!pathSet.contains(path) && namespaceList.contains( + document.get(NamespaceField), Qt::CaseInsensitive)) { + pathSet.insert(path); + hitList.append(qMakePair(path, document.get(TitleField))); + } + document.clear(); - mutex.lock(); - if (m_cancel) { + mutex.lock(); + if (m_cancel) { + mutex.unlock(); + emit searchingFinished(0); + return; + } mutex.unlock(); - emit searchingFinished(0); - return; } - mutex.unlock(); } indexSearcher.close(); @@ -185,144 +212,205 @@ void QHelpSearchIndexReaderClucene::run() } } -bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery, - QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::buildQuery( + const QList<QHelpSearchQuery> &queries, const QString &fieldName, + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) { - const QLatin1String c("content"); - const QLatin1String t("titleTokenized"); - - QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer); - QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer); - if (query && query2) { - booleanQuery.add(query, true, false, false); - booleanQuery.add(query2, true, false, false); - return true; + bool queryIsValid = false; + foreach (const QHelpSearchQuery &query, queries) { + if (fieldName != ContentField && isNegativeQuery(query)) { + queryIsValid = false; + break; + } + switch (query.fieldName) { + case QHelpSearchQuery::FUZZY: + if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer)) + queryIsValid = true; + break; + case QHelpSearchQuery::WITHOUT: + if (fieldName != ContentField) + return false; + if (addWithoutQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::PHRASE: + if (addPhraseQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::ALL: + if (addAllQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::DEFAULT: + if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer)) + queryIsValid = true; + break; + case QHelpSearchQuery::ATLEAST: + if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer)) + queryIsValid = true; + break; + default: + Q_ASSERT(!"Invalid field name"); + } } - return false; + if (queryIsValid && !filterAttributes.isEmpty()) { + queryIsValid = + addAttributesQuery(filterAttributes, booleanQuery, analyzer); + } + + return queryIsValid; } -bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery, - const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::buildTryHarderQuery( + const QList<QHelpSearchQuery> &queries, const QString &fieldName, + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) { - foreach (const QHelpSearchQuery query, queryList) { - switch (query.fieldName) { - case QHelpSearchQuery::FUZZY: { - const QLatin1String fuzzy("~"); - foreach (const QString &term, query.wordList) { - if (term.isEmpty() - || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) { - return false; - } - } - } break; - - case QHelpSearchQuery::WITHOUT: { - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, query.wordList) { - if (stopWords.contains(term, Qt::CaseInsensitive)) - continue; - - QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); - QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("titleTokenized"), term.toLower())); - - if (query && query2) { - booleanQuery.add(query, true, false, true); - booleanQuery.add(query2, true, false, true); - } else { - return false; - } - } - } break; - - case QHelpSearchQuery::PHRASE: { - const QString &term = query.wordList.at(0).toLower(); - if (term.contains(QLatin1Char(' '))) { - QStringList termList = term.split(QLatin1String(" ")); - QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, termList) { - if (!stopWords.contains(term, Qt::CaseInsensitive)) - q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower())); - } - booleanQuery.add(q, true, true, false); - } else { - QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); - QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("titleTokenized"), term.toLower())); - - if (query && query2) { - booleanQuery.add(query, true, true, false); - booleanQuery.add(query2, true, false, false); - } else { - return false; - } - } - } break; + if (queries.isEmpty()) + return false; + const QHelpSearchQuery &query = queries.front(); + if (query.fieldName != QHelpSearchQuery::DEFAULT) + return false; + if (isNegativeQuery(query)) + return false; + if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer)) + return false; + if (filterAttributes.isEmpty()) + return true; + return addAttributesQuery(filterAttributes, booleanQuery, analyzer); +} - case QHelpSearchQuery::ALL: { - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, query.wordList) { - if (stopWords.contains(term, Qt::CaseInsensitive)) - continue; +bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const +{ + const QString &search = query.wordList.join(" "); + return search.contains('!') || search.contains('-') + || search.contains(QLatin1String(" NOT ")); +} - QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); +bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + bool queryIsValid = false; + const QLatin1String fuzzy("~"); + foreach (const QString &term, query.wordList) { + if (!term.isEmpty()) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer); + if (lQuery != 0) { + booleanQuery.add(lQuery, true, false, false); + queryIsValid = true; + } + } + } + return queryIsValid; +} - if (query) { - booleanQuery.add(query, true, true, false); - } else { - return false; - } - } - } break; +bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, false, true); + queryIsValid = true; + } + return queryIsValid; +} - case QHelpSearchQuery::DEFAULT: { - foreach (const QString &term, query.wordList) { - QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), - QLatin1String("content"), analyzer); +bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QString &term = query.wordList.at(0).toLower(); + if (term.contains(QLatin1Char(' '))) { + const QStringList termList = term.split(QLatin1String(" ")); + QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); + const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, termList) { + if (!stopWords.contains(term, Qt::CaseInsensitive)) + q->addTerm(QCLuceneTerm(fieldName, term.toLower())); + } + if (!q->getTerms().isEmpty()) { + booleanQuery.add(q, true, true, false); + queryIsValid = true; + } + } else { + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, true, false); + queryIsValid = true; + } + return queryIsValid; +} - if (query) - booleanQuery.add(query, true, true, false); - } - } break; +bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, true, false); + queryIsValid = true; + } + return queryIsValid; +} - case QHelpSearchQuery::ATLEAST: { - foreach (const QString &term, query.wordList) { - if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer)) - return false; - } - } +bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query, + const QString &fieldName, bool allTermsRequired, + QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + bool queryIsValid = false; + foreach (const QString &term, query.wordList) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer); + if (lQuery) { + booleanQuery.add(lQuery, true, allTermsRequired, false); + queryIsValid = true; } } - - return true; + return queryIsValid; } -bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery, - const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::addAtLeastQuery( + const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer) { - bool retVal = false; - foreach (const QHelpSearchQuery query, queryList) { - switch (query.fieldName) { - default: break; - case QHelpSearchQuery::DEFAULT: { - foreach (const QString &term, query.wordList) { - QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), - QLatin1String("content"), analyzer); - - if (query) { - retVal = true; - booleanQuery.add(query, true, false, false); - } - } - } break; + bool queryIsValid = false; + foreach (const QString &term, query.wordList) { + if (!term.isEmpty()) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term, fieldName, analyzer); + if (lQuery) { + booleanQuery.add(lQuery, true, false, false); + queryIsValid = true; + } } } - return retVal; + return queryIsValid; +} + +bool QHelpSearchIndexReaderClucene::addAttributesQuery( + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+") + + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer); + if (!lQuery) + return false; + booleanQuery.add(lQuery, true, true, false); + return true; } void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine, @@ -336,21 +424,22 @@ void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engin QCLuceneStandardAnalyzer analyzer; QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse( - joinedQuery, QLatin1String("content"), analyzer); + joinedQuery, ContentField, analyzer); if (parsedQuery) { joinedQuery = parsedQuery->toString(); delete parsedQuery; } - int length = QString(QLatin1String("content:")).length(); - int index = joinedQuery.indexOf(QLatin1String("content:")); + const QString contentString(ContentField + QLatin1String(":")); + int length = contentString.length(); + int index = joinedQuery.indexOf(contentString); QString term; int nextIndex = 0; QStringList searchTerms; while (index != -1) { - nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1); + nextIndex = joinedQuery.indexOf(contentString, index + 1); term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified(); if (term.startsWith(QLatin1String("\"")) && term.endsWith(QLatin1String("\""))) { |