summary refs log tree commit diff stats
path: root/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/assistant/lib/qhelpsearchindexreader_clucene.cpp')
-rw-r--r-- tools/assistant/lib/qhelpsearchindexreader_clucene.cpp 419
1 files changed, 254 insertions, 165 deletions
diff --git a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
index b5bec44..ee6dcfb 100644
--- a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
+++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
@@ -39,16 +39,19 @@
**
****************************************************************************/
-#include "qhelpenginecore.h"
-#include "fulltextsearch/qsearchable_p.h"
-#include "fulltextsearch/qqueryparser_p.h"
#include "fulltextsearch/qindexreader_p.h"
+#include "fulltextsearch/qqueryparser_p.h"
+#include "fulltextsearch/qsearchable_p.h"
+#include "qclucenefieldnames_p.h"
+#include "qhelpenginecore.h"
+
#include "qhelpsearchindexreader_clucene_p.h"
#include <QtCore/QDir>
#include <QtCore/QSet>
#include <QtCore/QString>
#include <QtCore/QFileInfo>
+#include <QtCore/QSharedPointer>
#include <QtCore/QStringList>
#include <QtCore/QTextStream>
#include <QtCore/QMutexLocker>
@@ -108,64 +111,88 @@ void QHelpSearchIndexReaderClucene::run()
#if !defined(QT_NO_EXCEPTIONS)
try {
#endif
- QCLuceneBooleanQuery booleanQuery;
+ QCLuceneBooleanQuery booleanQueryTitle;
+ QCLuceneBooleanQuery booleanQueryContent;
QCLuceneStandardAnalyzer analyzer;
- if (!buildQuery(booleanQuery, queryList, analyzer)) {
+ const QStringList& attribList =
+ engine.filterAttributes(engine.currentFilter());
+ bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField,
+ attribList, booleanQueryTitle, analyzer);
+ bool contentQueryIsValid = buildQuery(queryList, ContentField,
+ attribList, booleanQueryContent, analyzer);
+ if (!titleQueryIsValid && !contentQueryIsValid) {
emit searchingFinished(0);
return;
}
- const QStringList attribList = engine.filterAttributes(engine.currentFilter());
- if (!attribList.isEmpty()) {
- QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
- + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer);
+ QCLuceneIndexSearcher indexSearcher(indexPath);
- if (!query) {
+ // QCLuceneHits object must be allocated on the heap, because
+ // there is no default constructor.
+ QSharedPointer<QCLuceneHits> titleHits;
+ QSharedPointer<QCLuceneHits> contentHits;
+ if (titleQueryIsValid) {
+ titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryTitle)));
+ }
+ if (contentQueryIsValid) {
+ contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryContent)));
+ }
+ bool boost = true;
+ if ((titleHits.isNull() || titleHits->length() == 0)
+ && (contentHits.isNull() || contentHits->length() == 0)) {
+ booleanQueryTitle = QCLuceneBooleanQuery();
+ booleanQueryContent = QCLuceneBooleanQuery();
+ titleQueryIsValid =
+ buildTryHarderQuery(queryList, TitleTokenizedField,
+ attribList, booleanQueryTitle, analyzer);
+ contentQueryIsValid =
+ buildTryHarderQuery(queryList, ContentField, attribList,
+ booleanQueryContent, analyzer);
+ if (!titleQueryIsValid && !contentQueryIsValid) {
emit searchingFinished(0);
return;
}
- booleanQuery.add(query, true, true, false);
- }
-
- QCLuceneIndexSearcher indexSearcher(indexPath);
- QCLuceneHits hits = indexSearcher.search(booleanQuery);
-
- bool boost = true;
- QCLuceneBooleanQuery tryHarderQuery;
- if (hits.length() == 0) {
- if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) {
- if (!attribList.isEmpty()) {
- QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
- + attribList.join(QLatin1String(" +")), QLatin1String("attribute"),
- analyzer);
- tryHarderQuery.add(query, true, true, false);
- }
- hits = indexSearcher.search(tryHarderQuery);
- boost = (hits.length() == 0);
+ if (titleQueryIsValid) {
+ titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryTitle)));
+ }
+ if (contentQueryIsValid) {
+ contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+ indexSearcher.search(booleanQueryContent)));
}
+ boost = false;
}
+ QList<QSharedPointer<QCLuceneHits> > cluceneHitsList;
+ if (!titleHits.isNull())
+ cluceneHitsList.append(titleHits);
+ if (!contentHits.isNull())
+ cluceneHitsList.append(contentHits);
QSet<QString> pathSet;
QCLuceneDocument document;
const QStringList namespaceList = engine.registeredDocumentations();
- for (qint32 i = 0; i < hits.length(); i++) {
- document = hits.document(i);
- const QString path = document.get(QLatin1String("path"));
- if (!pathSet.contains(path) && namespaceList.contains(
- document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) {
- pathSet.insert(path);
- hitList.append(qMakePair(path, document.get(QLatin1String("title"))));
- }
- document.clear();
+ foreach (QSharedPointer<QCLuceneHits> hits, cluceneHitsList) {
+ for (qint32 i = 0; i < hits->length(); i++) {
+ document = hits->document(i);
+ const QString path = document.get(PathField);
+ if (!pathSet.contains(path) && namespaceList.contains(
+ document.get(NamespaceField), Qt::CaseInsensitive)) {
+ pathSet.insert(path);
+ hitList.append(qMakePair(path, document.get(TitleField)));
+ }
+ document.clear();
- mutex.lock();
- if (m_cancel) {
+ mutex.lock();
+ if (m_cancel) {
+ mutex.unlock();
+ emit searchingFinished(0);
+ return;
+ }
mutex.unlock();
- emit searchingFinished(0);
- return;
}
- mutex.unlock();
}
indexSearcher.close();
@@ -185,144 +212,205 @@ void QHelpSearchIndexReaderClucene::run()
}
}
-bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery,
- QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::buildQuery(
+ const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
{
- const QLatin1String c("content");
- const QLatin1String t("titleTokenized");
-
- QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer);
- QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer);
- if (query && query2) {
- booleanQuery.add(query, true, false, false);
- booleanQuery.add(query2, true, false, false);
- return true;
+ bool queryIsValid = false;
+ foreach (const QHelpSearchQuery &query, queries) {
+ if (fieldName != ContentField && isNegativeQuery(query)) {
+ queryIsValid = false;
+ break;
+ }
+ switch (query.fieldName) {
+ case QHelpSearchQuery::FUZZY:
+ if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::WITHOUT:
+ if (fieldName != ContentField)
+ return false;
+ if (addWithoutQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::PHRASE:
+ if (addPhraseQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::ALL:
+ if (addAllQuery(query, fieldName, booleanQuery))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::DEFAULT:
+ if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ case QHelpSearchQuery::ATLEAST:
+ if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer))
+ queryIsValid = true;
+ break;
+ default:
+ Q_ASSERT(!"Invalid field name");
+ }
}
- return false;
+ if (queryIsValid && !filterAttributes.isEmpty()) {
+ queryIsValid =
+ addAttributesQuery(filterAttributes, booleanQuery, analyzer);
+ }
+
+ return queryIsValid;
}
-bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery,
- const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(
+ const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
{
- foreach (const QHelpSearchQuery query, queryList) {
- switch (query.fieldName) {
- case QHelpSearchQuery::FUZZY: {
- const QLatin1String fuzzy("~");
- foreach (const QString &term, query.wordList) {
- if (term.isEmpty()
- || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) {
- return false;
- }
- }
- } break;
-
- case QHelpSearchQuery::WITHOUT: {
- QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
- foreach (const QString &term, query.wordList) {
- if (stopWords.contains(term, Qt::CaseInsensitive))
- continue;
-
- QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
- QLatin1String("content"), term.toLower()));
- QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
- QLatin1String("titleTokenized"), term.toLower()));
-
- if (query && query2) {
- booleanQuery.add(query, true, false, true);
- booleanQuery.add(query2, true, false, true);
- } else {
- return false;
- }
- }
- } break;
-
- case QHelpSearchQuery::PHRASE: {
- const QString &term = query.wordList.at(0).toLower();
- if (term.contains(QLatin1Char(' '))) {
- QStringList termList = term.split(QLatin1String(" "));
- QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
- QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
- foreach (const QString &term, termList) {
- if (!stopWords.contains(term, Qt::CaseInsensitive))
- q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower()));
- }
- booleanQuery.add(q, true, true, false);
- } else {
- QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
- QLatin1String("content"), term.toLower()));
- QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
- QLatin1String("titleTokenized"), term.toLower()));
-
- if (query && query2) {
- booleanQuery.add(query, true, true, false);
- booleanQuery.add(query2, true, false, false);
- } else {
- return false;
- }
- }
- } break;
+ if (queries.isEmpty())
+ return false;
+ const QHelpSearchQuery &query = queries.front();
+ if (query.fieldName != QHelpSearchQuery::DEFAULT)
+ return false;
+ if (isNegativeQuery(query))
+ return false;
+ if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer))
+ return false;
+ if (filterAttributes.isEmpty())
+ return true;
+ return addAttributesQuery(filterAttributes, booleanQuery, analyzer);
+}
- case QHelpSearchQuery::ALL: {
- QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
- foreach (const QString &term, query.wordList) {
- if (stopWords.contains(term, Qt::CaseInsensitive))
- continue;
+bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const
+{
+ const QString &search = query.wordList.join(" ");
+ return search.contains('!') || search.contains('-')
+ || search.contains(QLatin1String(" NOT "));
+}
- QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
- QLatin1String("content"), term.toLower()));
+bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ const QLatin1String fuzzy("~");
+ foreach (const QString &term, query.wordList) {
+ if (!term.isEmpty()) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer);
+ if (lQuery != 0) {
+ booleanQuery.add(lQuery, true, false, false);
+ queryIsValid = true;
+ }
+ }
+ }
+ return queryIsValid;
+}
- if (query) {
- booleanQuery.add(query, true, true, false);
- } else {
- return false;
- }
- }
- } break;
+bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, false, true);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
- case QHelpSearchQuery::DEFAULT: {
- foreach (const QString &term, query.wordList) {
- QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
- QLatin1String("content"), analyzer);
+bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QString &term = query.wordList.at(0).toLower();
+ if (term.contains(QLatin1Char(' '))) {
+ const QStringList termList = term.split(QLatin1String(" "));
+ QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
+ const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, termList) {
+ if (!stopWords.contains(term, Qt::CaseInsensitive))
+ q->addTerm(QCLuceneTerm(fieldName, term.toLower()));
+ }
+ if (!q->getTerms().isEmpty()) {
+ booleanQuery.add(q, true, true, false);
+ queryIsValid = true;
+ }
+ } else {
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, true, false);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
- if (query)
- booleanQuery.add(query, true, true, false);
- }
- } break;
+bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+ bool queryIsValid = false;
+ const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+ QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
+ fieldName, term.toLower()));
+ booleanQuery.add(lQuery, true, true, false);
+ queryIsValid = true;
+ }
+ return queryIsValid;
+}
- case QHelpSearchQuery::ATLEAST: {
- foreach (const QString &term, query.wordList) {
- if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer))
- return false;
- }
- }
+bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query,
+ const QString &fieldName, bool allTermsRequired,
+ QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ bool queryIsValid = false;
+ foreach (const QString &term, query.wordList) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer);
+ if (lQuery) {
+ booleanQuery.add(lQuery, true, allTermsRequired, false);
+ queryIsValid = true;
}
}
-
- return true;
+ return queryIsValid;
}
-bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,
- const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::addAtLeastQuery(
+ const QHelpSearchQuery &query, const QString &fieldName,
+ QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer)
{
- bool retVal = false;
- foreach (const QHelpSearchQuery query, queryList) {
- switch (query.fieldName) {
- default: break;
- case QHelpSearchQuery::DEFAULT: {
- foreach (const QString &term, query.wordList) {
- QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
- QLatin1String("content"), analyzer);
-
- if (query) {
- retVal = true;
- booleanQuery.add(query, true, false, false);
- }
- }
- } break;
+ bool queryIsValid = false;
+ foreach (const QString &term, query.wordList) {
+ if (!term.isEmpty()) {
+ QCLuceneQuery *lQuery =
+ QCLuceneQueryParser::parse(term, fieldName, analyzer);
+ if (lQuery) {
+ booleanQuery.add(lQuery, true, false, false);
+ queryIsValid = true;
+ }
}
}
- return retVal;
+ return queryIsValid;
+}
+
+bool QHelpSearchIndexReaderClucene::addAttributesQuery(
+ const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneAnalyzer &analyzer)
+{
+ QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+")
+ + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer);
+ if (!lQuery)
+ return false;
+ booleanQuery.add(lQuery, true, true, false);
+ return true;
}
void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
@@ -336,21 +424,22 @@ void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engin
QCLuceneStandardAnalyzer analyzer;
QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
- joinedQuery, QLatin1String("content"), analyzer);
+ joinedQuery, ContentField, analyzer);
if (parsedQuery) {
joinedQuery = parsedQuery->toString();
delete parsedQuery;
}
- int length = QString(QLatin1String("content:")).length();
- int index = joinedQuery.indexOf(QLatin1String("content:"));
+ const QString contentString(ContentField + QLatin1String(":"));
+ int length = contentString.length();
+ int index = joinedQuery.indexOf(contentString);
QString term;
int nextIndex = 0;
QStringList searchTerms;
while (index != -1) {
- nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1);
+ nextIndex = joinedQuery.indexOf(contentString, index + 1);
term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
if (term.startsWith(QLatin1String("\""))
&& term.endsWith(QLatin1String("\""))) {