diff options
Diffstat (limited to 'tools/assistant/lib')
-rw-r--r-- | tools/assistant/lib/lib.pro | 5 | ||||
-rw-r--r-- | tools/assistant/lib/qclucenefieldnames.cpp | 59 | ||||
-rw-r--r-- | tools/assistant/lib/qclucenefieldnames_p.h | 65 | ||||
-rw-r--r-- | tools/assistant/lib/qhelp_global.cpp | 14 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpcollectionhandler.cpp | 4 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpenginecore.cpp | 5 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpgenerator.cpp | 65 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpgenerator_p.h | 1 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpsearchindexreader_clucene.cpp | 419 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpsearchindexreader_clucene_p.h | 45 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp | 20 | ||||
-rw-r--r-- | tools/assistant/lib/qhelpsearchquerywidget.cpp | 39 |
12 files changed, 507 insertions, 234 deletions
diff --git a/tools/assistant/lib/lib.pro b/tools/assistant/lib/lib.pro index 51933de..26d3456 100644 --- a/tools/assistant/lib/lib.pro +++ b/tools/assistant/lib/lib.pro @@ -23,7 +23,6 @@ unix:QMAKE_PKGCONFIG_REQUIRES += QtNetwork \ QtSql \ QtXml LIBS_PRIVATE += -l$$qclucene - RESOURCES += helpsystem.qrc SOURCES += qhelpenginecore.cpp \ qhelpengine.cpp \ @@ -41,6 +40,7 @@ SOURCES += qhelpenginecore.cpp \ qhelpsearchindexwriter_default.cpp \ qhelpsearchindexreader_default.cpp \ qhelpsearchindexreader.cpp \ + qclucenefieldnames.cpp \ qhelp_global.cpp # access to clucene @@ -63,7 +63,8 @@ HEADERS += qhelpenginecore.h \ qhelpsearchindex_default_p.h \ qhelpsearchindexwriter_default_p.h \ qhelpsearchindexreader_default_p.h \ - qhelpsearchindexreader_p.h + qhelpsearchindexreader_p.h \ + qclucenefieldnames_p.h # access to clucene HEADERS += qhelpsearchindexwriter_clucene_p.h \ diff --git a/tools/assistant/lib/qclucenefieldnames.cpp b/tools/assistant/lib/qclucenefieldnames.cpp new file mode 100644 index 0000000..84e3a1a --- /dev/null +++ b/tools/assistant/lib/qclucenefieldnames.cpp @@ -0,0 +1,59 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the Qt Assistant of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qclucenefieldnames_p.h" + +QT_BEGIN_NAMESPACE + +namespace qt { +namespace fulltextsearch { +namespace clucene { +const QString AttributeField(QLatin1String("attribute")); +const QString ContentField(QLatin1String("content")); +const QString NamespaceField(QLatin1String("namespace")); +const QString PathField(QLatin1String("path")); +const QString TitleField(QLatin1String("title")); +const QString TitleTokenizedField(QLatin1String("titleTokenized")); +} // namespace clucene +} // namespace fulltextsearch +} // namespace qt + +QT_END_NAMESPACE diff --git a/tools/assistant/lib/qclucenefieldnames_p.h b/tools/assistant/lib/qclucenefieldnames_p.h new file mode 100644 index 0000000..489832f --- /dev/null +++ b/tools/assistant/lib/qclucenefieldnames_p.h @@ -0,0 +1,65 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the Qt Assistant of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. 
+** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QCLUCENEFIELDNAMES_P_H +#define QCLUCENEFIELDNAMES_P_H + +#include <QtCore/QtGlobal> +#include <QtCore/QString> + +QT_BEGIN_NAMESPACE + +namespace qt { +namespace fulltextsearch { +namespace clucene { + extern const QString AttributeField; + extern const QString ContentField; + extern const QString NamespaceField; + extern const QString PathField; + extern const QString TitleField; + extern const QString TitleTokenizedField; +} // namespace clucene +} // namespace fulltextsearch +} // namespace qt + +QT_END_NAMESPACE + +#endif // QCLUCENEFIELDNAMES_P_H diff --git a/tools/assistant/lib/qhelp_global.cpp b/tools/assistant/lib/qhelp_global.cpp index 749d2c9..332704a 100644 --- a/tools/assistant/lib/qhelp_global.cpp +++ b/tools/assistant/lib/qhelp_global.cpp @@ -86,17 +86,17 @@ QString QHelpGlobal::codecFromData(const QByteArray &data) QString 
QHelpGlobal::codecFromHtmlData(const QByteArray &data) { - QString content = QString::fromUtf8(data.constData(), data.size()); - int start = content.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive); + QString head = QString::fromUtf8(data.constData(), qMin(1000, data.size())); + int start = head.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive); if (start > 0) { int end; QRegExp r(QLatin1String("charset=([^\"\\s]+)")); while (start != -1) { - end = content.indexOf(QLatin1Char('>'), start) + 1; - const QString &meta = content.mid(start, end - start).toLower(); + end = head.indexOf(QLatin1Char('>'), start) + 1; + const QString &meta = head.mid(start, end - start).toLower(); if (r.indexIn(meta) != -1) return r.cap(1); - start = content.indexOf(QLatin1String("<meta"), end, + start = head.indexOf(QLatin1String("<meta"), end, Qt::CaseInsensitive); } } @@ -105,8 +105,8 @@ QString QHelpGlobal::codecFromHtmlData(const QByteArray &data) QString QHelpGlobal::codecFromXmlData(const QByteArray &data) { - QString content = QString::fromUtf8(data.constData(), data.size()); + QString head = QString::fromUtf8(data.constData(), qMin(1000, data.size())); const QRegExp encodingExp(QLatin1String("^\\s*<\\?xml version=" "\"\\d\\.\\d\" encoding=\"([^\"]+)\"\\?>.*")); - return encodingExp.exactMatch(content) ? encodingExp.cap(1) : QString(); + return encodingExp.exactMatch(head) ? 
encodingExp.cap(1) : QString(); } diff --git a/tools/assistant/lib/qhelpcollectionhandler.cpp b/tools/assistant/lib/qhelpcollectionhandler.cpp index 4aa7ab6..9092259 100644 --- a/tools/assistant/lib/qhelpcollectionhandler.cpp +++ b/tools/assistant/lib/qhelpcollectionhandler.cpp @@ -308,10 +308,8 @@ bool QHelpCollectionHandler::addCustomFilter(const QString &filterName, m_query.prepare(QLatin1String("SELECT Id FROM FilterNameTable WHERE Name=?")); m_query.bindValue(0, filterName); m_query.exec(); - while (m_query.next()) { + if (m_query.next()) nameId = m_query.value(0).toInt(); - break; - } m_query.exec(QLatin1String("SELECT Id, Name FROM FilterAttributeTable")); QStringList idsToInsert = attributes; diff --git a/tools/assistant/lib/qhelpenginecore.cpp b/tools/assistant/lib/qhelpenginecore.cpp index bd150d6..0510fd5 100644 --- a/tools/assistant/lib/qhelpenginecore.cpp +++ b/tools/assistant/lib/qhelpenginecore.cpp @@ -406,8 +406,9 @@ QStringList QHelpEngineCore::customFilters() const /*! Adds the new custom filter \a filterName. The filter attributes - are specified by \a attributes. The function returns false if - the filter can not be added, e.g. when the filter already exists. + are specified by \a attributes. If the filter already exists, + its attribute set is replaced. The function returns true if + the operation succeeded, otherwise it returns false. 
\sa customFilters(), removeCustomFilter() */ diff --git a/tools/assistant/lib/qhelpgenerator.cpp b/tools/assistant/lib/qhelpgenerator.cpp index 48d73aa..1bb4cc8 100644 --- a/tools/assistant/lib/qhelpgenerator.cpp +++ b/tools/assistant/lib/qhelpgenerator.cpp @@ -47,6 +47,7 @@ #include <QtCore/QFileInfo> #include <QtCore/QDir> #include <QtCore/QDebug> +#include <QtCore/QSet> #include <QtCore/QVariant> #include <QtCore/QDateTime> #include <QtCore/QTextCodec> @@ -824,4 +825,68 @@ bool QHelpGenerator::insertMetaData(const QMap<QString, QVariant> &metaData) return true; } +bool QHelpGenerator::checkLinks(const QHelpDataInterface &helpData) +{ + /* + * Step 1: Gather the canonical file paths of all files in the project. + * We use a set, because there will be a lot of look-ups. + */ + QSet<QString> files; + foreach (const QHelpDataFilterSection &filterSection, helpData.filterSections()) { + foreach (const QString &file, filterSection.files()) { + QFileInfo fileInfo(helpData.rootPath() + QDir::separator() + file); + const QString &canonicalFileName = fileInfo.canonicalFilePath(); + if (!fileInfo.exists()) + emit warning(tr("File '%1' does not exist.").arg(file)); + else + files.insert(canonicalFileName); + } + } + + /* + * Step 2: Check the hypertext and image references of all HTML files. + * Note that we don't parse the files, but simply grep for the + * respective HTML elements. Therefore, contents that are e.g. + * commented out can cause false warnings.
+ */ + bool allLinksOk = true; + foreach (const QString &fileName, files) { + if (!fileName.endsWith(QLatin1String("html")) + && !fileName.endsWith(QLatin1String("htm"))) + continue; + QFile htmlFile(fileName); + if (!htmlFile.open(QIODevice::ReadOnly)) { + emit warning(tr("File '%1' cannot be opened.").arg(fileName)); + continue; + } + const QRegExp linkPattern(QLatin1String("<(?:a href|img src)=\"?([^#\">]+)[#\">]")); + QTextStream stream(&htmlFile); + const QString codec = QHelpGlobal::codecFromData(htmlFile.read(1000)); + stream.setCodec(QTextCodec::codecForName(codec.toLatin1().constData())); + const QString &content = stream.readAll(); + QStringList invalidLinks; + for (int pos = linkPattern.indexIn(content); pos != -1; + pos = linkPattern.indexIn(content, pos + 1)) { + const QString& linkedFileName = linkPattern.cap(1); + if (linkedFileName.contains(QLatin1String("://"))) + continue; + const QString curDir = QFileInfo(fileName).dir().path(); + const QString &canonicalLinkedFileName = + QFileInfo(curDir + QDir::separator() + linkedFileName).canonicalFilePath(); + if (!files.contains(canonicalLinkedFileName) + && !invalidLinks.contains(canonicalLinkedFileName)) { + emit warning(tr("File '%1' contains an invalid link to file '%2'"). 
+ arg(fileName).arg(linkedFileName)); + allLinksOk = false; + invalidLinks.append(canonicalLinkedFileName); + } + } + } + + if (!allLinksOk) + d->error = tr("Invalid links in HTML files."); + return allLinksOk; +} + QT_END_NAMESPACE + diff --git a/tools/assistant/lib/qhelpgenerator_p.h b/tools/assistant/lib/qhelpgenerator_p.h index 849e724..77189b0 100644 --- a/tools/assistant/lib/qhelpgenerator_p.h +++ b/tools/assistant/lib/qhelpgenerator_p.h @@ -74,6 +74,7 @@ public: bool generate(QHelpDataInterface *helpData, const QString &outputFileName); + bool checkLinks(const QHelpDataInterface &helpData); QString error() const; Q_SIGNALS: diff --git a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp index 954f41f..e180375 100644 --- a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp +++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp @@ -39,16 +39,19 @@ ** ****************************************************************************/ -#include "qhelpenginecore.h" -#include "fulltextsearch/qsearchable_p.h" -#include "fulltextsearch/qqueryparser_p.h" #include "fulltextsearch/qindexreader_p.h" +#include "fulltextsearch/qqueryparser_p.h" +#include "fulltextsearch/qsearchable_p.h" +#include "qclucenefieldnames_p.h" +#include "qhelpenginecore.h" + #include "qhelpsearchindexreader_clucene_p.h" #include <QtCore/QDir> #include <QtCore/QSet> #include <QtCore/QString> #include <QtCore/QFileInfo> +#include <QtCore/QSharedPointer> #include <QtCore/QStringList> #include <QtCore/QTextStream> #include <QtCore/QMutexLocker> @@ -108,64 +111,88 @@ void QHelpSearchIndexReaderClucene::run() #if !defined(QT_NO_EXCEPTIONS) try { #endif - QCLuceneBooleanQuery booleanQuery; + QCLuceneBooleanQuery booleanQueryTitle; + QCLuceneBooleanQuery booleanQueryContent; QCLuceneStandardAnalyzer analyzer; - if (!buildQuery(booleanQuery, queryList, analyzer)) { + const QStringList& attribList = + 
engine.filterAttributes(engine.currentFilter()); + bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField, + attribList, booleanQueryTitle, analyzer); + bool contentQueryIsValid = buildQuery(queryList, ContentField, + attribList, booleanQueryContent, analyzer); + if (!titleQueryIsValid && !contentQueryIsValid) { emit searchingFinished(0); return; } - const QStringList attribList = engine.filterAttributes(engine.currentFilter()); - if (!attribList.isEmpty()) { - QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") - + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer); + QCLuceneIndexSearcher indexSearcher(indexPath); - if (!query) { + // QCLuceneHits object must be allocated on the heap, because + // there is no default constructor. + QSharedPointer<QCLuceneHits> titleHits; + QSharedPointer<QCLuceneHits> contentHits; + if (titleQueryIsValid) { + titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryTitle))); + } + if (contentQueryIsValid) { + contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryContent))); + } + bool boost = true; + if ((titleHits.isNull() || titleHits->length() == 0) + && (contentHits.isNull() || contentHits->length() == 0)) { + booleanQueryTitle = QCLuceneBooleanQuery(); + booleanQueryContent = QCLuceneBooleanQuery(); + titleQueryIsValid = + buildTryHarderQuery(queryList, TitleTokenizedField, + attribList, booleanQueryTitle, analyzer); + contentQueryIsValid = + buildTryHarderQuery(queryList, ContentField, attribList, + booleanQueryContent, analyzer); + if (!titleQueryIsValid && !contentQueryIsValid) { emit searchingFinished(0); return; } - booleanQuery.add(query, true, true, false); - } - - QCLuceneIndexSearcher indexSearcher(indexPath); - QCLuceneHits hits = indexSearcher.search(booleanQuery); - - bool boost = true; - QCLuceneBooleanQuery tryHarderQuery; - if (hits.length() == 0) { - if 
(buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) { - if (!attribList.isEmpty()) { - QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") - + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), - analyzer); - tryHarderQuery.add(query, true, true, false); - } - hits = indexSearcher.search(tryHarderQuery); - boost = (hits.length() == 0); + if (titleQueryIsValid) { + titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryTitle))); + } + if (contentQueryIsValid) { + contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( + indexSearcher.search(booleanQueryContent))); } + boost = false; } + QList<QSharedPointer<QCLuceneHits> > cluceneHitsList; + if (!titleHits.isNull()) + cluceneHitsList.append(titleHits); + if (!contentHits.isNull()) + cluceneHitsList.append(contentHits); QSet<QString> pathSet; QCLuceneDocument document; const QStringList namespaceList = engine.registeredDocumentations(); - for (qint32 i = 0; i < hits.length(); i++) { - document = hits.document(i); - const QString path = document.get(QLatin1String("path")); - if (!pathSet.contains(path) && namespaceList.contains( - document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) { - pathSet.insert(path); - hitList.append(qMakePair(path, document.get(QLatin1String("title")))); - } - document.clear(); + foreach (QSharedPointer<QCLuceneHits> hits, cluceneHitsList) { + for (qint32 i = 0; i < hits->length(); i++) { + document = hits->document(i); + const QString path = document.get(PathField); + if (!pathSet.contains(path) && namespaceList.contains( + document.get(NamespaceField), Qt::CaseInsensitive)) { + pathSet.insert(path); + hitList.append(qMakePair(path, document.get(TitleField))); + } + document.clear(); - mutex.lock(); - if (m_cancel) { + mutex.lock(); + if (m_cancel) { + mutex.unlock(); + emit searchingFinished(0); + return; + } mutex.unlock(); - emit searchingFinished(0); - return; } - mutex.unlock(); } 
indexSearcher.close(); @@ -185,144 +212,205 @@ void QHelpSearchIndexReaderClucene::run() } } -bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery, - QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::buildQuery( + const QList<QHelpSearchQuery> &queries, const QString &fieldName, + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) { - const QLatin1String c("content"); - const QLatin1String t("titleTokenized"); - - QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer); - QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer); - if (query && query2) { - booleanQuery.add(query, true, false, false); - booleanQuery.add(query2, true, false, false); - return true; + bool queryIsValid = false; + foreach (const QHelpSearchQuery &query, queries) { + if (fieldName != ContentField && isNegativeQuery(query)) { + queryIsValid = false; + break; + } + switch (query.fieldName) { + case QHelpSearchQuery::FUZZY: + if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer)) + queryIsValid = true; + break; + case QHelpSearchQuery::WITHOUT: + if (fieldName != ContentField) + return false; + if (addWithoutQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::PHRASE: + if (addPhraseQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::ALL: + if (addAllQuery(query, fieldName, booleanQuery)) + queryIsValid = true; + break; + case QHelpSearchQuery::DEFAULT: + if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer)) + queryIsValid = true; + break; + case QHelpSearchQuery::ATLEAST: + if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer)) + queryIsValid = true; + break; + default: + Q_ASSERT(!"Invalid field name"); + } } - return false; + if (queryIsValid && !filterAttributes.isEmpty()) { + queryIsValid = + 
addAttributesQuery(filterAttributes, booleanQuery, analyzer); + } + + return queryIsValid; } -bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery, - const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::buildTryHarderQuery( + const QList<QHelpSearchQuery> &queries, const QString &fieldName, + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) { - foreach (const QHelpSearchQuery query, queryList) { - switch (query.fieldName) { - case QHelpSearchQuery::FUZZY: { - const QLatin1String fuzzy("~"); - foreach (const QString &term, query.wordList) { - if (term.isEmpty() - || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) { - return false; - } - } - } break; - - case QHelpSearchQuery::WITHOUT: { - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, query.wordList) { - if (stopWords.contains(term, Qt::CaseInsensitive)) - continue; - - QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); - QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("titleTokenized"), term.toLower())); - - if (query && query2) { - booleanQuery.add(query, true, false, true); - booleanQuery.add(query2, true, false, true); - } else { - return false; - } - } - } break; - - case QHelpSearchQuery::PHRASE: { - const QString &term = query.wordList.at(0).toLower(); - if (term.contains(QLatin1Char(' '))) { - QStringList termList = term.split(QLatin1String(" ")); - QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, termList) { - if (!stopWords.contains(term, Qt::CaseInsensitive)) - q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower())); - } - booleanQuery.add(q, true, true, false); - } else { - QCLuceneQuery *query = new 
QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); - QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("titleTokenized"), term.toLower())); - - if (query && query2) { - booleanQuery.add(query, true, true, false); - booleanQuery.add(query2, true, false, false); - } else { - return false; - } - } - } break; + if (queries.isEmpty()) + return false; + const QHelpSearchQuery &query = queries.front(); + if (query.fieldName != QHelpSearchQuery::DEFAULT) + return false; + if (isNegativeQuery(query)) + return false; + if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer)) + return false; + if (filterAttributes.isEmpty()) + return true; + return addAttributesQuery(filterAttributes, booleanQuery, analyzer); +} - case QHelpSearchQuery::ALL: { - QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); - foreach (const QString &term, query.wordList) { - if (stopWords.contains(term, Qt::CaseInsensitive)) - continue; +bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const +{ + const QString &search = query.wordList.join(" "); + return search.contains('!') || search.contains('-') + || search.contains(QLatin1String(" NOT ")); +} - QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( - QLatin1String("content"), term.toLower())); +bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + bool queryIsValid = false; + const QLatin1String fuzzy("~"); + foreach (const QString &term, query.wordList) { + if (!term.isEmpty()) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer); + if (lQuery != 0) { + booleanQuery.add(lQuery, true, false, false); + queryIsValid = true; + } + } + } + return queryIsValid; +} - if (query) { - booleanQuery.add(query, true, true, false); - } else { - return false; - } - } - } break; +bool 
QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, false, true); + queryIsValid = true; + } + return queryIsValid; +} - case QHelpSearchQuery::DEFAULT: { - foreach (const QString &term, query.wordList) { - QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), - QLatin1String("content"), analyzer); +bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QString &term = query.wordList.at(0).toLower(); + if (term.contains(QLatin1Char(' '))) { + const QStringList termList = term.split(QLatin1String(" ")); + QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); + const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, termList) { + if (!stopWords.contains(term, Qt::CaseInsensitive)) + q->addTerm(QCLuceneTerm(fieldName, term.toLower())); + } + if (!q->getTerms().isEmpty()) { + booleanQuery.add(q, true, true, false); + queryIsValid = true; + } + } else { + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, true, false); + queryIsValid = true; + } + return queryIsValid; +} - if (query) - booleanQuery.add(query, true, true, false); - } - } break; +bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query, + const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) +{ + bool queryIsValid = false; + const QStringList &stopWords = 
QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString &term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( + fieldName, term.toLower())); + booleanQuery.add(lQuery, true, true, false); + queryIsValid = true; + } + return queryIsValid; +} - case QHelpSearchQuery::ATLEAST: { - foreach (const QString &term, query.wordList) { - if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer)) - return false; - } - } +bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query, + const QString &fieldName, bool allTermsRequired, + QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + bool queryIsValid = false; + foreach (const QString &term, query.wordList) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer); + if (lQuery) { + booleanQuery.add(lQuery, true, allTermsRequired, false); + queryIsValid = true; } } - - return true; + return queryIsValid; } -bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery, - const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) +bool QHelpSearchIndexReaderClucene::addAtLeastQuery( + const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer) { - bool retVal = false; - foreach (const QHelpSearchQuery query, queryList) { - switch (query.fieldName) { - default: break; - case QHelpSearchQuery::DEFAULT: { - foreach (const QString &term, query.wordList) { - QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), - QLatin1String("content"), analyzer); - - if (query) { - retVal = true; - booleanQuery.add(query, true, false, false); - } - } - } break; + bool queryIsValid = false; + foreach (const QString &term, query.wordList) { + if (!term.isEmpty()) { + QCLuceneQuery *lQuery = + QCLuceneQueryParser::parse(term, 
fieldName, analyzer); + if (lQuery) { + booleanQuery.add(lQuery, true, false, false); + queryIsValid = true; + } } } - return retVal; + return queryIsValid; +} + +bool QHelpSearchIndexReaderClucene::addAttributesQuery( + const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer) +{ + QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+") + + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer); + if (!lQuery) + return false; + booleanQuery.add(lQuery, true, true, false); + return true; } void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine, @@ -336,21 +424,22 @@ void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engin QCLuceneStandardAnalyzer analyzer; QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse( - joinedQuery, QLatin1String("content"), analyzer); + joinedQuery, ContentField, analyzer); if (parsedQuery) { joinedQuery = parsedQuery->toString(); delete parsedQuery; } - int length = QString(QLatin1String("content:")).length(); - int index = joinedQuery.indexOf(QLatin1String("content:")); + const QString contentString(ContentField + QLatin1String(":")); + int length = contentString.length(); + int index = joinedQuery.indexOf(contentString); QString term; int nextIndex = 0; QStringList searchTerms; while (index != -1) { - nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1); + nextIndex = joinedQuery.indexOf(contentString, index + 1); term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified(); if (term.startsWith(QLatin1String("\"")) && term.endsWith(QLatin1String("\""))) { diff --git a/tools/assistant/lib/qhelpsearchindexreader_clucene_p.h b/tools/assistant/lib/qhelpsearchindexreader_clucene_p.h index d7b539a..608668f 100644 --- a/tools/assistant/lib/qhelpsearchindexreader_clucene_p.h +++ b/tools/assistant/lib/qhelpsearchindexreader_clucene_p.h @@ -53,16 +53,19 @@ // We mean it. 
// -#include "qhelpsearchindexreader_p.h" +#include <QtCore/QList> +#include <QtCore/QString> +#include <QtCore/QStringList> #include "fulltextsearch/qanalyzer_p.h" #include "fulltextsearch/qquery_p.h" +#include "qhelpsearchindexreader_p.h" QT_BEGIN_NAMESPACE namespace qt { - namespace fulltextsearch { - namespace clucene { +namespace fulltextsearch { +namespace clucene { class QHelpSearchIndexReaderClucene : public QHelpSearchIndexReader { @@ -74,18 +77,38 @@ public: private: void run(); - bool defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery, - QCLuceneStandardAnalyzer &analyzer); - bool buildQuery(QCLuceneBooleanQuery &booleanQuery, const QList<QHelpSearchQuery> &queryList, - QCLuceneStandardAnalyzer &analyzer); - bool buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery, - const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer); void boostSearchHits(const QHelpEngineCore &engine, QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList); + bool buildQuery(const QList<QHelpSearchQuery> &queries, + const QString &fieldName, + const QStringList &filterAttributes, + QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer); + bool buildTryHarderQuery(const QList<QHelpSearchQuery> &queries, + const QString &fieldName, + const QStringList &filterAttributes, + QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer); + bool addFuzzyQuery(const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer); + bool addWithoutQuery(const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery); + bool addPhraseQuery(const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery); + bool addAllQuery(const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery); + bool addDefaultQuery(const QHelpSearchQuery &query, const QString 
&fieldName, + bool allTermsRequired, QCLuceneBooleanQuery &booleanQuery, + QCLuceneAnalyzer &analyzer); + bool addAtLeastQuery(const QHelpSearchQuery &query, const QString &fieldName, + QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer); + bool addAttributesQuery(const QStringList &filterAttributes, + QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer); + bool isNegativeQuery(const QHelpSearchQuery &query) const; }; - } // namespace clucene - } // namespace fulltextsearch +} // namespace clucene +} // namespace fulltextsearch } // namespace qt QT_END_NAMESPACE diff --git a/tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp b/tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp index b9aedbe..f767e27 100644 --- a/tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp +++ b/tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp @@ -39,6 +39,7 @@ ** ****************************************************************************/ +#include "qclucenefieldnames_p.h" #include "qhelpenginecore.h" #include "qhelp_global.h" #include "fulltextsearch/qhits_p.h" @@ -407,17 +408,17 @@ public: QString parsedTitle = QHelpGlobal::documentTitle(data); if(!parsedData.isEmpty()) { - document->add(new QCLuceneField(QLatin1String("content"), + document->add(new QCLuceneField(ContentField, parsedData,QCLuceneField::INDEX_TOKENIZED)); - document->add(new QCLuceneField(QLatin1String("path"), fileName, + document->add(new QCLuceneField(PathField, fileName, QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); - document->add(new QCLuceneField(QLatin1String("title"), parsedTitle, + document->add(new QCLuceneField(TitleField, parsedTitle, QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); - document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle, + document->add(new QCLuceneField(TitleTokenizedField, parsedTitle, QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); - document->add(new 
QCLuceneField(QLatin1String("namespace"), namespaceName, + document->add(new QCLuceneField(NamespaceField, namespaceName, QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); - document->add(new QCLuceneField(QLatin1String("attribute"), attributes, + document->add(new QCLuceneField(AttributeField, attributes, QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); return true; } @@ -713,9 +714,7 @@ void QHelpSearchIndexWriter::run() if (indexMap.contains(namespaceName)) { // make sure we really have content indexed for namespace - // NOTE: Extra variable just for GCC 3.3.5 - QLatin1String key("namespace"); - QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName)); + QCLuceneTermQuery query(QCLuceneTerm(NamespaceField, namespaceName)); QCLuceneIndexSearcher indexSearcher(indexPath); QCLuceneHits hits = indexSearcher.search(query); if (hits.length() <= 0) @@ -858,8 +857,7 @@ void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath, return; QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath); - reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"), - namespaceName)); + reader.deleteDocuments(QCLuceneTerm(NamespaceField, namespaceName)); reader.close(); } diff --git a/tools/assistant/lib/qhelpsearchquerywidget.cpp b/tools/assistant/lib/qhelpsearchquerywidget.cpp index f2f40ec..10d5470 100644 --- a/tools/assistant/lib/qhelpsearchquerywidget.cpp +++ b/tools/assistant/lib/qhelpsearchquerywidget.cpp @@ -120,33 +120,6 @@ private: // nothing todo } - QString escapeString(const QString &text) - { - QString retValue = text; - const QString escape(QLatin1String("\\")); - QStringList escapableCharsList; - escapableCharsList << QLatin1String("\\") << QLatin1String("+") - << QLatin1String("-") << QLatin1String("!") << QLatin1String("(") - << QLatin1String(")") << QLatin1String(":") << QLatin1String("^") - << QLatin1String("[") << QLatin1String("]") << QLatin1String("{") - << QLatin1String("}") << QLatin1String("~"); - - // make 
sure we won't end up with an empty string - foreach (const QString escapeChar, escapableCharsList) { - if (retValue.contains(escapeChar)) - retValue.replace(escapeChar, QLatin1String("")); - } - if (retValue.trimmed().isEmpty()) - return retValue; - - retValue = text; // now realy escape the string... - foreach (const QString escapeChar, escapableCharsList) { - if (retValue.contains(escapeChar)) - retValue.replace(escapeChar, escape + escapeChar); - } - return retValue; - } - QStringList buildTermList(const QString query) { bool s = false; @@ -295,14 +268,14 @@ private slots: #else if (defaultQuery->isEnabled()) { queryList.append(QHelpSearchQuery(QHelpSearchQuery::DEFAULT, - buildTermList(escapeString(defaultQuery->text())))); + buildTermList(defaultQuery->text()))); } else { const QRegExp exp(QLatin1String("\\s+")); QStringList lst = similarQuery->text().split(exp, QString::SkipEmptyParts); if (!lst.isEmpty()) { QStringList fuzzy; foreach (const QString term, lst) - fuzzy += buildTermList(escapeString(term)); + fuzzy += buildTermList(term); queryList.append(QHelpSearchQuery(QHelpSearchQuery::FUZZY, fuzzy)); } @@ -310,13 +283,13 @@ private slots: if (!lst.isEmpty()) { QStringList without; foreach (const QString term, lst) - without.append(escapeString(term)); + without.append(term); queryList.append(QHelpSearchQuery(QHelpSearchQuery::WITHOUT, without)); } if (!exactQuery->text().isEmpty()) { QString phrase = exactQuery->text().remove(QLatin1Char('\"')); - phrase = escapeString(phrase.simplified()); + phrase = phrase.simplified(); queryList.append(QHelpSearchQuery(QHelpSearchQuery::PHRASE, QStringList(phrase))); } @@ -324,7 +297,7 @@ private slots: if (!lst.isEmpty()) { QStringList all; foreach (const QString term, lst) - all.append(escapeString(term)); + all.append(term); queryList.append(QHelpSearchQuery(QHelpSearchQuery::ALL, all)); } @@ -332,7 +305,7 @@ private slots: if (!lst.isEmpty()) { QStringList atLeast; foreach (const QString term, lst) - atLeast += 
buildTermList(escapeString(term)); + atLeast += buildTermList(term); queryList.append(QHelpSearchQuery(QHelpSearchQuery::ATLEAST, atLeast)); } } |