/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the Qt Assistant of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the either Technology Preview License Agreement or the
** Beta Release License Agreement.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain
** additional rights. These rights are described in the Nokia Qt LGPL
** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
** package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qhelpenginecore.h"
#include "qhelp_global.h"
#include "fulltextsearch/qhits_p.h"
#include "fulltextsearch/qquery_p.h"
#include "fulltextsearch/qanalyzer_p.h"
#include "fulltextsearch/qdocument_p.h"
#include "fulltextsearch/qsearchable_p.h"
#include "fulltextsearch/qindexreader_p.h"
#include "fulltextsearch/qindexwriter_p.h"
#include "qhelpsearchindexwriter_clucene_p.h"

#include <QtCore/QDir>
#include <QtCore/QString>
#include <QtCore/QFileInfo>
#include <QtCore/QTextCodec>
#include <QtCore/QTextStream>

#include <QtNetwork/QLocalSocket>
#include <QtNetwork/QLocalServer>

QT_BEGIN_NAMESPACE

namespace qt {
    namespace fulltextsearch {
        namespace clucene {

class DocumentHelper
{
public:
    DocumentHelper(const QString& fileName, const QByteArray &data)
        : fileName(fileName) , data(readData(data)) {}
    ~DocumentHelper() {}

    bool addFieldsToDocument(QCLuceneDocument *document,
        const QString &namespaceName, const QString &attributes = QString())
    {
        if (!document)
            return false;

        if(!data.isEmpty()) {
            QString parsedData = parseData();
            QString parsedTitle = QHelpGlobal::documentTitle(data);

            if(!parsedData.isEmpty()) {
                document->add(new QCLuceneField(QLatin1String("content"),
                    parsedData,QCLuceneField::INDEX_TOKENIZED));
                document->add(new QCLuceneField(QLatin1String("path"), fileName,
                    QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
                document->add(new QCLuceneField(QLatin1String("title"), parsedTitle,
                    QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
                document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle,
                    QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
                document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName,
                    QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
                document->add(new QCLuceneField(QLatin1String("attribute"), attributes,
                    QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
                return true;
            }
        }

        return false;
    }

private:
    QString readData(const QByteArray &data)
    {
        QTextStream textStream(data);
        QByteArray charSet = QHelpGlobal::charsetFromData(data).toLatin1();
        textStream.setCodec(QTextCodec::codecForName(charSet.constData()));

        QString stream = textStream.readAll();
        if (stream.isNull() || stream.isEmpty())
            return QString();

        return stream;
    }

    QString parseData() const
    {
        const int length = data.length();
        const QChar *buf = data.unicode();

        QString parsedContent;
        parsedContent.reserve(length);

        bool valid = true;
        int j = 0, count = 0;

        QChar c;
        while (j < length) {
            c = buf[j++];
            if (c == QLatin1Char('<') || c == QLatin1Char('&')) {
                if (count > 1)
                    parsedContent.append(QLatin1Char(' '));
                count = 0;
                valid = false;
                continue;
            }
            if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) {
                valid = true;
                continue;
            }
            if (!valid)
                continue;

            if (c.isLetterOrNumber() || c.isPrint()) {
                ++count;
                parsedContent.append(c.toLower());
            } else {
                if (count > 1)
                    parsedContent.append(QLatin1Char(' '));
                count = 0;
            }
        }

        return parsedContent;
    }

private:
    QString fileName;
    QString data;
};


// Constructs a parentless writer thread with the cancel flag cleared.
// No thread is started here; that is done by updateIndex().
QHelpSearchIndexWriter::QHelpSearchIndexWriter()
    : QThread(0), m_cancel(false)
{
}

// Requests cancellation, wakes one waiter on the wait condition and then
// blocks until the thread has finished.
QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
{
    {
        // The guard must be released before wait(): run() takes the same
        // mutex to look at the cancel flag.
        QMutexLocker guard(&mutex);
        m_cancel = true;
        waitCondition.wakeOne();
    }

    wait();
}

void QHelpSearchIndexWriter::cancelIndexing()
{
    mutex.lock();
    this->m_cancel = true;
    mutex.unlock();
}

void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
    const QString &indexFilesFolder, bool reindex)
{
    mutex.lock();
    this->m_cancel = false;
    this->m_reindex = reindex;
    this->m_collectionFile = collectionFile;
    this->m_indexFilesFolder = indexFilesFolder;
    mutex.unlock();

    start(QThread::NormalPriority);
}

void QHelpSearchIndexWriter::optimizeIndex()
{
    if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) {
        if (QCLuceneIndexReader::isLocked(m_indexFilesFolder))
            return;

        QCLuceneStandardAnalyzer analyzer;
        QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false);
        writer.optimize();
        writer.close();
    }
}

void QHelpSearchIndexWriter::run()
{
    QMutexLocker mutexLocker(&mutex);

    if (m_cancel)
        return;

    const bool reindex = this->m_reindex;
    const QString collectionFile(this->m_collectionFile);

    mutexLocker.unlock();

    QHelpEngineCore engine(collectionFile, 0);
    if (!engine.setupData())
        return;

    const QLatin1String key("CluceneIndexedNamespaces");
    if (reindex)
        engine.setCustomValue(key, QLatin1String(""));

    QMap<QString, QDateTime> indexMap;
    const QLatin1String oldKey("CluceneSearchNamespaces");
    if (!engine.customValue(oldKey, QString()).isNull()) {
        // old style qhc file < 4.4.2, need to convert...
        const QStringList indexedNamespaces = engine.customValue(oldKey).
            toString().split(QLatin1String("|"), QString::SkipEmptyParts);
        foreach (const QString& nameSpace, indexedNamespaces)
            indexMap.insert(nameSpace, QDateTime());
        engine.removeCustomValue(oldKey);
    } else {
        QDataStream dataStream(engine.customValue(key).toByteArray());
        dataStream >> indexMap;
    }

    QString indexPath = m_indexFilesFolder;

    QFileInfo fInfo(indexPath);
    if (fInfo.exists() && !fInfo.isWritable()) {
        qWarning("Full Text Search, could not create index (missing permissions).");
        return;
    }

    emit indexingStarted();

    QCLuceneIndexWriter *writer = 0;
    QCLuceneStandardAnalyzer analyzer;
    const QStringList registeredDocs = engine.registeredDocumentations();

    QLocalSocket localSocket;
    localSocket.connectToServer(QString(QLatin1String("QtAssistant%1"))
        .arg(QLatin1String(QT_VERSION_STR)));

    QLocalServer localServer;
    bool otherInstancesRunning = true;
    if (!localSocket.waitForConnected()) {
        otherInstancesRunning = false;
        localServer.listen(QString(QLatin1String("QtAssistant%1"))
            .arg(QLatin1String(QT_VERSION_STR)));
    }

#if !defined(QT_NO_EXCEPTIONS)
    try {
#endif
        // check if it's locked, and if the other instance is running
        if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath))
            QCLuceneIndexReader::unlock(indexPath);

        if (QCLuceneIndexReader::isLocked(indexPath)) {
            // poll unless indexing finished to fake progress
            while (QCLuceneIndexReader::isLocked(indexPath)) {
                mutexLocker.relock();
                if (m_cancel)
                    break;
                mutexLocker.unlock();
                this->sleep(1);
            }
            emit indexingFinished();
            return;
        }

        if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) {
            foreach(const QString& namespaceName, registeredDocs) {
                mutexLocker.relock();
                if (m_cancel) {
                    emit indexingFinished();
                    return;
                }
                mutexLocker.unlock();

                if (!indexMap.contains(namespaceName)) {
                    // make sure we remove some partly indexed stuff
                    removeDocuments(indexPath, namespaceName);
                } else {
                    QString path = engine.documentationFileName(namespaceName);
                    if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
                        // make sure we remove some outdated indexed stuff
                        indexMap.remove(namespaceName);
                        removeDocuments(indexPath, namespaceName);
                    }

                    if (indexMap.contains(namespaceName)) {
                        // make sure we really have content indexed for namespace
                        // NOTE: Extra variable just for GCC 3.3.5
                        QLatin1String key("namespace");
                        QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName));
                        QCLuceneIndexSearcher indexSearcher(indexPath);
                        QCLuceneHits hits = indexSearcher.search(query);
                        if (hits.length() <= 0)
                            indexMap.remove(namespaceName);
                    }
                }
            }
            writer = new QCLuceneIndexWriter(indexPath, analyzer, false);
        } else {
            indexMap.clear();
            writer = new QCLuceneIndexWriter(indexPath, analyzer, true);
        }
#if !defined(QT_NO_EXCEPTIONS)
    } catch (...) {
        qWarning("Full Text Search, could not create index writer.");
        return;
    }
#endif

    writer->setMergeFactor(100);
    writer->setMinMergeDocs(1000);
    writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH);

    QStringList namespaces;
    foreach(const QString& namespaceName, registeredDocs) {
        mutexLocker.relock();
        if (m_cancel) {
            writer->close();
            delete writer;
            emit indexingFinished();
            return;
        }
        mutexLocker.unlock();

        namespaces.append(namespaceName);
        if (indexMap.contains(namespaceName))
            continue;

        const QList<QStringList> attributeSets =
            engine.filterAttributeSets(namespaceName);

        if (attributeSets.isEmpty()) {
            const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName,
                QStringList());
            if (!addDocuments(docFiles, engine, QStringList(), namespaceName,
                writer, analyzer))
                break;
        } else {
            bool bail = false;
            foreach (const QStringList& attributes, attributeSets) {
                const QList<QUrl> docFiles = indexableFiles(&engine,
                    namespaceName, attributes);
                if (!addDocuments(docFiles, engine, attributes, namespaceName,
                    writer, analyzer)) {
                    bail = true;
                    break;
                }
            }
            if (bail)
                break;
        }

        mutexLocker.relock();
        if (!m_cancel) {
            QString path(engine.documentationFileName(namespaceName));
            indexMap.insert(namespaceName, QFileInfo(path).lastModified());
            writeIndexMap(engine, indexMap);
        }
        mutexLocker.unlock();
    }

    writer->close();
    delete writer;

    mutexLocker.relock();
    if (!m_cancel) {
        mutexLocker.unlock();

        QStringList indexedNamespaces = indexMap.keys();
        foreach(const QString& namespaceName, indexedNamespaces) {
            mutexLocker.relock();
            if (m_cancel)
                break;
            mutexLocker.unlock();

            if (!namespaces.contains(namespaceName)) {
                indexMap.remove(namespaceName);
                writeIndexMap(engine, indexMap);
                removeDocuments(indexPath, namespaceName);
            }
        }
    }
    emit indexingFinished();
}

bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles,
    const QHelpEngineCore &engine, const QStringList &attributes,
    const QString &namespaceName, QCLuceneIndexWriter *writer,
    QCLuceneAnalyzer &analyzer)
{
    QMutexLocker locker(&mutex);
    const QString attrList = attributes.join(QLatin1String(" "));

    locker.unlock();
    foreach(const QUrl& url, docFiles) {
        QCLuceneDocument document;
        DocumentHelper helper(url.toString(), engine.fileData(url));
        if (helper.addFieldsToDocument(&document, namespaceName, attrList))
            writer->addDocument(document, analyzer);

        locker.relock();
        if (m_cancel)
            return false;
        locker.unlock();
    }

    return true;
}

void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath,
    const QString &namespaceName)
{
    if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath))
        return;

    QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath);
    reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"),
        namespaceName));

    reader.close();
}

// Serializes \a indexMap and stores it in \a engine as the custom value
// "CluceneIndexedNamespaces". Returns the result of setCustomValue().
bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore& engine,
    const QMap<QString, QDateTime>& indexMap)
{
    QByteArray serializedMap;

    QDataStream out(&serializedMap, QIODevice::ReadWrite);
    out << indexMap;

    const QLatin1String valueKey("CluceneIndexedNamespaces");
    return engine.setCustomValue(valueKey, serializedMap);
}

// Returns the files of \a namespaceName matching \a attributes that get
// indexed: all "html" files, followed by all "htm" files, followed by all
// "txt" files, as reported by \a helpEngine.
QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine,
    const QString &namespaceName, const QStringList &attributes) const
{
    // The order of the table is the order of the returned list.
    static const char * const suffixes[] = { "html", "htm", "txt" };
    const int suffixCount = int(sizeof(suffixes) / sizeof(suffixes[0]));

    QList<QUrl> collected;
    for (int i = 0; i < suffixCount; ++i) {
        collected += helpEngine->files(namespaceName, attributes,
            QLatin1String(suffixes[i]));
    }

    return collected;
}


        }   // namespace clucene
    }   // namespace fulltextsearch
}   // namespace qt

QT_END_NAMESPACE