summary refs log tree commit diff stats
path: root/tools/assistant/compat/index.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/assistant/compat/index.cpp')
-rw-r--r-- tools/assistant/compat/index.cpp 581
1 files changed, 0 insertions, 581 deletions
diff --git a/tools/assistant/compat/index.cpp b/tools/assistant/compat/index.cpp
deleted file mode 100644
index ff54626..0000000
--- a/tools/assistant/compat/index.cpp
+++ /dev/null
@@ -1,581 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
-** All rights reserved.
-** Contact: Nokia Corporation (qt-info@nokia.com)
-**
-** This file is part of the Qt Assistant of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** No Commercial Usage
-** This file contains pre-release code and may not be distributed.
-** You may use this file in accordance with the terms and conditions
-** contained in the Technology Preview License Agreement accompanying
-** this package.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 2.1 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 2.1 requirements
-** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
-**
-** In addition, as a special exception, Nokia gives you certain additional
-** rights. These rights are described in the Nokia Qt LGPL Exception
-** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
-**
-** If you have questions regarding the use of this file, please contact
-** Nokia at qt-info@nokia.com.
-**
-**
-**
-**
-**
-**
-**
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include "index.h"
-
-#include <QFile>
-#include <QDir>
-#include <QStringList>
-#include <QApplication>
-#include <QByteArray>
-#include <QTextStream>
-#include <QtAlgorithms>
-#include <QUrl>
-#include <QTextCodec>
-#include <ctype.h>
-#include <QTextDocument>
-
-QT_BEGIN_NAMESPACE
-
-// A search term together with the documents that contain it.
-// Terms order by frequency alone, which lets a list of them be sorted with qSort().
-struct Term {
-    // Default-constructed terms carry the sentinel frequency -1.
-    Term()
-        : frequency(-1)
-    {
-    }
-
-    Term(const QString &t, int f, QVector<Document> l)
-        : term(t)
-        , frequency(f)
-        , documents(l)
-    {
-    }
-
-    // Only the frequency takes part in the comparison; the term text and the
-    // document list are ignored.
-    bool operator<(const Term &other) const
-    {
-        return frequency < other.frequency;
-    }
-
-    QString term;
-    int frequency;
-    QVector<Document> documents;
-};
-
-// Deserialises a Document: first its document number, then its frequency.
-QDataStream &operator>>(QDataStream &s, Document &l)
-{
-    s >> l.docNumber >> l.frequency;
-    return s;
-}
-
-// Serialises a Document as two 16-bit integers: document number, then frequency.
-QDataStream &operator<<(QDataStream &s, const Document &l)
-{
-    const qint16 number = (qint16)l.docNumber;
-    const qint16 count = (qint16)l.frequency;
-    s << number << count;
-    return s;
-}
-
-// Creates an index for the documentation directory dp. The document list is
-// not known yet, so makeIndex() will collect it. The hp argument is unused.
-Index::Index(const QString &dp, const QString &hp)
-    : QObject(0)
-    , docPath(dp)
-{
-    Q_UNUSED(hp);
-
-    lastWindowClosed = false;
-    alreadyHaveDocList = false;
-
-    // Record when the application's last window closes so that makeIndex()
-    // can stop early.
-    connect(qApp, SIGNAL(lastWindowClosed()), this, SLOT(setLastWinClosed()));
-}
-
-// Creates an index over an explicit list of documents dl, so makeIndex()
-// will not scan a directory. The hp argument is unused.
-Index::Index(const QStringList &dl, const QString &hp)
-    : QObject(0)
-{
-    Q_UNUSED(hp);
-
-    docList = dl;
-    alreadyHaveDocList = true;
-    lastWindowClosed = false;
-
-    // Record when the application's last window closes so that makeIndex()
-    // can stop early.
-    connect(qApp, SIGNAL(lastWindowClosed()), this, SLOT(setLastWinClosed()));
-}
-
-// Slot: remembers that the last window has been closed; makeIndex() polls
-// this flag and aborts when it is set.
-void Index::setLastWinClosed()
-{
-    lastWindowClosed = true;
-}
-
-// Sets the path of the file that writeDict() and readDict() operate on.
-void Index::setDictionaryFile(const QString &f)
-{
-    dictFile = f;
-}
-
-// Sets the path of the file that writeDocumentList() and readDocumentList()
-// operate on.
-void Index::setDocListFile(const QString &f)
-{
-    docListFile = f;
-}
-
-// Replaces the current list of document URLs with lst.
-void Index::setDocList(const QStringList &lst)
-{
-    docList = lst;
-}
-
-// Parses every document in docList into the dictionary.
-//
-// Returns 1 when there is nothing to index, -1 when indexing was aborted
-// because the last window was closed, and 0 on completion.
-// indexingProgress() is emitted with an increasing counter once every
-// `stride` documents.
-int Index::makeIndex()
-{
-    if (!alreadyHaveDocList)
-        setupDocumentList();
-    if (docList.isEmpty())
-        return 1;
-
-    // Roughly one progress notification per percent, but never a stride of zero.
-    const int stride = qMax(1, docList.count() / 100);
-    int progress = 0;
-    for (int docNum = 0; docNum < docList.count(); ++docNum) {
-        if (lastWindowClosed)
-            return -1;
-
-        const QUrl url(docList.at(docNum));
-        parseDocument(url.toLocalFile(), docNum);
-
-        if (docNum % stride == 0)
-            emit indexingProgress(++progress);
-    }
-    return 0;
-}
-
-// Appends a "file:" URL to docList for every *.html file found directly in
-// docPath.
-void Index::setupDocumentList()
-{
-    const QDir dir(docPath);
-    const QStringList htmlFiles = dir.entryList(QStringList(QLatin1String("*.html")));
-    for (int n = 0; n < htmlFiles.count(); ++n) {
-        const QString url = QLatin1String("file:") + docPath + QLatin1String("/") + htmlFiles.at(n);
-        docList.append(url);
-    }
-}
-
-// Records one occurrence of the word str in document docNum.
-//
-// The words "amp" and "nbsp" are skipped. A known word gets its count for
-// docNum increased, or a new Document record appended when docNum differs
-// from the last recorded document; an unknown word gets a fresh Entry.
-void Index::insertInDict( const QString &str, int docNum )
-{
-    if ( str == QLatin1String("amp") || str == QLatin1String("nbsp") )
-        return;
-
-    // value() returns 0 for unknown keys without inserting a placeholder,
-    // unlike the non-const operator[].
-    Entry *e = dict.value( str );
-    if ( !e ) {
-        dict.insert( str, new Entry( docNum ) );
-        return;
-    }
-
-    // An entry with no documents yet has no last(); treat it like a new document.
-    if ( e->documents.isEmpty() || e->documents.last().docNumber != docNum )
-        e->documents.append( Document( docNum, 1 ) );
-    else
-        e->documents.last().frequency++;
-}
-
-// Returns the charset named in the first <meta ...> tag of the document, in
-// lower case, or "utf-8" when none is found.
-//
-// The whole file is read through a temporary text stream, and the file is
-// rewound to offset 0 afterwards so the caller can read it again.
-QString Index::getCharsetForDocument(QFile *file)
-{
-    QTextStream s(file);
-    const QString contents = s.readAll();
-
-    QString encoding;
-    // indexOf() returns -1 when nothing is found; offset 0 is a valid hit.
-    const int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive);
-    if (start >= 0) {
-        const int attrStart = start + 5;    // skip past "<meta"
-        const int end = contents.indexOf(QLatin1String(">"), attrStart);
-        // An unterminated tag (end == -1) is scanned to the end of the text;
-        // mid() with a negative length returns the remainder.
-        const int attrLen = (end < 0) ? -1 : end - attrStart;
-        const QString meta = contents.mid(attrStart, attrLen).toLower();
-        QRegExp r(QLatin1String("charset=([^\"\\s]+)"));
-        if (r.indexIn(meta) != -1)
-            encoding = r.cap(1);
-    }
-
-    file->seek(0);
-    if (encoding.isEmpty())
-        return QLatin1String("utf-8");
-    return encoding;
-}
-
-// Tokenises the HTML file `filename` and records every word in the
-// dictionary under document number docNum.
-//
-// Text from '<' to '>' and from '&' to ';' is skipped. Words consist of
-// letters, digits and '_', are lower-cased, must be at least two characters
-// long and are cut off at 63 characters.
-void Index::parseDocument(const QString &filename, int docNum)
-{
-    QFile file(filename);
-    if (!file.open(QFile::ReadOnly)) {
-        qWarning("can not open file %s", qPrintable(filename));
-        return;
-    }
-
-    QTextStream stream(&file);
-    const QString charset = getCharsetForDocument(&file);
-    stream.setCodec(QTextCodec::codecForName(charset.toLatin1().constData()));
-
-    const QString text = stream.readAll();
-    if (text.isNull())
-        return;
-
-    QChar word[64];
-    int wordLen = 0;
-    bool inText = true;    // false while inside a tag or a character entity
-    const int length = text.length();
-    for (int pos = 0; pos < length; ++pos) {
-        const QChar c = text.at(pos);
-        if (c == QLatin1Char('<') || c == QLatin1Char('&')) {
-            // Markup starts here: flush the pending word and begin skipping.
-            inText = false;
-            if (wordLen > 1)
-                insertInDict(QString(word, wordLen), docNum);
-            wordLen = 0;
-        } else if (!inText) {
-            if (c == QLatin1Char('>') || c == QLatin1Char(';'))
-                inText = true;
-        } else if ((c.isLetterOrNumber() || c == QLatin1Char('_')) && wordLen < 63) {
-            word[wordLen] = c.toLower();
-            ++wordLen;
-        } else {
-            // A separator, or the 64th character of an over-long word, ends the word.
-            if (wordLen > 1)
-                insertInDict(QString(word, wordLen), docNum);
-            wordLen = 0;
-        }
-    }
-    if (wordLen > 1)
-        insertInDict(QString(word, wordLen), docNum);
-    file.close();
-}
-
-// Writes the dictionary to dictFile and then the document list via
-// writeDocumentList(). Does nothing if dictFile cannot be opened.
-//
-// Each record is: the term, the number of documents, then the document vector.
-void Index::writeDict()
-{
-    QFile f( dictFile );
-    if ( !f.open(QFile::WriteOnly ) )
-        return;
-    QDataStream s( &f );
-    for (QHash<QString, Entry *>::ConstIterator it = dict.constBegin(); it != dict.constEnd(); ++it) {
-        const Entry *e = it.value();
-        // A non-const QHash::operator[] lookup of an unknown term leaves a
-        // null placeholder in the hash; such keys have nothing to serialise.
-        if ( !e )
-            continue;
-        s << it.key();
-        s << e->documents.count();
-        s << e->documents;
-    }
-    f.close();
-    writeDocumentList();
-}
-
-// Serialises docList into docListFile; does nothing if the file cannot be
-// opened for writing.
-void Index::writeDocumentList()
-{
-    QFile out(docListFile);
-    if (out.open(QFile::WriteOnly)) {
-        QDataStream stream(&out);
-        stream << docList;
-    }
-}
-
-// Replaces the in-memory dictionary with the contents of dictFile and then
-// loads the document list via readDocumentList(). Does nothing if dictFile
-// cannot be opened.
-void Index::readDict()
-{
-    QFile f( dictFile );
-    if ( !f.open(QFile::ReadOnly ) )
-        return;
-
-    // Free the old entries before dropping their pointers.
-    // NOTE(review): assumes dict is the only holder of its Entry objects (in
-    // this file they are only ever created with new and stored in dict) — confirm.
-    qDeleteAll( dict );
-    dict.clear();
-
-    QDataStream s( &f );
-    QString key;
-    int numOfDocs;
-    QVector<Document> docs;
-    while ( !s.atEnd() ) {
-        s >> key;
-        // writeDict() stores an explicit count before the vector. It is read
-        // only to stay in step with the format; the vector's own stream
-        // operator sizes docs itself.
-        s >> numOfDocs;
-        s >> docs;
-        // Stop at the first truncated or corrupt record instead of storing garbage.
-        if ( s.status() != QDataStream::Ok )
-            break;
-        dict.insert( key, new Entry( docs ) );
-    }
-    f.close();
-    readDocumentList();
-}
-
-// Loads docList from docListFile; does nothing if the file cannot be opened
-// for reading.
-void Index::readDocumentList()
-{
-    QFile in(docListFile);
-    if (in.open(QFile::ReadOnly)) {
-        QDataStream stream(&in);
-        stream >> docList;
-    }
-}
-
-// Returns the URLs of the documents that contain every term in `terms`.
-//
-// A term containing '*' is expanded through getWildcardTerms() and
-// setupDummyTerm(). If any plain term is unknown, the result is empty. When
-// termSeq is non-empty, each candidate document must also pass
-// searchForPattern( termSeq, seqWords, ... ).
-QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords )
-{
-    QList<Term> termList;
-    for ( QStringList::ConstIterator it = terms.constBegin(); it != terms.constEnd(); ++it ) {
-        if ( (*it).contains(QLatin1Char('*')) ) {
-            QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );
-            termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) );
-        } else {
-            // value() does not modify the hash; the non-const operator[]
-            // would insert a null entry for every unknown term queried.
-            const Entry *e = dict.value( *it );
-            if ( !e )
-                return QStringList();
-            termList.append( Term( *it, e->documents.count(), e->documents ) );
-        }
-    }
-    if ( termList.isEmpty() )
-        return QStringList();
-    qSort( termList );
-
-    // Start from the term found in the fewest documents and intersect it with
-    // every other term, summing the frequencies of the survivors.
-    QVector<Document> minDocs = termList.takeFirst().documents;
-    for ( QList<Term>::ConstIterator termIt = termList.constBegin(); termIt != termList.constEnd(); ++termIt ) {
-        const QVector<Document> &docs = (*termIt).documents;
-        for ( QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) {
-            bool found = false;
-            for ( QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) {
-                if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) {
-                    (*minDoc_it).frequency += (*doc_it).frequency;
-                    found = true;
-                    break;
-                }
-            }
-            if ( !found )
-                minDoc_it = minDocs.erase( minDoc_it );
-            else
-                ++minDoc_it;
-        }
-    }
-
-    QStringList results;
-    qSort( minDocs );
-    for ( QVector<Document>::ConstIterator it = minDocs.constBegin(); it != minDocs.constEnd(); ++it ) {
-        const int docNum = (int)(*it).docNumber;
-        // Skip document numbers with no entry in docList rather than indexing
-        // out of range, in case the two are out of sync.
-        if ( docNum < 0 || docNum >= docList.count() )
-            continue;
-        const QString fileName = docList.at( docNum );
-        if ( termSeq.isEmpty() || searchForPattern( termSeq, seqWords, fileName ) )
-            results << fileName;
-    }
-    return results;
-}
-
-// Returns the text between <title> and </title> in the document at URL
-// fullFileName, converted to plain text when it looks like rich text.
-//
-// Returns the translated string "Untitled" when there is no non-empty title,
-// and the local file name when the file cannot be opened. Titles are cached
-// per local file name.
-QString Index::getDocumentTitle( const QString &fullFileName )
-{
-    QUrl url(fullFileName);
-    QString fileName = url.toLocalFile();
-
-    if (documentTitleCache.contains(fileName))
-        return documentTitleCache.value(fileName);
-
-    QFile file( fileName );
-    if ( !file.open( QFile::ReadOnly ) ) {
-        qWarning( "cannot open file %s", qPrintable(fileName) );
-        return fileName;
-    }
-    QTextStream s( &file );
-    const QString text = s.readAll();
-
-    QString title = tr("Untitled");
-    const int open = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive);
-    // Without an opening tag there is no title; adding 7 to indexOf()'s -1
-    // would otherwise give a bogus start offset of 6.
-    if (open >= 0) {
-        const int start = open + 7;    // length of "<title>"
-        const int end = text.indexOf(QLatin1String("</title>"), start, Qt::CaseInsensitive);
-        if (end > start) {
-            title = text.mid(start, end - start);
-            if (Qt::mightBeRichText(title)) {
-                QTextDocument doc;
-                doc.setHtml(title);
-                title = doc.toPlainText();
-            }
-        }
-    }
-    documentTitleCache.insert(fileName, title);
-    return title;
-}
-
-// Returns every dictionary key that matches the wildcard expression `term`.
-//
-// The expression is cut into literal parts and "*" markers by split(). The
-// literal parts must appear in the key in order; a leading literal must share
-// its first character with the key, and the final literal must end the key.
-QStringList Index::getWildcardTerms(const QString &term)
-{
-    QStringList matches;
-    QStringList parts = split(term);
-
-    for (QHash<QString, Entry*>::Iterator entry = dict.begin(); entry != dict.end(); ++entry) {
-        QString word(entry.key());
-        int pos = 0;
-        bool matched = false;
-        for (QStringList::Iterator part = parts.begin(); part != parts.end(); ++part) {
-            if (*part == QLatin1String("*")) {
-                matched = true;
-                continue;
-            }
-
-            // Assume failure until this literal part has been placed in the word.
-            matched = false;
-            if (part == parts.begin() && (*part)[0] != word[0])
-                break;
-
-            pos = word.indexOf(*part, pos);
-            if (*part == parts.last() && pos != word.length() - 1) {
-                // The final literal has to sit at the very end of the word.
-                pos = word.lastIndexOf(*part);
-                if (pos != word.length() - (*part).length())
-                    break;
-            }
-            if (pos == -1)
-                break;
-
-            matched = true;
-            pos += (*part).length();
-        }
-        if (matched)
-            matches << word;
-    }
-
-    return matches;
-}
-
-// Cuts a wildcard expression into its literal parts and "*" markers.
-//
-// A leading '*' yields a "*" item; each non-empty literal that is followed by
-// a '*' yields the literal and then "*"; a non-empty remainder after the last
-// '*' is appended last.
-QStringList Index::split(const QString &str)
-{
-    QStringList parts;
-    if (str.startsWith(QLatin1String("*")))
-        parts << QLatin1String("*");
-
-    int from = 0;
-    for (int star = str.indexOf(QLatin1Char('*'), from);
-         star != -1;
-         star = str.indexOf(QLatin1Char('*'), from)) {
-        // An empty literal (leading or repeated '*') adds nothing.
-        if (star > from) {
-            parts << str.mid(from, star - from);
-            parts << QLatin1String("*");
-        }
-        from = star + 1;
-    }
-
-    const QString tail = str.mid(from, str.length() - from);
-    if (tail.length() > 0)
-        parts << tail;
-
-    return parts;
-}
-
-// Returns the union of the document lists of all known terms in `terms`.
-//
-// Unknown terms are ignored; the result is empty when none is known. The
-// union starts from the term with the most documents and appends the
-// documents of the others that are not in it yet.
-QVector<Document> Index::setupDummyTerm( const QStringList &terms )
-{
-    QList<Term> termList;
-    for ( QStringList::ConstIterator it = terms.constBegin(); it != terms.constEnd(); ++it ) {
-        // value() returns 0 for unknown terms without inserting a null
-        // entry, which the non-const operator[] would do.
-        const Entry *e = dict.value( *it );
-        if ( e )
-            termList.append( Term( *it, e->documents.count(), e->documents ) );
-    }
-    QVector<Document> maxList(0);
-    if ( termList.isEmpty() )
-        return maxList;
-    qSort( termList );
-
-    maxList = termList.takeLast().documents;
-    for ( QList<Term>::ConstIterator it = termList.constBegin(); it != termList.constEnd(); ++it ) {
-        const QVector<Document> &docs = (*it).documents;
-        for ( QVector<Document>::ConstIterator docIt = docs.constBegin(); docIt != docs.constEnd(); ++docIt ) {
-            if ( maxList.indexOf( *docIt ) == -1 )
-                maxList.append( *docIt );
-        }
-    }
-    return maxList;
-}
-
-// Records the current word position for str if str is a word tracked in
-// miniDict, then advances the word counter. Untracked words only advance
-// the counter.
-void Index::buildMiniDict( const QString &str )
-{
-    // value() leaves miniDict untouched for untracked words; the non-const
-    // operator[] would add a null entry for every distinct word seen.
-    PosEntry *entry = miniDict.value( str );
-    if ( entry )
-        entry->positions.append( wordNum );
-    ++wordNum;
-}
-
-// Checks whether the document at URL fileName contains a phrase from
-// `patterns` as a run of consecutive words.
-//
-// `words` lists the words whose positions are collected while scanning the
-// document; each pattern is a space-separated phrase. Returns false when the
-// file cannot be opened.
-//
-// NOTE(review): the candidate list is reset for every pattern and only
-// checked after the loop, so only the last pattern decides the result. This
-// matches the previous behaviour — confirm it is intended.
-bool Index::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName )
-{
-    QUrl url(fileName);
-    QString fName = url.toLocalFile();
-    QFile file( fName );
-    if ( !file.open( QFile::ReadOnly ) ) {
-        qWarning( "cannot open file %s", qPrintable(fName) );
-        return false;
-    }
-
-    wordNum = 3;
-    // Free the entries of the previous search before dropping them.
-    // NOTE(review): assumes miniDict is the only holder of its PosEntry
-    // objects (in this file they are only created just below) — confirm.
-    qDeleteAll( miniDict );
-    miniDict.clear();
-    for ( QStringList::ConstIterator cIt = words.constBegin(); cIt != words.constEnd(); ++cIt ) {
-        // A repeated word would replace, and leak, the entry already stored.
-        if ( !miniDict.contains( *cIt ) )
-            miniDict.insert( *cIt, new PosEntry( 0 ) );
-    }
-
-    QTextStream s( &file );
-    const QString text = s.readAll();
-
-    // Same tokenisation as parseDocument(): skip <...> and &...;, lower-case
-    // the words, keep words of 2 to 63 characters.
-    QChar str[64];
-    int i = 0;
-    bool valid = true;
-    const int length = text.length();
-    for ( int pos = 0; pos < length; ++pos ) {
-        const QChar c = text.at( pos );
-        if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
-            valid = false;
-            if ( i > 1 )
-                buildMiniDict( QString(str,i) );
-            i = 0;
-        } else if ( !valid ) {
-            if ( c == QLatin1Char('>') || c == QLatin1Char(';') )
-                valid = true;
-        } else if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
-            str[i] = c.toLower();
-            ++i;
-        } else {
-            if ( i > 1 )
-                buildMiniDict( QString(str,i) );
-            i = 0;
-        }
-    }
-    if ( i > 1 )
-        buildMiniDict( QString(str,i) );
-    file.close();
-
-    QList<uint> a;
-    for ( QStringList::ConstIterator patIt = patterns.constBegin(); patIt != patterns.constEnd(); ++patIt ) {
-        const QStringList wordLst = (*patIt).split(QLatin1Char(' '));
-        // A pattern word that is not tracked has no recorded positions, so it
-        // cannot match; use an empty list instead of dereferencing a null entry.
-        const PosEntry *first = miniDict.value( wordLst.at(0) );
-        a = first ? first->positions : QList<uint>();
-        for ( int w = 1; w < wordLst.count(); ++w ) {
-            const PosEntry *next = miniDict.value( wordLst.at(w) );
-            const QList<uint> b = next ? next->positions : QList<uint>();
-            // Keep only the candidates directly followed by the next word,
-            // advancing each survivor to that word's position.
-            QList<uint>::iterator aIt = a.begin();
-            while ( aIt != a.end() ) {
-                if ( b.contains( *aIt + 1 ) ) {
-                    (*aIt)++;
-                    ++aIt;
-                } else {
-                    aIt = a.erase( aIt );
-                }
-            }
-        }
-    }
-    return !a.isEmpty();
-}
-
-QT_END_NAMESPACE