diff options
Diffstat (limited to 'tools/assistant/compat/index.cpp')
-rw-r--r-- | tools/assistant/compat/index.cpp | 581 |
1 files changed, 0 insertions, 581 deletions
diff --git a/tools/assistant/compat/index.cpp b/tools/assistant/compat/index.cpp deleted file mode 100644 index ff54626..0000000 --- a/tools/assistant/compat/index.cpp +++ /dev/null @@ -1,581 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). -** All rights reserved. -** Contact: Nokia Corporation (qt-info@nokia.com) -** -** This file is part of the Qt Assistant of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** No Commercial Usage -** This file contains pre-release code and may not be distributed. -** You may use this file in accordance with the terms and conditions -** contained in the Technology Preview License Agreement accompanying -** this package. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 2.1 requirements -** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -** -** In addition, as a special exception, Nokia gives you certain additional -** rights. These rights are described in the Nokia Qt LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -** -** If you have questions regarding the use of this file, please contact -** Nokia at qt-info@nokia.com. -** -** -** -** -** -** -** -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "index.h" - -#include <QFile> -#include <QDir> -#include <QStringList> -#include <QApplication> -#include <QByteArray> -#include <QTextStream> -#include <QtAlgorithms> -#include <QUrl> -#include <QTextCodec> -#include <ctype.h> -#include <QTextDocument> - -QT_BEGIN_NAMESPACE - -struct Term { - Term() : frequency(-1) {} - Term( const QString &t, int f, QVector<Document> l ) : term( t ), frequency( f ), documents( l ) {} - QString term; - int frequency; - QVector<Document>documents; - bool operator<( const Term &i2 ) const { return frequency < i2.frequency; } -}; - -QDataStream &operator>>( QDataStream &s, Document &l ) -{ - s >> l.docNumber; - s >> l.frequency; - return s; -} - -QDataStream &operator<<( QDataStream &s, const Document &l ) -{ - s << (qint16)l.docNumber; - s << (qint16)l.frequency; - return s; -} - -Index::Index( const QString &dp, const QString &hp ) - : QObject( 0 ), docPath( dp ) -{ - Q_UNUSED(hp); - - alreadyHaveDocList = false; - lastWindowClosed = false; - connect( qApp, SIGNAL(lastWindowClosed()), - this, SLOT(setLastWinClosed()) ); -} - -Index::Index( const QStringList &dl, const QString &hp ) - : QObject( 0 ) -{ - Q_UNUSED(hp); - docList = dl; - alreadyHaveDocList = true; - lastWindowClosed = false; - connect( qApp, SIGNAL(lastWindowClosed()), - this, SLOT(setLastWinClosed()) ); -} - -void Index::setLastWinClosed() -{ - lastWindowClosed = true; -} - -void Index::setDictionaryFile( const QString &f ) -{ - dictFile = f; -} - -void Index::setDocListFile( const QString &f ) -{ - docListFile = f; -} - -void Index::setDocList( const QStringList &lst ) -{ - docList = lst; -} - -int Index::makeIndex() -{ - if ( !alreadyHaveDocList ) - setupDocumentList(); - if ( docList.isEmpty() ) - return 1; - QStringList::Iterator it = docList.begin(); - int steps = docList.count() / 100; - if ( !steps ) - steps++; - int prog = 0; - for ( int i = 0; it != docList.end(); ++it, ++i ) { - if ( lastWindowClosed ) { - return -1; - } - QUrl url(*it); - parseDocument( url.toLocalFile(), i ); - if ( i%steps == 0 ) { - prog++; - emit indexingProgress( prog ); - } - } - return 0; -} - -void Index::setupDocumentList() -{ - QDir d( docPath ); - QStringList filters; - filters.append(QLatin1String("*.html")); - QStringList lst = d.entryList(filters); - QStringList::ConstIterator it = lst.constBegin(); - for ( ; it != lst.constEnd(); ++it ) - docList.append( QLatin1String("file:") + docPath + QLatin1String("/") + *it ); -} - -void Index::insertInDict( const QString &str, int docNum ) -{ - if ( str == QLatin1String("amp") || str == QLatin1String("nbsp")) - return; - Entry *e = 0; - if ( dict.count() ) - e = dict[ str ]; - - if ( e ) { - if ( e->documents.last().docNumber != docNum ) - e->documents.append( Document(docNum, 1 ) ); - else - e->documents.last().frequency++; - } else { - dict.insert( str, new Entry( docNum ) ); - } -} - -QString Index::getCharsetForDocument(QFile *file) -{ - QTextStream s(file); - QString contents = s.readAll(); - - QString encoding; - int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive); - if (start > 0) { - int end = contents.indexOf(QLatin1String(">"), start); - QString meta = contents.mid(start+5, end-start); - meta = meta.toLower(); - QRegExp r(QLatin1String("charset=([^\"\\s]+)")); - if (r.indexIn(meta) != -1) { - encoding = r.cap(1); - } - } - - file->seek(0); - if (encoding.isEmpty()) - return QLatin1String("utf-8"); - return encoding; -} - -void Index::parseDocument( const QString &filename, int docNum ) -{ - QFile file( filename ); - if ( !file.open(QFile::ReadOnly) ) { - qWarning( "can not open file %s", qPrintable(filename) ); - return; - } - - QTextStream s(&file); - QString en = getCharsetForDocument(&file); - s.setCodec(QTextCodec::codecForName(en.toLatin1().constData())); - - QString text = s.readAll(); - if (text.isNull()) - return; - - bool valid = true; - const QChar *buf = text.unicode(); - QChar str[64]; - QChar c = buf[0]; - int j = 0; - int i = 0; - while ( j < text.length() ) { - if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { - valid = false; - if ( i > 1 ) - insertInDict( QString(str,i), docNum ); - i = 0; - c = buf[++j]; - continue; - } - if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { - valid = true; - c = buf[++j]; - continue; - } - if ( !valid ) { - c = buf[++j]; - continue; - } - if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { - str[i] = c.toLower(); - ++i; - } else { - if ( i > 1 ) - insertInDict( QString(str,i), docNum ); - i = 0; - } - c = buf[++j]; - } - if ( i > 1 ) - insertInDict( QString(str,i), docNum ); - file.close(); -} - -void Index::writeDict() -{ - QFile f( dictFile ); - if ( !f.open(QFile::WriteOnly ) ) - return; - QDataStream s( &f ); - for(QHash<QString, Entry *>::Iterator it = dict.begin(); it != dict.end(); ++it) { - s << it.key(); - s << it.value()->documents.count(); - s << it.value()->documents; - } - f.close(); - writeDocumentList(); -} - -void Index::writeDocumentList() -{ - QFile f( docListFile ); - if ( !f.open(QFile::WriteOnly ) ) - return; - QDataStream s( &f ); - s << docList; -} - -void Index::readDict() -{ - QFile f( dictFile ); - if ( !f.open(QFile::ReadOnly ) ) - return; - - dict.clear(); - QDataStream s( &f ); - QString key; - int numOfDocs; - QVector<Document> docs; - while ( !s.atEnd() ) { - s >> key; - s >> numOfDocs; - docs.resize(numOfDocs); - s >> docs; - dict.insert( key, new Entry( docs ) ); - } - f.close(); - readDocumentList(); -} - -void Index::readDocumentList() -{ - QFile f( docListFile ); - if ( !f.open(QFile::ReadOnly ) ) - return; - QDataStream s( &f ); - s >> docList; -} - -QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords ) -{ - QList<Term> termList; - for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) { - Entry *e = 0; - if ( (*it).contains(QLatin1Char('*')) ) { - QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) ); - termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) ); - } else if ( dict[ *it ] ) { - e = dict[ *it ]; - termList.append( Term( *it, e->documents.count(), e->documents ) ); - } else { - return QStringList(); - } - } - if ( !termList.count() ) - return QStringList(); - qSort(termList); - - QVector<Document> minDocs = termList.takeFirst().documents; - for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { - Term *t = &(*it); - QVector<Document> docs = t->documents; - for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) { - bool found = false; - for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) { - if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) { - (*minDoc_it).frequency += (*doc_it).frequency; - found = true; - break; - } - } - if ( !found ) - minDoc_it = minDocs.erase( minDoc_it ); - else - ++minDoc_it; - } - } - - QStringList results; - qSort( minDocs ); - if ( termSeq.isEmpty() ) { - for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) - results << docList.at((int)(*it).docNumber); - return results; - } - - QString fileName; - for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) { - fileName = docList[ (int)(*it).docNumber ]; - if ( searchForPattern( termSeq, seqWords, fileName ) ) - results << fileName; - } - return results; -} - -QString Index::getDocumentTitle( const QString &fullFileName ) -{ - QUrl url(fullFileName); - QString fileName = url.toLocalFile(); - - if (documentTitleCache.contains(fileName)) - return documentTitleCache.value(fileName); - - QFile file( fileName ); - if ( !file.open( QFile::ReadOnly ) ) { - qWarning( "cannot open file %s", qPrintable(fileName) ); - return fileName; - } - QTextStream s( &file ); - QString text = s.readAll(); - - int start = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive) + 7; - int end = text.indexOf(QLatin1String("</title>"), 0, Qt::CaseInsensitive); - - QString title = tr("Untitled"); - if (end - start > 0) { - title = text.mid(start, end - start); - if (Qt::mightBeRichText(title)) { - QTextDocument doc; - doc.setHtml(title); - title = doc.toPlainText(); - } - } - documentTitleCache.insert(fileName, title); - return title; -} - -QStringList Index::getWildcardTerms( const QString &term ) -{ - QStringList lst; - QStringList terms = split( term ); - QStringList::Iterator iter; - - for(QHash<QString, Entry*>::Iterator it = dict.begin(); it != dict.end(); ++it) { - int index = 0; - bool found = false; - QString text( it.key() ); - for ( iter = terms.begin(); iter != terms.end(); ++iter ) { - if ( *iter == QLatin1String("*") ) { - found = true; - continue; - } - if ( iter == terms.begin() && (*iter)[0] != text[0] ) { - found = false; - break; - } - index = text.indexOf( *iter, index ); - if ( *iter == terms.last() && index != (int)text.length()-1 ) { - index = text.lastIndexOf( *iter ); - if ( index != (int)text.length() - (int)(*iter).length() ) { - found = false; - break; - } - } - if ( index != -1 ) { - found = true; - index += (*iter).length(); - continue; - } else { - found = false; - break; - } - } - if ( found ) - lst << text; - } - - return lst; -} - -QStringList Index::split( const QString &str ) -{ - QStringList lst; - int j = 0; - int i = str.indexOf(QLatin1Char('*'), j ); - - if (str.startsWith(QLatin1String("*"))) - lst << QLatin1String("*"); - - while ( i != -1 ) { - if ( i > j && i <= (int)str.length() ) { - lst << str.mid( j, i - j ); - lst << QLatin1String("*"); - } - j = i + 1; - i = str.indexOf(QLatin1Char('*'), j ); - } - - int l = str.length() - 1; - if ( str.mid( j, l - j + 1 ).length() > 0 ) - lst << str.mid( j, l - j + 1 ); - - return lst; -} - -QVector<Document> Index::setupDummyTerm( const QStringList &terms ) -{ - QList<Term> termList; - for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) { - Entry *e = 0; - if ( dict[ *it ] ) { - e = dict[ *it ]; - termList.append( Term( *it, e->documents.count(), e->documents ) ); - } - } - QVector<Document> maxList(0); - if ( !termList.count() ) - return maxList; - qSort(termList); - - maxList = termList.takeLast().documents; - for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { - Term *t = &(*it); - QVector<Document> docs = t->documents; - for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) { - if ( maxList.indexOf( *docIt ) == -1 ) - maxList.append( *docIt ); - } - } - return maxList; -} - -void Index::buildMiniDict( const QString &str ) -{ - if ( miniDict[ str ] ) - miniDict[ str ]->positions.append( wordNum ); - ++wordNum; -} - -bool Index::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName ) -{ - QUrl url(fileName); - QString fName = url.toLocalFile(); - QFile file( fName ); - if ( !file.open( QFile::ReadOnly ) ) { - qWarning( "cannot open file %s", qPrintable(fName) ); - return false; - } - - wordNum = 3; - miniDict.clear(); - QStringList::ConstIterator cIt = words.begin(); - for ( ; cIt != words.end(); ++cIt ) - miniDict.insert( *cIt, new PosEntry( 0 ) ); - - QTextStream s( &file ); - QString text = s.readAll(); - bool valid = true; - const QChar *buf = text.unicode(); - QChar str[64]; - QChar c = buf[0]; - int j = 0; - int i = 0; - while ( j < text.length() ) { - if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { - valid = false; - if ( i > 1 ) - buildMiniDict( QString(str,i) ); - i = 0; - c = buf[++j]; - continue; - } - if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { - valid = true; - c = buf[++j]; - continue; - } - if ( !valid ) { - c = buf[++j]; - continue; - } - if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { - str[i] = c.toLower(); - ++i; - } else { - if ( i > 1 ) - buildMiniDict( QString(str,i) ); - i = 0; - } - c = buf[++j]; - } - if ( i > 1 ) - buildMiniDict( QString(str,i) ); - file.close(); - - QStringList::ConstIterator patIt = patterns.begin(); - QStringList wordLst; - QList<uint> a, b; - QList<uint>::iterator aIt; - for ( ; patIt != patterns.end(); ++patIt ) { - wordLst = (*patIt).split(QLatin1Char(' ')); - a = miniDict[ wordLst[0] ]->positions; - for ( int j = 1; j < (int)wordLst.count(); ++j ) { - b = miniDict[ wordLst[j] ]->positions; - aIt = a.begin(); - while ( aIt != a.end() ) { - if ( b.contains( *aIt + 1 )) { - (*aIt)++; - ++aIt; - } else { - aIt = a.erase( aIt ); - } - } - } - } - if ( a.count() ) - return true; - return false; -} - -QT_END_NAMESPACE |