summaryrefslogtreecommitdiffstats
path: root/src/searchindex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/searchindex.cpp')
-rw-r--r--src/searchindex.cpp285
1 files changed, 0 insertions, 285 deletions
diff --git a/src/searchindex.cpp b/src/searchindex.cpp
deleted file mode 100644
index 400702b..0000000
--- a/src/searchindex.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/******************************************************************************
- *
- *
- *
- * Copyright (C) 1997-2005 by Dimitri van Heesch.
- *
- * Permission to use, copy, modify, and distribute this software and its
- * documentation under the terms of the GNU General Public License is hereby
- * granted. No representations are made about the suitability of this software
- * for any purpose. It is provided "as is" without express or implied warranty.
- * See the GNU General Public License for more details.
- *
- * Documents produced by Doxygen are derivative works derived from the
- * input used in their production; they are not affected by this license.
- *
- */
-
-#include "qtbc.h"
-#include "searchindex.h"
-#include "config.h"
-#include <qfile.h>
-#include <ctype.h>
-
-
-// file format: (all multi-byte values are stored in big endian format)
-// 4 byte header
-// 256*256*4 byte index (4 bytes)
-// for each index entry: a zero terminated list of words
-// for each word: a \0 terminated string + 4 byte offset to the stats info
-// padding bytes to align at 4 byte boundary
-// for each word: the number of urls (4 bytes)
-// + for each url containing the word 8 bytes statistics
-// (4 bytes index to url string + 4 bytes frequency counter)
-// for each url: a \0 terminated string
-
-const int numIndexEntries = 256*256;
-
-//--------------------------------------------------------------------
-
-IndexWord::IndexWord(const char *word) : m_word(word), m_urls(17)
-{
- m_urls.setAutoDelete(TRUE);
- //printf("IndexWord::IndexWord(%s)\n",word);
-}
-
-void IndexWord::addUrlIndex(int idx,bool hiPriority)
-{
- //printf("IndexWord::addUrlIndex(%d,%d)\n",idx,hiPriority);
- URLInfo *ui = m_urls.find(idx);
- if (ui==0)
- {
- //printf("URLInfo::URLInfo(%d)\n",idx);
- ui=new URLInfo(idx,0);
- m_urls.insert(idx,ui);
- }
- ui->freq+=2;
- if (hiPriority) ui->freq|=1; // mark as high priority document
-}
-
-//--------------------------------------------------------------------
-
-SearchIndex::SearchIndex() : m_words(328829), m_index(numIndexEntries), m_urlIndex(-1)
-{
- int i;
- m_words.setAutoDelete(TRUE);
- m_urls.setAutoDelete(TRUE);
- for (i=0;i<numIndexEntries;i++) m_index.insert(i,new QList<IndexWord>);
-}
-
-void SearchIndex::setCurrentDoc(const char *name,const char *baseName,const char *anchor)
-{
- //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
- QCString url=baseName+Config_getString("HTML_FILE_EXTENSION");
- if (anchor) url+=(QCString)"#"+anchor;
- m_urlIndex++;
- m_urls.insert(m_urlIndex,new URL(name,url));
-}
-
-static int charsToIndex(const char *word)
-{
- if (word==0) return -1;
-
- // Fast string hashing algorithm
- //register ushort h=0;
- //const char *k = word;
- //ushort mask=0xfc00;
- //while ( *k )
- //{
- // h = (h&mask)^(h<<6)^(*k++);
- //}
- //return h;
-
- // Simple hashing that allows for substring searching
- uint c1=word[0];
- if (c1==0) return -1;
- uint c2=word[1];
- if (c2==0) return -1;
- return c1*256+c2;
-}
-
-void SearchIndex::addWord(const char *word,bool hiPriority)
-{
- //printf("SearchIndex::addWord(%s,%d)\n",word,hiPriority);
- //QString wStr=QString(word).lower();
- QString wStr(word);
- wStr=wStr.lower();
- if (wStr.isEmpty()) return;
- IndexWord *w = m_words[wStr];
- if (w==0)
- {
- int idx=charsToIndex(wStr);
- if (idx<0) return;
- w = new IndexWord(wStr);
- //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
- m_index[idx]->append(w);
- m_words.insert(wStr,w);
- }
- w->addUrlIndex(m_urlIndex,hiPriority);
-}
-
-
-static void writeInt(QFile &f,int index)
-{
- f.putch(((uint)index)>>24);
- f.putch((((uint)index)>>16)&0xff);
- f.putch((((uint)index)>>8)&0xff);
- f.putch(((uint)index)&0xff);
-}
-
-static void writeString(QFile &f,const char *s)
-{
- const char *p = s;
- while (*p) f.putch(*p++);
- f.putch(0);
-}
-
-void SearchIndex::write(const char *fileName)
-{
- int i;
- int size=4; // for the header
- size+=4*numIndexEntries; // for the index
- int wordsOffset = size;
- // first pass: compute the size of the wordlist
- for (i=0;i<numIndexEntries;i++)
- {
- QList<IndexWord> *wlist = m_index[i];
- if (!wlist->isEmpty())
- {
- QListIterator<IndexWord> iwi(*wlist);
- IndexWord *iw;
- for (iwi.toFirst();(iw=iwi.current());++iwi)
- {
- int ws = iw->word().length()+1;
- size+=ws+4; // word + url info list offset
- }
- size+=1; // zero list terminator
- }
- }
-
- // second pass: compute the offsets in the index
- int indexOffsets[numIndexEntries];
- int offset=wordsOffset;
- for (i=0;i<numIndexEntries;i++)
- {
- QList<IndexWord> *wlist = m_index[i];
- if (!wlist->isEmpty())
- {
- indexOffsets[i]=offset;
- QListIterator<IndexWord> iwi(*wlist);
- IndexWord *iw;
- for (iwi.toFirst();(iw=iwi.current());++iwi)
- {
- offset+= iw->word().length()+1;
- offset+=4; // word + offset to url info array
- }
- offset+=1; // zero list terminator
- }
- else
- {
- indexOffsets[i]=0;
- }
- }
- int padding = size;
- size = (size+3)&~3; // round up to 4 byte boundary
- padding = size - padding;
-
- //int statsOffset = size;
- QDictIterator<IndexWord> wdi(m_words);
- //IndexWord *iw;
- int *wordStatOffsets = new int[m_words.count()];
-
- int count=0;
-
- // third pass: compute offset to stats info for each word
- for (i=0;i<numIndexEntries;i++)
- {
- QList<IndexWord> *wlist = m_index[i];
- if (!wlist->isEmpty())
- {
- QListIterator<IndexWord> iwi(*wlist);
- IndexWord *iw;
- for (iwi.toFirst();(iw=iwi.current());++iwi)
- {
- //printf("wordStatOffsets[%d]=%d\n",count,size);
- wordStatOffsets[count++] = size;
- size+=4+iw->urls().count()*8; // count + (url_index,freq) per url
- }
- }
- }
- int *urlOffsets = new int[m_urls.count()];
- //int urlsOffset = size;
- QIntDictIterator<URL> udi(m_urls);
- URL *url;
- for (udi.toFirst();(url=udi.current());++udi)
- {
- urlOffsets[udi.currentKey()]=size;
- size+=url->name.length()+1+
- url->url.length()+1;
- }
- //printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);
- QFile f(fileName);
- if (f.open(IO_WriteOnly))
- {
- // write header
- f.putch('D'); f.putch('O'); f.putch('X'); f.putch('S');
- // write index
- for (i=0;i<numIndexEntries;i++)
- {
- writeInt(f,indexOffsets[i]);
- }
- // write word lists
- count=0;
- for (i=0;i<numIndexEntries;i++)
- {
- QList<IndexWord> *wlist = m_index[i];
- if (!wlist->isEmpty())
- {
- QListIterator<IndexWord> iwi(*wlist);
- IndexWord *iw;
- for (iwi.toFirst();(iw=iwi.current());++iwi)
- {
- writeString(f,iw->word());
- writeInt(f,wordStatOffsets[count++]);
- }
- f.putch(0);
- }
- }
- // write extra padding bytes
- for (i=0;i<padding;i++) f.putch(0);
- // write word statistics
- for (i=0;i<numIndexEntries;i++)
- {
- QList<IndexWord> *wlist = m_index[i];
- if (!wlist->isEmpty())
- {
- QListIterator<IndexWord> iwi(*wlist);
- IndexWord *iw;
- for (iwi.toFirst();(iw=iwi.current());++iwi)
- {
- int numUrls = iw->urls().count();
- writeInt(f,numUrls);
- QIntDictIterator<URLInfo> uli(iw->urls());
- URLInfo *ui;
- for (uli.toFirst();(ui=uli.current());++uli)
- {
- writeInt(f,urlOffsets[ui->urlIdx]);
- writeInt(f,ui->freq);
- }
- }
- }
- }
- // write urls
- QIntDictIterator<URL> udi(m_urls);
- URL *url;
- for (udi.toFirst();(url=udi.current());++udi)
- {
- writeString(f,url->name);
- writeString(f,url->url);
- }
- }
-
- delete urlOffsets;
- delete wordStatOffsets;
-}
-