Release-1.3.8-20040913

author: Dimitri van Heesch <dimitri@stack.nl> 2004-09-13 17:26:00 (GMT)
committer: Dimitri van Heesch <dimitri@stack.nl> 2004-09-13 17:26:00 (GMT)
commit: 2fbdd54de993944146888498d997e6ade29e8d8c (patch)
tree: e74e4a8d2b3a92ce8032ad6114a63f1ddb8fed48 /src/searchindex.cpp
parent: 4ccd672ea5dbda17649f2e33577b5ab7eb02db7e (diff)
download: Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.zip
Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.tar.gz
Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.tar.bz2
1 file changed, 20 insertions, 26 deletions
diff --git a/src/searchindex.cpp b/src/searchindex.cpp
index b85d87e..ba04b6f 100644
--- a/src/searchindex.cpp
+++ b/src/searchindex.cpp
@@ -21,14 +21,16 @@
 #include <qfile.h>
 
 
-// file format:
+// file format: (all multi-byte values are stored in big endian format)
 //   4 byte header
-//   256*256*4 byte index
+//   256*256*4 byte index (256*256 entries of 4 bytes each)
 //   for each index entry: a zero terminated list of words 
-//   for each word: a 0 terminated string + 4 bytes stats index
+//   for each word: a \0 terminated string + 4 byte offset to the stats info
 //   padding bytes to align at 4 byte boundary
-//   for each word: a counter + for each url containing the word 8 bytes statistics
-//   for each url: a 0 terminated string
+//   for each word: the number of urls (4 bytes) 
+//               + for each url containing the word 8 bytes statistics
+//                 (4 bytes index to url string + 4 bytes frequency counter)
+//   for each url: a \0 terminated string
 
 const int numIndexEntries = 256*256;
 
@@ -37,17 +39,21 @@ const int numIndexEntries = 256*256;
 IndexWord::IndexWord(const char *word) : m_word(word), m_urls(17)
 {
   m_urls.setAutoDelete(TRUE);
+  //printf("IndexWord::IndexWord(%s)\n",word);
 }
 
-void IndexWord::addUrlIndex(int idx)
+void IndexWord::addUrlIndex(int idx,bool hiPriority)
 {
+  //printf("IndexWord::addUrlIndex(%d,%d)\n",idx,hiPriority);
   URLInfo *ui = m_urls.find(idx);
   if (ui==0)
   {
+    //printf("URLInfo::URLInfo(%d)\n",idx);
     ui=new URLInfo(idx,0);
     m_urls.insert(idx,ui);
   }
-  ui->freq++;
+  ui->freq+=2;
+  if (hiPriority) ui->freq|=1; // mark as high priority document
 }
 
 //--------------------------------------------------------------------
@@ -62,6 +68,7 @@ SearchIndex::SearchIndex() : m_words(328829), m_index(numIndexEntries), m_urlInd
 
 void SearchIndex::setCurrentDoc(const char *name,const char *baseName,const char *anchor)
 {
+  //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
   QCString url=baseName+Config_getString("HTML_FILE_EXTENSION");
   if (anchor) url+=(QCString)"#"+anchor;  
   m_urlIndex++;
@@ -79,9 +86,11 @@ static int charsToIndex(const char *word)
   return c1*256+c2;
 }
 
-void SearchIndex::addWord(const char *word)
+void SearchIndex::addWord(const char *word,bool hiPriority)
 {
-  QString wStr=QString(word).lower();
+  //printf("SearchIndex::addWord(%s,%d)\n",word,hiPriority);
+  //QString wStr=QString(word).lower();
+  QString wStr(word);
   if (wStr.isEmpty()) return;
   IndexWord *w = m_words[wStr];
   if (w==0)
@@ -91,9 +100,9 @@ void SearchIndex::addWord(const char *word)
     w = new IndexWord(wStr);
     //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
     m_index[idx]->append(w);
-    m_words.insert(word,w);
+    m_words.insert(wStr,w);
   }
-  w->addUrlIndex(m_urlIndex);
+  w->addUrlIndex(m_urlIndex,hiPriority);
 }
 
 
@@ -257,21 +266,6 @@ void SearchIndex::write(const char *fileName)
     }
   }
 
-  //for (wdi.toFirst();(iw=wdi.current());++wdi)
-  //{
-  //  printf("Word %s:\n",wdi.currentKey().data());
-  //  QIntDictIterator<URLInfo> udi(iw->urls());
-  //  URLInfo *ui;
-  //  for (udi.toFirst();(ui=udi.current());++udi)
-  //  {
-  //    printf("  url[%d]=(name=%s,url=%s),freq=%d\n",
-  //        ui->urlIdx,
-  //        m_urls[ui->urlIdx]->name.data(),
-  //        m_urls[ui->urlIdx]->url.data(),
-  //        ui->freq);
-  //  }
-  //}
-
   delete urlOffsets;
   delete wordStatOffsets;
 }
author	Dimitri van Heesch <dimitri@stack.nl>	2004-09-13 17:26:00 (GMT)
committer	Dimitri van Heesch <dimitri@stack.nl>	2004-09-13 17:26:00 (GMT)
commit	2fbdd54de993944146888498d997e6ade29e8d8c (patch)
tree	e74e4a8d2b3a92ce8032ad6114a63f1ddb8fed48 /src/searchindex.cpp
parent	4ccd672ea5dbda17649f2e33577b5ab7eb02db7e (diff)
download	Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.zip Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.tar.gz Doxygen-2fbdd54de993944146888498d997e6ade29e8d8c.tar.bz2