summary | refs | log | tree | commit | diff | stats
path: root/addon/doxysearch/doxysearch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'addon/doxysearch/doxysearch.cpp')
-rw-r--r-- addon/doxysearch/doxysearch.cpp 434
1 files changed, 434 insertions, 0 deletions
diff --git a/addon/doxysearch/doxysearch.cpp b/addon/doxysearch/doxysearch.cpp
new file mode 100644
index 0000000..7b90c82
--- /dev/null
+++ b/addon/doxysearch/doxysearch.cpp
@@ -0,0 +1,434 @@
+/******************************************************************************
+ *
+ * Copyright (C) 1997-2012 by Dimitri van Heesch.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation under the terms of the GNU General Public License is hereby
+ * granted. No representations are made about the suitability of this software
+ * for any purpose. It is provided "as is" without express or implied warranty.
+ * See the GNU General Public License for more details.
+ *
+ * Documents produced by Doxygen are derivative works derived from the
+ * input used in their production; they are not affected by this license.
+ *
+ */
+
+// STL includes
+#include <algorithm>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Xapian includes
+#include <xapian.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/stat.h>
+#endif
+
+// Value slot numbers of the per-document fields in the search index.
+// main() passes them to Xapian::Document::get_value() to read the
+// corresponding field of each search result.
+#define FIELD_TYPE 1 // emitted as "type" in the JSON output
+#define FIELD_NAME 2 // emitted (followed by FIELD_ARGS) as "name"
+#define FIELD_ARGS 3 // appended to FIELD_NAME in the "name" entry
+#define FIELD_TAG 4 // emitted as "tag"
+#define FIELD_URL 5 // emitted as "url"
+#define FIELD_KEYW 6 // not referenced by the code in this file
+#define FIELD_DOC 7 // text handed to highlighter() to build the fragments
+
+/** Evaluates to the numeric value (0..15) of the hexadecimal digit
+ *  character \a x (upper or lower case), or to -1 if \a x is not a
+ *  hexadecimal digit.
+ *  Being a macro, \a x is evaluated several times, so it must not have
+ *  side effects.
+ */
+#define HEX2DEC(x) (((x)>='0' && (x)<='9')?((x)-'0'):\
+ ((x)>='a' && (x)<='f')?((x)-'a'+10):\
+ ((x)>='A' && (x)<='F')?((x)-'A'+10):-1)
+
+
+/** Returns true if \a dirName refers to an existing directory,
+ *  false if the path cannot be queried or is not a directory.
+ */
+bool dirExists(const std::string& dirName)
+{
+#ifdef _WIN32
+  const DWORD attributes = GetFileAttributesA(dirName.c_str());
+  // an invalid attribute value means the path could not be queried at all
+  return attributes != INVALID_FILE_ATTRIBUTES &&
+         (attributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+#else
+  struct stat info;
+  if (stat(dirName.c_str(), &info) != 0)
+  {
+    return false; // path could not be stat'ed
+  }
+  return S_ISDIR(info.st_mode) ? true : false;
+#endif
+}
+
+
+/** Returns the numeric value (0..15) of hexadecimal digit \a c,
+ *  or -1 if \a c is not a hexadecimal digit.
+ */
+static int hexDigitValue(unsigned char c)
+{
+  if (c>='0' && c<='9') return c-'0';
+  if (c>='a' && c<='f') return c-'a'+10;
+  if (c>='A' && c<='F') return c-'A'+10;
+  return -1;
+}
+
+/** Decodes a URI encoded string into a normal string.
+ *
+ *  - "%XX", with XX two hexadecimal digits, is replaced by the byte
+ *    with that value.
+ *  - '+' is replaced by a space, wherever it occurs in the string.
+ *  - Any other character, including a '%' that is not followed by two
+ *    hexadecimal digits, is copied unchanged.
+ *
+ *  @param sSrc the URI encoded input.
+ *  @return the decoded string.
+ */
+static std::string uriDecode(const std::string & sSrc)
+{
+  // Note from RFC1630: "Sequences which start with a percent
+  // sign but are not followed by two hexadecimal characters
+  // (0-9, A-F) are reserved for future extension"
+
+  const std::string::size_type len = sSrc.length();
+  std::string result;
+  result.reserve(len); // decoding never makes the string longer
+
+  std::string::size_type i=0;
+  while (i<len)
+  {
+    const char ch = sSrc[i];
+    if (ch=='%' && i+2<len) // two more characters are available
+    {
+      // use int (not char) for the digit values so that the -1 marker
+      // also works on platforms where plain char is unsigned
+      const int hi = hexDigitValue(static_cast<unsigned char>(sSrc[i+1]));
+      const int lo = hexDigitValue(static_cast<unsigned char>(sSrc[i+2]));
+      if (hi!=-1 && lo!=-1)
+      {
+        result += static_cast<char>((hi<<4) | lo);
+        i+=3;
+        continue;
+      }
+      // not a valid escape sequence: fall through and copy the '%'
+    }
+    result += (ch=='+') ? ' ' : ch;
+    i++;
+  }
+  return result;
+}
+
+/** Splits \a s at every occurrence of delimiter \a delim and returns
+ *  the resulting pieces in order. Adjacent delimiters yield empty
+ *  pieces; a delimiter at the very end does not add a trailing empty
+ *  piece, and an empty input yields an empty list.
+ */
+static std::vector<std::string> split(const std::string &s, char delim)
+{
+  std::vector<std::string> pieces;
+  std::string::size_type begin = 0;
+  while (begin < s.length())
+  {
+    const std::string::size_type end = s.find(delim, begin);
+    if (end == std::string::npos)
+    {
+      // no more delimiters: the remainder is the last piece
+      pieces.push_back(s.substr(begin));
+      break;
+    }
+    pieces.push_back(s.substr(begin, end - begin));
+    begin = end + 1; // continue after the delimiter
+  }
+  return pieces;
+}
+
+/** Reads a value of type T from string \a s using stream extraction.
+ *
+ *  @param s the text to parse; leading whitespace is skipped.
+ *  @return the parsed value, or a value-initialized T (0 for numeric
+ *          types) if \a s does not start with a valid representation.
+ */
+template<class T>
+T fromString(const std::string& s)
+{
+  std::istringstream stream (s);
+  // value-initialize: when the extraction fails before anything is
+  // parsed (e.g. empty or all-blank input) t is left untouched, and
+  // returning an uninitialized value would be undefined behavior
+  T t = T();
+  stream >> t;
+  return t;
+}
+
+/** Class that holds the starting position of a word */
+struct WordPosition
+{
+  WordPosition(int s,int i) : start(s), index(i) {}
+  int start; //!< offset of the match inside the searched text
+  int index; //!< index of the matched word in the list of search words
+};
+
+/** Class representing the '<' operator for WordPosition objects based on position. */
+struct WordPosition_less
+{
+  // const-qualified so the comparator also works through const objects
+  // and const references
+  bool operator()(const WordPosition &p1,const WordPosition &p2) const
+  {
+    return p1.start<p2.start;
+  }
+};
+
+/** Class that holds a text fragment */
+struct Fragment
+{
+  Fragment(const std::string &t,int occ) : text(t), occurrences(occ) {}
+  std::string text; //!< the fragment text, including highlight markup
+  int occurrences;  //!< number of highlighted words inside the fragment
+};
+
+/** Class representing the '>' operator for Fragment objects based on occurrence. */
+struct Fragment_greater
+{
+  // const-qualified so the comparator also works through const objects
+  // and const references
+  bool operator()(const Fragment &p1,const Fragment &p2) const
+  {
+    return p1.occurrences>p2.occurrences;
+  }
+};
+
+/** Class representing a range within a string.
+ *  The range covers the offsets [start..end), i.e. \a end is exclusive.
+ */
+struct Range
+{
+  Range(int s,int e) : start(s), end(e) {}
+  int start; //!< offset of the first character in the range
+  int end;   //!< offset one past the last character in the range
+};
+
+/** Returns true if the \a len characters starting at offset \a start
+ *  lie completely inside one of the \a ranges.
+ *
+ *  A word that ends exactly at the (exclusive) end of a range counts
+ *  as inside; this matches the test highlighter() uses when it decides
+ *  which words to mark inside a fragment.
+ */
+static bool insideRange(const std::vector<Range> &ranges,int start,int len)
+{
+  for (std::vector<Range>::const_iterator it = ranges.begin();
+       it!=ranges.end(); ++it
+      )
+  {
+    if (start>=it->start && start+len<=it->end)
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+/** Returns a list of text \a fragments from \a s containing one or
+ *  more \a words. The list is sorted according to the
+ *  number of occurrences of words within the fragment.
+ *
+ *  Each fragment is roughly 60 characters of \a s around a match,
+ *  extended to the surrounding whitespace, with every matched word
+ *  wrapped in a span of class "hl". A fragment that does not start
+ *  (end) at the start (end) of \a s is prefixed (suffixed) with "...".
+ *
+ *  @param[in]  s         the text to search.
+ *  @param[in]  words     the words to look for; empty words are ignored.
+ *  @param[out] fragments the fragments found are appended to this list,
+ *                        which is then sorted (most occurrences first).
+ */
+static void highlighter(const std::string &s,
+                        const std::vector<std::string> &words,
+                        std::vector<Fragment> &fragments)
+{
+  const std::string spanStart="<span class=\"hl\">";
+  const std::string spanEnd="</span>";
+  const std::string dots="...";
+  const int fragLen = 60;
+  const int sl=static_cast<int>(s.length());
+
+  // find positions of words in s
+  std::vector<WordPosition> positions;
+  for (size_t j=0;j<words.size();++j)
+  {
+    const std::string &word = words[j];
+    // an empty word matches at every offset without ever advancing,
+    // which would make the search loop below run forever
+    if (word.empty()) continue;
+    size_t pos=0;
+    size_t i;
+    while ((i=s.find(word,pos))!=std::string::npos)
+    {
+      positions.push_back(WordPosition(static_cast<int>(i),static_cast<int>(j)));
+      pos=i+word.length();
+    }
+  }
+  // sort on position (stable, so equal positions keep the word order)
+  std::stable_sort(positions.begin(),positions.end(),WordPosition_less());
+
+  // get fragments around words
+  std::vector<Range> ranges;
+  for (std::vector<WordPosition>::const_iterator it=positions.begin();
+       it!=positions.end();
+       ++it)
+  {
+    const std::string &w = words[it->index];
+    const int i=it->start;
+    const int wl=static_cast<int>(w.length());
+    if (insideRange(ranges,i,wl)) continue; // already part of a fragment
+
+    if (wl>fragLen) // the word alone is longer than a fragment
+    {
+      fragments.push_back(Fragment(spanStart+w+spanEnd,1));
+      ranges.push_back(Range(i,i+wl));
+      continue;
+    }
+
+    // center a window of fragLen characters around the word
+    int bi=i-(fragLen-wl)/2;
+    int ei=i+wl+(fragLen-wl)/2;
+    if (bi<0) { ei-=bi; bi=0; } // shift the window right to keep its size
+    if (ei>sl) { ei=sl; }
+    // isspace requires a value in the unsigned char range, plain char
+    // may be negative for non-ASCII bytes
+    while (bi>0 && !std::isspace(static_cast<unsigned char>(s[bi]))) bi--; // round to start of the word
+    while (ei<sl && !std::isspace(static_cast<unsigned char>(s[ei]))) ei++; // round to end of the word
+
+    // only use an ellipsis where text was really cut off
+    std::string fragment;
+    if (bi>0) fragment=dots;
+
+    // highlight any word in s between indexes bi and ei
+    int occ=0;
+    int pos=bi;
+    for (std::vector<WordPosition>::const_iterator it2=positions.begin();
+         it2!=positions.end();
+         ++it2)
+    {
+      const int ws  = it2->start;
+      const int wl2 = static_cast<int>(words[it2->index].length());
+      // skip matches that overlap text that was already emitted (this
+      // also covers matches before bi) and matches that extend beyond ei
+      if (ws<pos || ws+wl2>ei) continue;
+      fragment.append(s,pos,ws-pos);
+      fragment+=spanStart;
+      fragment.append(s,ws,wl2);
+      fragment+=spanEnd;
+      pos=ws+wl2;
+      occ++;
+    }
+    fragment.append(s,pos,ei-pos);
+    if (ei<sl) fragment+=dots;
+    fragments.push_back(Fragment(fragment,occ));
+    ranges.push_back(Range(bi,ei));
+  }
+  // most occurrences first; stable so ties stay in text order
+  std::stable_sort(fragments.begin(),fragments.end(),Fragment_greater());
+}
+
+/** Escapes a string such that it can be included in a JSON structure.
+ *
+ *  Double quotes and backslashes are prefixed with a backslash, the
+ *  control characters that have a short JSON escape (\\b, \\f, \\n,
+ *  \\r, \\t) use it, and any other character below 0x20 is written as
+ *  a \\u00XX escape. All other bytes (including UTF-8 sequences) are
+ *  copied unchanged.
+ *
+ *  @param s the raw string.
+ *  @return the escaped string, without surrounding quotes.
+ */
+static std::string escapeString(const std::string &s)
+{
+  static const char hexDigits[] = "0123456789abcdef";
+  std::string dst;
+  dst.reserve(s.length());
+  for (std::string::size_type i=0;i<s.length();i++)
+  {
+    const unsigned char ch = static_cast<unsigned char>(s[i]);
+    switch (ch)
+    {
+      case '\"': dst += "\\\""; break;
+      case '\\': dst += "\\\\"; break;
+      case '\b': dst += "\\b";  break;
+      case '\f': dst += "\\f";  break;
+      case '\n': dst += "\\n";  break;
+      case '\r': dst += "\\r";  break;
+      case '\t': dst += "\\t";  break;
+      default:
+        if (ch<0x20) // remaining control characters are not allowed raw
+        {
+          dst += "\\u00";
+          dst += hexDigits[ch>>4];
+          dst += hexDigits[ch&0x0f];
+        }
+        else
+        {
+          dst += static_cast<char>(ch);
+        }
+        break;
+    }
+  }
+  return dst;
+}
+
+/** Writes an error response to standard output and terminates the
+ *  program with exit code 0.
+ *
+ *  The response has the form <callback>({"error":"<message>"}).
+ *
+ *  @param callback name of the callback function to wrap the result in.
+ *  @param error    the error message; it is escaped so that quotes in
+ *                  the message cannot break the JSON structure.
+ */
+static void showError(const std::string &callback,const std::string &error)
+{
+  std::cout << callback << "({\"error\":\"" << escapeString(error) << "\"})";
+  exit(0);
+}
+
+/** Returns true if \a name can safely be echoed as the name of the
+ *  JSONP callback function: it may only consist of letters, digits,
+ *  '_', '$' and '.'. An empty name is accepted.
+ */
+static bool isSafeCallbackName(const std::string &name)
+{
+  for (std::string::size_type i=0;i<name.length();i++)
+  {
+    const unsigned char c = static_cast<unsigned char>(name[i]);
+    if (!std::isalnum(c) && c!='_' && c!='$' && c!='.') return false;
+  }
+  return true;
+}
+
+/** Main routine.
+ *
+ *  Reads the query from the QUERY_STRING environment variable or, if
+ *  that is not set, from the first command line argument. The query
+ *  consists of '&' separated key=value pairs:
+ *  - q:  the words to search for (separated by spaces)
+ *  - n:  the number of results per page (default 1)
+ *  - p:  the page number to return, starting at 0 (default 0)
+ *  - cb: the name of the JSONP callback function
+ *
+ *  The special query "test" only checks that the index directory exists.
+ *  The results are written to standard output as a JSONP response.
+ */
+int main(int argc,char **argv)
+{
+  // process inputs that were passed to us via QUERY_STRING
+  std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n";
+  // declared outside the try block so the exception handlers can wrap
+  // their error report in the callback the client asked for
+  std::string callback;
+  try
+  {
+    // get input parameters
+    const char *queryEnv = getenv("QUERY_STRING");
+    std::string queryString;
+    if (queryEnv)
+    {
+      queryString = queryEnv;
+    }
+    else if (argc>=2)
+    {
+      queryString = argv[1];
+    }
+    else
+    {
+      std::cout << "No input!\n";
+      exit(1);
+    }
+
+    // parse query string
+    std::vector<std::string> parts = split(queryString,'&');
+    std::string searchFor;
+    int num=1,page=0;
+    for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it)
+    {
+      std::vector<std::string> kv = split(*it,'=');
+      if (kv.size()==2)
+      {
+        std::string val = uriDecode(kv[1]);
+        if      (kv[0]=="q")  searchFor = val;
+        else if (kv[0]=="n")  num       = fromString<int>(val);
+        else if (kv[0]=="p")  page      = fromString<int>(val);
+        else if (kv[0]=="cb") callback  = val;
+      }
+    }
+    // the values come from the client: do not let negative numbers
+    // turn into huge unsigned offsets/counts further down
+    if (num<0)  num=0;
+    if (page<0) page=0;
+
+    // the callback name is echoed verbatim into the generated script,
+    // so refuse anything that is not a plain (dotted) identifier
+    if (!isSafeCallbackName(callback))
+    {
+      callback.clear();
+      showError(callback,"Invalid callback name");
+    }
+
+    std::string indexDir = "doxysearch.db";
+
+    if (queryString=="test") // user test
+    {
+      bool dbOk = dirExists(indexDir);
+      if (dbOk)
+      {
+        std::cout << "Test successful.";
+      }
+      else
+      {
+        std::cout << "Test failed: cannot find search index " << indexDir;
+      }
+      exit(0);
+    }
+
+    // split the search string into words; empty words (caused by
+    // leading, trailing or repeated spaces) are dropped, as an empty
+    // term would match every document
+    std::vector<std::string> words;
+    {
+      const std::vector<std::string> tokens = split(searchFor,' ');
+      for (std::vector<std::string>::const_iterator it=tokens.begin();it!=tokens.end();++it)
+      {
+        if (!it->empty()) words.push_back(*it);
+      }
+    }
+
+    // create query
+    Xapian::Database db(indexDir);
+    Xapian::Enquire enquire(db);
+    Xapian::Query query;
+    for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it)
+    {
+      query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it));
+    }
+    enquire.set_query(query);
+
+    // get results; the offset is computed with unsigned arithmetic so
+    // that large values wrap instead of overflowing a signed int
+    const unsigned int first = static_cast<unsigned int>(page)*static_cast<unsigned int>(num);
+    Xapian::MSet matches = enquire.get_mset(first,num);
+    unsigned int hits   = matches.get_matches_estimated();
+    unsigned int offset = first;
+    unsigned int pages  = num>0 ? (hits+num-1)/num : 0;
+    if (offset>hits)     offset=hits;
+    if (offset+num>hits) num=hits-offset;
+
+    // write results as JSONP
+    std::cout << callback.c_str() << "(";
+    std::cout << "{" << std::endl
+              << " \"hits\":"   << hits   << "," << std::endl
+              << " \"first\":"  << offset << "," << std::endl
+              << " \"count\":"  << num    << "," << std::endl
+              << " \"page\":"   << page   << "," << std::endl
+              << " \"pages\":"  << pages  << "," << std::endl
+              << " \"query\": \"" << escapeString(searchFor) << "\"," << std::endl
+              << " \"items\":[" << std::endl;
+    // foreach search result
+    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
+    {
+      std::vector<Fragment> hl;
+      Xapian::Document doc = i.get_document();
+      highlighter(doc.get_value(FIELD_DOC),words,hl);
+      // all values are escaped: names and arguments can contain quotes
+      std::cout << " {\"type\": \"" << escapeString(doc.get_value(FIELD_TYPE)) << "\"," << std::endl
+                << " \"name\": \"" << escapeString(doc.get_value(FIELD_NAME))
+                                   << escapeString(doc.get_value(FIELD_ARGS)) << "\"," << std::endl
+                << " \"tag\": \""  << escapeString(doc.get_value(FIELD_TAG))  << "\"," << std::endl
+                << " \"url\": \""  << escapeString(doc.get_value(FIELD_URL))  << "\"," << std::endl;
+      std::cout << " \"fragments\":[" << std::endl;
+      int c=0;
+      bool first=true;
+      // show at most 3 fragments per result
+      for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c)
+      {
+        if (!first) std::cout << "," << std::endl;
+        std::cout << " \"" << escapeString((*it).text) << "\"";
+        first=false;
+      }
+      if (!first) std::cout << std::endl;
+      std::cout << " ]" << std::endl;
+      std::cout << " }";
+      // separate the items based on the results that were really
+      // returned; the hit count is only an estimate
+      Xapian::MSetIterator next = i;
+      ++next;
+      if (next != matches.end()) std::cout << ",";
+      std::cout << std::endl;
+    }
+    std::cout << " ]" << std::endl << "})" << std::endl;
+  }
+  catch (const Xapian::Error &e) // Xapian exception
+  {
+    showError(callback,e.get_description());
+  }
+  catch (...) // Any other exception
+  {
+    showError(callback,"Unknown Exception!"); // does not return
+  }
+  return 0;
+}