diff options
Diffstat (limited to 'addon/doxysearch/doxysearch.cpp')
-rw-r--r-- | addon/doxysearch/doxysearch.cpp | 434 |
1 files changed, 434 insertions, 0 deletions
diff --git a/addon/doxysearch/doxysearch.cpp b/addon/doxysearch/doxysearch.cpp new file mode 100644 index 0000000..7b90c82 --- /dev/null +++ b/addon/doxysearch/doxysearch.cpp @@ -0,0 +1,434 @@ +/****************************************************************************** + * + * Copyright (C) 1997-2012 by Dimitri van Heesch. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation under the terms of the GNU General Public License is hereby + * granted. No representations are made about the suitability of this software + * for any purpose. It is provided "as is" without express or implied warranty. + * See the GNU General Public License for more details. + * + * Documents produced by Doxygen are derivative works derived from the + * input used in their production; they are not affected by this license. + * + */ + +// STL includes +#include <cstdio> +#include <cstdlib> +#include <string> +#include <vector> +#include <sstream> +#include <iostream> +#include <fstream> +#include <string> +#include <algorithm> + +// Xapian includes +#include <xapian.h> + +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/stat.h> +#endif + +#define FIELD_TYPE 1 +#define FIELD_NAME 2 +#define FIELD_ARGS 3 +#define FIELD_TAG 4 +#define FIELD_URL 5 +#define FIELD_KEYW 6 +#define FIELD_DOC 7 + +#define HEX2DEC(x) (((x)>='0' && (x)<='9')?((x)-'0'):\ + ((x)>='a' && (x)<='f')?((x)-'a'+10):\ + ((x)>='A' && (x)<='F')?((x)-'A'+10):-1) + + +bool dirExists(const std::string& dirName) +{ +#ifdef _WIN32 + DWORD ftyp = GetFileAttributesA(dirName.c_str()); + if (ftyp == INVALID_FILE_ATTRIBUTES) + return false; //something is wrong with your path! + + if (ftyp & FILE_ATTRIBUTE_DIRECTORY) + return true; // this is a directory! +#else + struct stat sb; + + if (stat(dirName.c_str(), &sb)==0 && S_ISDIR(sb.st_mode)) + { + return true; + } +#endif + + return false; +} + + +/** decodes a URI encoded string into a normal string. 
/** Returns the value (0..15) of the hexadecimal digit \a c,
 *  or -1 if \a c is not a hexadecimal digit.
 */
static int hexDigitValue(unsigned char c)
{
  if (c>='0' && c<='9') return c-'0';
  if (c>='a' && c<='f') return c-'a'+10;
  if (c>='A' && c<='F') return c-'A'+10;
  return -1;
}

/** Decodes a URI encoded string into a normal string.
 *
 *  Every "%XX" sequence, where XX are two hexadecimal digits, is replaced
 *  by the byte with that value and every '+' is replaced by a space.
 *  A '%' that is not followed by two hexadecimal digits is copied unchanged.
 *
 *  @param sSrc the encoded string.
 *  @return the decoded string.
 */
static std::string uriDecode(const std::string & sSrc)
{
  // Note from RFC1630: "Sequences which start with a percent
  // sign but are not followed by two hexadecimal characters
  // (0-9, A-F) are reserved for future extension"

  const std::string::size_type len = sSrc.length();
  std::string result;
  result.reserve(len); // decoding never makes the string longer

  for (std::string::size_type i=0; i<len; )
  {
    const char ch = sSrc[i];
    if (ch=='%' && len-i>=3) // only decodable when two more characters follow
    {
      const int hi = hexDigitValue(static_cast<unsigned char>(sSrc[i+1]));
      const int lo = hexDigitValue(static_cast<unsigned char>(sSrc[i+2]));
      if (hi!=-1 && lo!=-1) // replace %2A with the corresponding character
      {
        result += static_cast<char>((hi<<4) | lo);
        i += 3;
        continue;
      }
    }
    // replace '+' with space (anywhere in the string, including the last
    // two characters); everything else is copied as is
    result += (ch=='+') ? ' ' : ch;
    ++i;
  }
  return result;
}

/** Returns the list of strings that result when splitting \a s using
 *  delimiter \a delim.
 *
 *  Empty items between two consecutive delimiters are kept; an empty
 *  input yields an empty list.
 */
static std::vector<std::string> split(const std::string &s, char delim)
{
  std::vector<std::string> elems;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, delim)) elems.push_back(item);
  return elems;
}

/** Reads a value of type T from string \a s.
 *
 *  @return the extracted value, or a value-initialized T (0 for numeric
 *          types) when nothing could be extracted from \a s.
 */
template<class T>
T fromString(const std::string& s)
{
  std::istringstream stream (s);
  T t = T(); // stays well-defined when the extraction below fails
  stream >> t;
  return t;
}

/** Class that holds the starting position of a word */
struct WordPosition
{
  WordPosition(int s,int i) : start(s), index(i) {}
  int start; //!< offset of the word inside the searched text
  int index; //!< index of the word in the list of search words
};

/** Class representing the '<' operator for WordPosition objects based on position. */
struct WordPosition_less
{
  bool operator()(const WordPosition &p1,const WordPosition &p2) const
  {
    return p1.start<p2.start;
  }
};

/** Class that holds a text fragment */
struct Fragment
{
  Fragment(const std::string &t,int occ) : text(t), occurrences(occ) {}
  std::string text;  //!< the text of the fragment
  int occurrences;   //!< number of highlighted words inside the fragment
};

/** Class representing the '>' operator for Fragment objects based on occurrence. */
struct Fragment_greater
{
  bool operator()(const Fragment &p1,const Fragment &p2) const
  {
    return p1.occurrences>p2.occurrences;
  }
};

/** Class representing a range within a string */
struct Range
{
  Range(int s,int e) : start(s), end(e) {}
  int start; //!< index of the first character of the range
  int end;   //!< index one past the last character of the range
};

/** Returns true if the \a len characters starting at \a start lie
 *  completely inside one of the \a ranges.
 *
 *  Range::end is treated as exclusive, so a word that ends exactly at the
 *  end of a range is still inside that range.
 */
static bool insideRange(const std::vector<Range> &ranges,int start,int len)
{
  for (std::vector<Range>::const_iterator it = ranges.begin();
       it!=ranges.end(); ++it
      )
  {
    const Range &r = *it;
    if (start>=r.start && start+len<=r.end)
    {
      return true;
    }
  }
  return false;
}
+ */ +static void highlighter(const std::string &s, + const std::vector<std::string> &words, + std::vector<Fragment> &fragments) +{ + const std::string spanStart="<span class=\"hl\">"; + const std::string spanEnd="</span>"; + const std::string dots="..."; + const int fragLen = 60; + int sl=s.length(); + + // find positions of words in s + size_t j=0; + std::vector<WordPosition> positions; + for (std::vector<std::string>::const_iterator it=words.begin(); + it!=words.end(); + ++it,++j + ) + { + int pos=0; + size_t i; + std::string word = *it; + while ((i=s.find(word,pos))!=std::string::npos) + { + positions.push_back(WordPosition(i,j)); + pos=i+word.length(); + } + } + // sort on position + std::sort(positions.begin(),positions.end(),WordPosition_less()); + // get fragments around words + std::vector<Range> ranges; + for (std::vector<WordPosition>::const_iterator it=positions.begin(); + it!=positions.end(); + ++it) + { + WordPosition wp = *it; + std::string w = words[wp.index]; + int i=wp.start; + int wl=w.length(); + if (!insideRange(ranges,i,wl)) + { + if (wl>fragLen) + { + fragments.push_back(Fragment(spanStart+w+spanEnd,1)); + ranges.push_back(Range(i,i+wl)); + } + else + { + std::string startFragment,endFragment; + int bi=i-(fragLen-wl)/2; + int ei=i+wl+(fragLen-wl)/2; + int occ=0; + if (bi<0) { ei-=bi; bi=0; } else startFragment=dots; + if (ei>sl) { ei=sl; } else endFragment=dots; + while (bi>0 && !isspace(s[bi])) bi--; // round to start of the word + while (ei<sl && !isspace(s[ei])) ei++; // round to end of the word + // highlight any word in s between indexes bi and ei + std::string fragment=startFragment; + int pos=bi; + for (std::vector<WordPosition>::const_iterator it2=positions.begin(); + it2!=positions.end(); + ++it2) + { + WordPosition wp2 = *it2; + std::string w2 = words[wp2.index]; + int wl2 = w2.length(); + if (wp2.start>=bi && wp2.start+wl2<=ei) // word is inside the range! 
+ { + fragment+=s.substr(pos,wp2.start-pos)+ + spanStart+ + s.substr(wp2.start,wl2)+ + spanEnd; + pos=wp2.start+wl2; + occ++; + } + } + fragment+=s.substr(pos,ei-pos)+endFragment; + fragments.push_back(Fragment(fragment,occ)); + ranges.push_back(Range(bi,ei)); + } + } + } + std::sort(fragments.begin(),fragments.end(),Fragment_greater()); +} + +/** Escapes a string such that is can be included in a JSON structure */ +static std::string escapeString(const std::string &s) +{ + std::stringstream dst; + for (unsigned int i=0;i<s.length();i++) + { + char ch = s[i]; + switch (ch) + { + case '\"': dst << "\\\""; break; + default: dst << ch; break; + } + } + return dst.str(); +} + +static void showError(const std::string &callback,const std::string &error) +{ + std::cout << callback << "({\"error\":\"" << error << "\"})"; + exit(0); +} + +/** Main routine */ +int main(int argc,char **argv) +{ + // process inputs that were passed to us via QUERY_STRING + std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n"; + std::string callback; + try + { + // get input parameters + const char *queryEnv = getenv("QUERY_STRING"); + std::string queryString; + if (queryEnv) + { + queryString = queryEnv; + } + else if (argc>=2) + { + queryString = argv[1]; + } + else + { + std::cout << "No input!\n"; + exit(1); + } + + // parse query string + std::vector<std::string> parts = split(queryString,'&'); + std::string searchFor,callback; + int num=1,page=0; + for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it) + { + std::vector<std::string> kv = split(*it,'='); + if (kv.size()==2) + { + std::string val = uriDecode(kv[1]); + if (kv[0]=="q") searchFor = val; + else if (kv[0]=="n") num = fromString<int>(val); + else if (kv[0]=="p") page = fromString<int>(val); + else if (kv[0]=="cb") callback = val; + } + } + + std::string indexDir = "doxysearch.db"; + + if (queryString=="test") // user test + { + bool dbOk = dirExists(indexDir); + if (dbOk) + { + 
std::cout << "Test successful."; + } + else + { + std::cout << "Test failed: cannot find search index " << indexDir; + } + exit(0); + } + + // create query + Xapian::Database db(indexDir); + Xapian::Enquire enquire(db); + Xapian::Query query; + std::vector<std::string> words = split(searchFor,' '); + for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it) + { + query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it)); + } + enquire.set_query(query); + + // get results + Xapian::MSet matches = enquire.get_mset(page*num,num); + unsigned int hits = matches.get_matches_estimated(); + unsigned int offset = page*num; + unsigned int pages = num>0 ? (hits+num-1)/num : 0; + if (offset>hits) offset=hits; + if (offset+num>hits) num=hits-offset; + + // write results as JSONP + std::cout << callback.c_str() << "("; + std::cout << "{" << std::endl + << " \"hits\":" << hits << "," << std::endl + << " \"first\":" << offset << "," << std::endl + << " \"count\":" << num << "," << std::endl + << " \"page\":" << page << "," << std::endl + << " \"pages\":" << pages << "," << std::endl + << " \"query\": \"" << escapeString(searchFor) << "\"," << std::endl + << " \"items\":[" << std::endl; + // foreach search result + unsigned int o = offset; + for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o) + { + std::vector<Fragment> hl; + Xapian::Document doc = i.get_document(); + highlighter(doc.get_value(FIELD_DOC),words,hl); + std::cout << " {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl + << " \"name\": \"" << doc.get_value(FIELD_NAME) << doc.get_value(FIELD_ARGS) << "\"," << std::endl + << " \"tag\": \"" << doc.get_value(FIELD_TAG) << "\"," << std::endl + << " \"url\": \"" << doc.get_value(FIELD_URL) << "\"," << std::endl; + std::cout << " \"fragments\":[" << std::endl; + int c=0; + bool first=true; + for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c) + { + if (!first) 
std::cout << "," << std::endl; + std::cout << " \"" << escapeString((*it).text) << "\""; + first=false; + } + if (!first) std::cout << std::endl; + std::cout << " ]" << std::endl; + std::cout << " }"; + if (o<offset+num-1) std::cout << ","; + std::cout << std::endl; + } + std::cout << " ]" << std::endl << "})" << std::endl; + } + catch (const Xapian::Error &e) // Xapian exception + { + showError(callback,e.get_description()); + } + catch (...) // Any other exception + { + showError(callback,"Unknown Exception!"); + exit(1); + } + return 0; +} |