diff options
author | dimitri <dimitri@afe2bf4a-e733-0410-8a33-86f594647bc7> | 2009-08-14 14:49:07 (GMT) |
---|---|---|
committer | dimitri <dimitri@afe2bf4a-e733-0410-8a33-86f594647bc7> | 2009-08-14 14:49:07 (GMT) |
commit | 9e6be9a8ae24b788cf2463a703bda48cbd77c773 (patch) | |
tree | fed426d0d7216311cbd009a1fcd2786176478b5e /src/search.php | |
parent | 6e28050ef5483e624122b0bacb998c40664f78ee (diff) | |
download | Doxygen-9e6be9a8ae24b788cf2463a703bda48cbd77c773.zip Doxygen-9e6be9a8ae24b788cf2463a703bda48cbd77c773.tar.gz Doxygen-9e6be9a8ae24b788cf2463a703bda48cbd77c773.tar.bz2 |
Release-1.5.9-20090814
Diffstat (limited to 'src/search.php')
-rw-r--r-- | src/search.php | 324 |
1 files changed, 0 insertions, 324 deletions
// Source: src/search.php (blob f86184c), listed in the diff as
// "deleted file mode 100644" with hunk @@ -1,324 +0,0 @@.
//
// Search back end that looks up the words of a query in the binary index
// file "search.idx" and prints a ranked HTML result table.
//
// This fragment has no opening PHP tag of its own and calls four helpers
// that are not defined in it: end_form(), search_results(), matches_text()
// and report_matches().
//
// Index layout, as read by the code below:
//   - 4 byte header "DOXS"
//   - a table of 4 byte big-endian offsets, one per two-character hash key
//     (see computeIndex()); an offset of 0 means "no words for this key"
//   - per key: a list of (zero-terminated word, 4 byte statistics offset)
//     pairs, terminated by an empty word
//   - per word: a 4 byte document count followed by (document offset,
//     frequency) pairs; the lowest bit of the frequency marks a high
//     priority document
//   - per document: zero-terminated name followed by zero-terminated URL

/*! Reads a 4 byte big-endian integer from \a $file.
 *  Bytes missing because of a short read count as zero.
 */
function readInt($file)
{
  $bytes = fread($file,4);
  if ($bytes===false) $bytes = "";
  $bytes  = str_pad($bytes,4,"\0");
  $fields = unpack("Nvalue",$bytes);
  return $fields["value"];
}

/*! Reads a zero-terminated string from \a $file.
 *  Reading stops at the terminator or at the end of the file; the
 *  terminator is consumed but not returned.
 */
function readString($file)
{
  $result = "";
  while (($c=fgetc($file))!==false && $c!=="\0")
  {
    $result .= $c;
  }
  return $result;
}

/*! Reads the 4 byte magic header from \a $file.
 *  Returns fewer than 4 characters if the file is shorter than that.
 */
function readHeader($file)
{
  $header = fread($file,4);
  return ($header===false) ? "" : $header;
}

/*! Computes the hash key of \a $word: the byte values of its first two
 *  characters combined as hi*256+lo. Hashing on a prefix is what allows
 *  searching for the start of a word.
 *  Returns -1 if the word is shorter than 2 characters or if one of the
 *  first two characters is a zero byte.
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  return $hi*256+$lo;
}

/*! Looks up \a $word in the index \a $file and appends one entry to
 *  \a $statsList for every indexed word that starts with \a $word.
 *
 *  Each entry is an array with the keys "word" (the search word), "match"
 *  (the indexed word), "index" (file offset of its statistics), "full"
 *  (true if the whole word matched) and "docs". Each element of "docs" has
 *  the keys "idx", "freq", "rank", "hi", "name" and "url".
 *
 *  Returns \a $statsList.
 */
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // word cannot be hashed

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words for this hash key

  // collect all words under this key that start with $word
  $start   = sizeof($statsList);
  $count   = $start;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  // Compare against "" explicitly so that an indexed word "0" does not end
  // the list early.
  while ($w!=="")
  {
    $statIdx = readInt($file);
    // strncmp() avoids the numeric-string conversion that == would do
    if (strncmp($w,$word,$wordLen)==0)
    { // found word that matches (as substring)
      $statsList[$count++] = array(
        "word"  => $word,
        "match" => $w,
        "index" => $statIdx,
        "full"  => strlen($w)==$wordLen,
        "docs"  => array()
      );
    }
    $w = readString($file);
  }

  // read the per-document statistics of every new match
  $totalHi     = 0;
  $totalFreqHi = 0;
  $totalFreqLo = 0;
  $end = sizeof($statsList);
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx  = readInt($file);
      $freq = readInt($file);
      $docInfo[$i] = array("idx"  => $idx,
                           "freq" => $freq>>1,
                           "rank" => 0.0,
                           "hi"   => $freq&1
                          );
      // NOTE(review): the totals below add the raw value, priority bit
      // included, while "freq" stores the shifted value. Kept as is so that
      // the ranking does not change - confirm whether this is intended.
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi += $freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo += $freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"] = readString($file);
      $docInfo[$i]["url"]  = readString($file);
    }
    $statsList[$count]["docs"] = $docInfo;
  }

  // compute the frequency rank of the word in each doc
  $totalFreq = ($totalHi+1)*$totalFreqLo + $totalFreqHi;
  if ($totalFreq!=0) // ranks stay 0.0 rather than dividing by zero
  {
    for ($count=$start;$count<$end;$count++)
    {
      $multiplier = $statsList[$count]["full"] ? 2 : 1;
      $numDocs = sizeof($statsList[$count]["docs"]);
      for ($i=0;$i<$numDocs;$i++)
      {
        $weight = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
        if ($statsList[$count]["docs"][$i]["hi"])
        {
          $weight += $totalFreqLo;
        }
        $statsList[$count]["docs"][$i]["rank"] = (float)$weight/$totalFreq;
      }
    }
  }
  return $statsList;
}

/*! Merges the per-word \a $results produced by search() into \a $docs,
 *  which is keyed by document URL. The ranks of all words found in a
 *  document are added up and each match is recorded under "words".
 *  Returns \a $docs.
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key = $di["url"];
      if (isset($docs[$key]))
      {
        $docs[$key]["rank"] += $di["rank"];
      }
      else
      {
        $docs[$key] = array("url"   => $key,
                            "name"  => $di["name"],
                            "rank"  => $di["rank"],
                            "words" => array()
                           );
      }
      $docs[$key]["words"][] = array(
        "word"  => $wordInfo["word"],
        "match" => $wordInfo["match"],
        "freq"  => $di["freq"]
      );
    }
  }
  return $docs;
}

/*! Returns the entries of \a $docs that contain every word of
 *  \a $requiredWords and no word of \a $forbiddenWords. Words are compared
 *  exactly against the "word" field of the recorded matches.
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs = array();
  foreach ($docs as $key => $doc)
  {
    $words = isset($doc["words"]) ? $doc["words"] : array();
    $copy  = true; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found = false;
      foreach ($words as $wordInfo)
      {
        if ($wordInfo["word"]===$reqWord)
        {
          $found = true;
          break;
        }
      }
      if (!$found)
      {
        $copy = false; // document lacks at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($words as $wordInfo)
      {
        foreach ($forbiddenWords as $badWord)
        {
          if ($wordInfo["word"]===$badWord)
          {
            $copy = false; // document contains a forbidden word
            break 2;
          }
        }
      }
    }
    if ($copy) $filteredDocs[$key] = $doc;
  }
  return $filteredDocs;
}

/*! Comparison callback for usort() that orders documents by descending
 *  "rank".
 */
function compare_rank($a,$b)
{
  if ($a["rank"]==$b["rank"])
  {
    return 0;
  }
  return ($a["rank"]>$b["rank"]) ? -1 : 1;
}

/*! Stores a copy of \a $docs, sorted by descending rank, in \a $sorted and
 *  returns it. usort() assigns new numeric keys, so the URL keys of
 *  \a $docs are not kept.
 */
function sort_results($docs,&$sorted)
{
  $sorted = $docs;
  usort($sorted,"compare_rank");
  return $sorted;
}

/*! Prints \a $docs as an HTML table: a numbered link per document followed
 *  by a row listing the matched words and their frequencies.
 */
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo " <tr>\n";
  echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo " </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num = 1;
    foreach ($docs as $doc)
    {
      // NOTE(review): url and name come from the index file and are printed
      // as stored; confirm whether the index already holds them HTML-escaped
      // before adding htmlspecialchars() here.
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo " </tr>\n"; // close the link row before opening the matches row
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        // the word originates from the query string, so escape it
        $word       = $wordInfo["word"];
        $matchRight = substr($wordInfo["match"],strlen($word));
        echo "<b>".htmlspecialchars($word,ENT_QUOTES)."</b>".
             htmlspecialchars((string)$matchRight,ENT_QUOTES).
             "(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}

/*! Entry point: opens and validates "search.idx", parses the "query"
 *  request parameter, runs the search and prints the results.
 *
 *  Query words are separated by spaces and matched case-insensitively.
 *  A word prefixed with '+' must occur in a document; a word prefixed with
 *  '-' must not occur. Only one prefix character is stripped per word.
 */
function main()
{
  // version_compare() orders "10.0.0" after "4.1.0"; strcmp() does not
  if (version_compare(phpversion(),'4.1.0')<0)
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!=="DOXS")
  {
    fclose($file);
    die("Error: Header of index file is invalid!");
  }
  $query = "";
  // ignore array-valued parameters such as query[]=x
  if (isset($_GET["query"]) && is_string($_GET["query"]))
  {
    $query = $_GET["query"];
  }
  // ereg_replace() no longer exists as of PHP 7. Everything except
  // alphanumerics, ':', '.', tab and space is replaced by a space.
  end_form(preg_replace('/[^[:alnum:]:.\t ]/'," ",$query));
  echo " \n<div class=\"searchresults\">\n";
  $results        = array();
  $requiredWords  = array();
  $forbiddenWords = array();
  $foundWords     = array(); // set of words already searched for
  $word = strtok($query," ");
  // strtok() returns false when there are no more tokens; a plain truth
  // test would also stop at the token "0"
  while ($word!==false) // for each word in the search query
  {
    // search() records matches in lower case, so the required and
    // forbidden lists must hold lower case words as well
    $word = strtolower($word);
    if ($word!=="" && $word[0]=='+')
    {
      $word = (string)substr($word,1);
      if ($word!=="") $requiredWords[] = $word;
    }
    elseif ($word!=="" && $word[0]=='-')
    {
      $word = (string)substr($word,1);
      if ($word!=="") $forbiddenWords[] = $word;
    }
    if ($word!=="" && !isset($foundWords[$word]))
    {
      $foundWords[$word] = true;
      search($file,$word,$results);
    }
    $word = strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n";
  fclose($file);
}

main();