diff options
Diffstat (limited to 'src/search_functions_php.h')
-rw-r--r-- | src/search_functions_php.h | 374 |
1 files changed, 374 insertions, 0 deletions
<?php
// Search helper functions for the generated HTML documentation.
//
// This file only defines functions; the including page is expected to call
// main(). It relies on the $config and $translator arrays, which are read as
// globals and are presumably provided by search-config.php (verify against
// the generator).
//
// The code deliberately sticks to syntax that works on old and new PHP
// versions alike (no curly-brace string offsets, no each(), no eval()).
// The closing PHP tag is omitted on purpose so that no stray output is sent.
require_once "search-config.php";

/**
 * Escapes the HTML special characters in $text.
 *
 * Works byte-wise, so it does not depend on the character set of the text.
 * '&' is listed first so that entities produced by the later replacements are
 * not escaped a second time.
 *
 * @param string $text raw text
 * @return string text that is safe to place in HTML content or attributes
 */
function search_escape_html($text)
{
  return str_replace(
    array('&', '<', '>', '"', "'"),
    array('&amp;', '&lt;', '&gt;', '&quot;', '&#039;'),
    (string)$text
  );
}

/**
 * Writes the search input field and closes the form and navigation markup.
 *
 * Nothing is written for the form when $config['DISABLE_INDEX'] is set.
 * When $config['GENERATE_TREEVIEW'] is set the translator's split bar markup
 * is written afterwards.
 *
 * @param string $value text to show in the search field (escaped here)
 */
function end_form($value)
{
  global $config;
  global $translator;
  if ($config['DISABLE_INDEX'] == false)
  {
    $value = search_escape_html($value);
    echo " <input type=\"text\" id=\"MSearchField\" name=\"query\" value=\"$value\" size=\"20\" accesskey=\"S\" onfocus=\"searchBox.OnSearchFieldFocus(true)\" onblur=\"searchBox.OnSearchFieldFocus(false)\"/>\n </form>\n </div><div class=\"right\"></div>\n </div>\n </li>\n </ul>\n </div>\n</div>\n";
  }
  if ($config['GENERATE_TREEVIEW'])
  {
    echo $translator['split_bar'];
  }
}

/**
 * Writes the page footer (tree view footer if enabled) and closes the
 * HTML document.
 */
function end_page()
{
  global $config;
  global $translator;
  if ($config['GENERATE_TREEVIEW'])
  {
    echo "</div>\n<div id=\"nav-path\" class=\"navpath\">\n <ul>\n <li class=\"footer\">";
    echo $translator['logo'];
    echo "</li>\n </ul>\n</div>";
  }
  echo "</body></html>";
}

/**
 * @return string translated title of the search results page
 */
function search_results()
{
  global $translator;
  return $translator['search_results_title'];
}

/**
 * Returns the translated "N matches" sentence for $num results.
 *
 * The translator provides three variants (index 0, 1 and 2); every count
 * above 2 uses variant 2. The placeholder $num inside the translated text is
 * replaced by the actual count. This used to be done with eval(), which
 * executed the translated text as PHP code and failed on any double quote in
 * a translation; a plain substitution gives the same text without that risk.
 *
 * @param int $num number of matching documents
 * @return string translated text
 */
function matches_text($num)
{
  global $translator;
  $string = $translator['search_results'][($num>2)?2:$num];
  $shown = (string)$num;
  return strtr($string, array(
    '{$num}' => $shown,
    '${num}' => $shown,
    '$num'   => $shown
  ));
}

/**
 * @return string translated "Matches:" label
 */
function report_matches()
{
  global $translator;
  return $translator['search_matches'];
}

/**
 * Reads a 4 byte big-endian integer from $file.
 *
 * Bytes that are missing because the end of the file was reached count as 0.
 *
 * @param resource $file open index file
 * @return int value read
 */
function readInt($file)
{
  $bytes = str_pad((string)fread($file,4),4,"\0");
  return (ord($bytes[0])<<24)|(ord($bytes[1])<<16)|(ord($bytes[2])<<8)|ord($bytes[3]);
}

/**
 * Reads a zero terminated string from $file.
 *
 * Reading stops at the first zero byte or at the end of the file; the
 * terminator is not part of the result.
 *
 * @param resource $file open index file
 * @return string string read (possibly empty)
 */
function readString($file)
{
  $result="";
  while (($c=fgetc($file))!==false && $c!=="\0")
  {
    $result.=$c;
  }
  return $result;
}

/**
 * Reads the 4 character header at the current position of $file.
 *
 * @param resource $file open index file
 * @return string up to 4 characters
 */
function readHeader($file)
{
  return (string)fread($file,4);
}

/**
 * Computes the hash table slot for $word from its first two characters.
 *
 * Using only the first two characters allows for prefix (substring) search.
 *
 * @param string $word search word
 * @return int slot number, or -1 when the word is shorter than two
 *             characters or one of the two characters is a zero byte
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  return $hi*256+$lo;
}

/**
 * Looks up $word in the index file and appends one entry per matching index
 * word to $statsList.
 *
 * Each appended entry has the keys "word" (the search word), "match" (the
 * index word that starts with it), "index" (file offset of its statistics),
 * "full" (true for a whole word match) and "docs". "docs" is a list of
 * arrays with the keys "idx", "freq", "rank", "hi", "name" and "url".
 *
 * @param resource $file       open index file
 * @param string   $word       lower case search word
 * @param array    &$statsList result list, extended in place
 * @return array the extended $statsList
 */
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // word cannot be hashed

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $index = readInt($file);
  if (!$index) return $statsList; // no words for this hash key

  // Pass 1: collect all index words that start with the search word.
  $start = count($statsList);
  $count = $start;
  fseek($file,$index);
  $w = readString($file);
  while ($w!=="") // an empty string terminates the word list
  {
    $statIdx = readInt($file);
    if ($word==substr($w,0,strlen($word)))
    { // found word that matches (as substring)
      $statsList[$count++]=array(
        "word"=>$word,
        "match"=>$w,
        "index"=>$statIdx,
        "full"=>strlen($w)==strlen($word),
        "docs"=>array()
      );
    }
    $w = readString($file);
  }
  $end = count($statsList);

  // Pass 2: read the document list of every collected word and sum up the
  // frequencies. The lowest bit of the stored frequency marks a high
  // priority document; the totals are built from the stored (unshifted)
  // value.
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }

  // Pass 3: compute the frequency rank of the word in each document.
  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  for ($count=$start;$count<$end;$count++)
  {
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    $numDocs = count($statsList[$count]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      $freq = $statsList[$count]["docs"][$i]["freq"];
      $weight = $freq*$multiplier;
      if ($statsList[$count]["docs"][$i]["hi"])
      {
        $weight += $totalFreqLo;
      }
      // guard against a division by zero when all frequencies are 0
      $statsList[$count]["docs"][$i]["rank"] =
        ($totalFreq!=0) ? (float)$weight/$totalFreq : 0.0;
    }
  }
  return $statsList;
}

/**
 * Merges the per-word results into one entry per document.
 *
 * Documents are keyed by their url. The ranks of all words found in a
 * document are added up and every word is recorded in the "words" list of
 * the document.
 *
 * @param array $results list produced by search()
 * @param array &$docs   url => document map, extended in place
 * @return array the extended $docs
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      if (isset($docs[$key]))
      {
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
                            "name"=>$di["name"],
                            "rank"=>$rank,
                            "words"=>array()
                           );
      }
      $docs[$key]["words"][] = array(
        "word"=>$wordInfo["word"],
        "match"=>$wordInfo["match"],
        "freq"=>$di["freq"]
      );
    }
  }
  return $docs;
}

/**
 * Removes documents that lack a required word or contain a forbidden word.
 *
 * @param array $docs            url => document map from combine_results()
 * @param array &$requiredWords  words that must all occur in a document
 * @param array &$forbiddenWords words that must not occur in a document
 * @return array the documents that pass both checks, keys preserved
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  foreach ($docs as $key => $doc)
  {
    // collect the search words that were found in this document
    $present=array();
    foreach ($doc["words"] as $wordInfo)
    {
      $present[]=$wordInfo["word"];
    }
    $copy=true; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      if (!in_array($reqWord,$present,true))
      {
        $copy=false; // document lacks one of the required words
        break;
      }
    }
    if ($copy)
    {
      foreach ($present as $word)
      {
        if (in_array($word,$forbiddenWords,true))
        {
          $copy=false; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}

/**
 * Comparison callback that orders documents by descending rank.
 *
 * @param array $a document entry
 * @param array $b document entry
 * @return int -1, 0 or 1
 */
function compare_rank($a,$b)
{
  if ($a["rank"] == $b["rank"])
  {
    return 0;
  }
  return ($a["rank"]>$b["rank"]) ? -1 : 1;
}

/**
 * Sorts $docs by descending rank.
 *
 * The result is stored in $sorted (re-indexed from 0) and also returned.
 *
 * @param array $docs    documents to sort
 * @param array &$sorted receives the sorted list
 * @return array the sorted list
 */
function sort_results($docs,&$sorted)
{
  $sorted = $docs;
  usort($sorted,"compare_rank");
  return $sorted;
}

/**
 * Writes the search results as an HTML table.
 *
 * For every document a row with its number and link is written, followed by
 * a row listing the matched words with their frequency. The matched part of
 * each word is shown in bold.
 *
 * NOTE(review): the document name and url are written as stored in the
 * index; whether they are already HTML-escaped there cannot be told from
 * this file, so they are left untouched.
 *
 * @param array &$docs sorted list of documents
 */
function report_results(&$docs)
{
  echo "<div class=\"header\">";
  echo " <div class=\"headertitle\">\n";
  echo " <h1>".search_results()."</h1>\n";
  echo " </div>\n";
  echo "</div>\n";
  echo "<div class=\"searchresults\">\n";
  echo "<table cellspacing=\"2\">\n";
  $numDocs = count($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo " </tr>\n"; // close the link row before starting the match row
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        $matchRight = (string)substr($wordInfo["match"],strlen($word));
        // the word originates from the query string, so escape it
        echo "<b>".search_escape_html($word)."</b>".
             search_escape_html($matchRight)."(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
  echo "</div>\n";
}

/**
 * Runs a search query against search/search.idx.
 *
 * The query is split on spaces. A word prefixed with '+' is required, a word
 * prefixed with '-' is forbidden. Words are matched in lower case. The script
 * is terminated with an error message when the PHP version is too old or the
 * index file cannot be opened or has an invalid header.
 *
 * @param string $query raw query string
 * @return array list of matching documents, best rank first
 */
function run_query($query)
{
  // Compare versions numerically; a plain string comparison would rate
  // "10.0.0" lower than "4.1.0".
  if (!function_exists('version_compare') ||
      version_compare(phpversion(),'4.1.0','<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search/search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  // strtok() returns false when there are no more tokens; testing for that
  // explicitly keeps a "0" token from ending the loop early.
  for ($token=strtok($query," "); $token!==false; $token=strtok(" "))
  {
    $prefix = $token[0];
    $isRequired  = ($prefix=='+');
    $isForbidden = ($prefix=='-');
    if ($isRequired || $isForbidden)
    {
      $token = (string)substr($token,1);
    }
    // Matches are recorded in lower case, so the required and forbidden
    // words have to be stored in lower case as well.
    $word = strtolower($token);
    if ($word==="") continue; // a lone '+' or '-'
    if ($isRequired)  $requiredWords[]=$word;
    if ($isForbidden) $forbiddenWords[]=$word;
    if (!in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
  }
  fclose($file);
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  return $sorted;
}

/**
 * Entry point: runs the query given by the "query" request parameter and
 * writes the rest of the results page.
 */
function main()
{
  $query = "";
  // ignore non-string values such as ?query[]=x
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  $sorted = run_query($query);
  // Now output the HTML stuff...
  // End the HTML form
  end_form(preg_replace("/[^a-zA-Z0-9\-\_\.]/i", " ", $query ));
  // report results to the user
  report_results($sorted);
  end_page();
}