diff options
Diffstat (limited to 'src/search_php.h')
-rw-r--r-- | src/search_php.h | 324 |
1 files changed, 0 insertions, 324 deletions
// PHP search script that was stored as C string literals in
// src/search_php.h (the file this diff deletes: 324 lines, blob afbfc74).
//
// The script reads the binary index "search.idx", looks up every word of the
// "query" GET parameter and prints a ranked HTML table of matching documents.
// end_form(), search_results(), matches_text() and report_matches() are
// called below but are not defined in this fragment.
//
// Index layout as read by the code below:
//   - a 4 byte header ("DOXS")
//   - a table of 4 byte offsets, indexed by the first two characters of a word
//   - per table entry: a list of (zero terminated word, 4 byte statistics
//     offset) pairs, terminated by an empty word
//   - per word statistics: a document count followed by that many
//     (document offset, frequency) pairs; the lowest bit of the frequency
//     marks a high priority document
//   - per document: a zero terminated name followed by a zero terminated url

// Reads a 4 byte big-endian integer from $file.
// Bytes that cannot be read (end of file) count as 0.
function readInt($file)
{
  $value = 0;
  for ($i=0;$i<4;$i++)
  {
    $c = fgetc($file);
    $value = ($value<<8) | ($c===false ? 0 : ord($c));
  }
  return $value;
}

// Reads characters from $file up to (not including) the next zero byte
// or the end of the file and returns them as a string.
function readString($file)
{
  $result="";
  while (($c=fgetc($file))!==false && ord($c)!=0) $result.=$c;
  return $result;
}

// Reads the (at most) 4 character header at the current position of $file.
function readHeader($file)
{
  $header="";
  for ($i=0;$i<4;$i++)
  {
    $c=fgetc($file);
    if ($c!==false) $header.=$c;
  }
  return $header;
}

// Maps $word to its slot in the offset table, or returns -1 if the word
// is too short (or starts with a zero byte) to have one.
function computeIndex($word)
{
  // Fast string hashing
  //$lword = strtolower($word);
  //$l = strlen($lword);
  //for ($i=0;$i<$l;$i++)
  //{
  //  $c = ord($lword[$i]);
  //  $v = (($v & 0xfc00) ^ ($v << 6) ^ $c) & 0xffff;
  //}
  //return $v;

  // Simple hashing that allows for substring search
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}

// Looks up $word in the index $file and appends one entry per index word
// that starts with $word to $statsList. Each entry has the keys
// "word" (the search word), "match" (the index word), "index" (offset of the
// word statistics), "full" (true for a whole word match) and "docs" (list of
// arrays with keys "idx", "freq", "rank", "hi", "name" and "url").
// Returns the updated $statsList.
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // no valid index for this word
  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $index = readInt($file);
  if (!$index) return $statsList; // no words matching the hash key

  // collect all index words that have $word as prefix
  $start = sizeof($statsList);
  $count = $start;
  $wordLen = strlen($word);
  fseek($file,$index);
  $w = readString($file);
  while ($w!=="")
  {
    $statIdx = readInt($file);
    // strncmp avoids the numeric juggling of a loose == on strings
    if (strncmp($w,$word,$wordLen)==0)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
        );
    }
    $w = readString($file);
  }
  $end = sizeof($statsList);

  // read the document lists of the new entries and sum up frequencies
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      // NOTE(review): the totals below use the raw value (priority bit
      // included) while "freq" above is shifted; kept as in the original
      // so the ranking does not change - confirm whether this is intended.
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }

  // compute frequency rank of the word in each doc
  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  if ($totalFreq==0) return $statsList; // nothing to divide by, ranks stay 0.0
  for ($count=$start;$count<$end;$count++)
  {
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    $numDocs = sizeof($statsList[$count]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      $weight = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$count]["docs"][$i]["hi"])
      {
        $weight += $totalFreqLo;
      }
      $statsList[$count]["docs"][$i]["rank"] = (float)$weight/$totalFreq;
    }
  }
  return $statsList;
}

// Merges the per word results of search() into $docs, keyed by document url.
// Each entry gets the keys "url", "name", "rank" (sum of the ranks of all
// words found in the document) and "words" (list of arrays with keys "word",
// "match" and "freq"). Returns the updated $docs.
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      if (isset($docs[$key])) // direct key lookup instead of scanning all keys
      {
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
                            "name"=>$di["name"],
                            "rank"=>$rank,
                            "words"=>array()
                           );
      }
      $docs[$key]["words"][] = array(
                 "word"=>$wordInfo["word"],
                 "match"=>$wordInfo["match"],
                 "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}

// Returns the entries of $docs that contain every word of $requiredWords
// and none of the words of $forbiddenWords; keys are preserved.
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy=1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found=0;
      foreach ($words as $wordInfo)
      {
        $found = $wordInfo["word"]==$reqWord;
        if ($found) break;
      }
      if (!$found)
      {
        $copy=0; // document lacks one of the required words
        break;
      }
    }
    if ($copy && sizeof($forbiddenWords)>0)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"],$forbiddenWords))
        {
          $copy=0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}

// usort() callback: orders documents by descending "rank".
function compare_rank($a,$b)
{
  if ($a["rank"] == $b["rank"])
  {
    return 0;
  }
  return ($a["rank"]>$b["rank"]) ? -1 : 1;
}

// Stores a copy of $docs, sorted by descending rank and re-indexed from 0,
// in $sorted and returns it.
function sort_results($docs,&$sorted)
{
  $sorted = $docs;
  usort($sorted,"compare_rank");
  return $sorted;
}

// Prints $docs as an HTML table: a heading, the number of matches and, per
// document, a link followed by the matched words and their frequencies.
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo " <tr>\n";
  echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo " </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      // "url" and "name" come from the index file and are printed as stored
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo " </tr>\n"; // close the link row before opening the matches row
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        // the word originates from the query string, so escape it
        $word = htmlspecialchars($wordInfo["word"]);
        $matchRight = htmlspecialchars(
            (string)substr($wordInfo["match"],strlen($wordInfo["word"])));
        echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}

// Entry point: validates the environment and the index file, parses the
// "query" GET parameter ("+word" marks a required word, "-word" a forbidden
// one), runs the search and prints the results.
function main()
{
  // compare version numbers component-wise; a plain string comparison
  // would treat e.g. "10.0.0" as older than "4.1.0"
  if (!function_exists("version_compare") ||
      version_compare(phpversion(),"4.1.0")<0)
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // ignore array-valued parameters such as ?query[]=x
  if (isset($_GET["query"]) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  // keep letters, digits, ':', '.', backslash, tab and space; everything
  // else is replaced by a space before the query is handed to end_form()
  end_form(preg_replace('/[^[:alnum:]:\\\\.\t ]/', " ", $query ));
  echo " \n<div class=\"searchresults\">\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  while ($word!==false) // for each word in the search query
  {
    // search() records the lowercase word, so the required/forbidden lists
    // have to hold the lowercase form as well
    $word=strtolower($word);
    $required  = ($word[0]=='+');
    $forbidden = ($word[0]=='-');
    if ($required || $forbidden) $word=(string)substr($word,1);
    if ($word!=="") // skip a lone "+" or "-"
    {
      if ($required)  $requiredWords[]=$word;
      if ($forbidden) $forbiddenWords[]=$word;
      if (!in_array($word,$foundWords))
      {
        $foundWords[]=$word;
        search($file,$word,$results);
      }
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n";
  fclose($file);
}

main();