Merhaba arkadaşlar, birden fazla kelimeyle arama yapamamak dışında her şey (proje için gereken standart ölçüsünde) bitti; aşağıda metin dosyaları için temel bir arama motoru var. Benim yaptığım yöntemle birden fazla kelime aramanın mümkün olup olmadığını merak ediyorum.
İşte şimdiye kadar yaptıklarım:
<?php
// Record when page generation started; the value is subtracted from the end
// time later in the page to report how long the page took to load.
// microtime(true) returns the current Unix timestamp as a float, in seconds.
$starttime = microtime(true);

// Check to see if a search word has been submitted.
// $searchWord stays null unless the request carries a non-empty string:
//  - is_string() rejects array input such as ?search[]=x, which cannot be
//    echoed or used as an array key further down the page;
//  - an empty or whitespace-only query is treated as "no search".
$searchWord = null;
if (isset($_GET['search']) && is_string($_GET['search'])) {
    $submitted = trim($_GET['search']);
    if ($submitted !== '') {
        $searchWord = $submitted;
    }
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Untitled Document</title>
<link href="style.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="wrapper">
<div id="searchbar">
<h1>PHP Search</h1>
<?php
// Search form: submits back to this same script via GET.
// Both values written into the markup below come from the request, and the
// attributes are single-quoted, so they are escaped with ENT_QUOTES to stop
// a crafted URL or query from breaking out of the attribute.
// The text box is pre-filled with the previous query; when no string query
// was sent it falls back to an empty value instead of reading an unset key.
?>
<form name='searchform' id='searchform' action='<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>' method='get'>
<input type='text' name='search' id='search' value='<?php echo htmlspecialchars((isset($_GET['search']) && is_string($_GET['search'])) ? $_GET['search'] : '', ENT_QUOTES, 'UTF-8'); ?>' />
<input type='submit' value='Search' />
</form>
<br />
<br />
</div><!-- close searchbar -->
<?php
/**
 * Turn raw document text into a word => occurrence-count map.
 *
 * The text is reduced to ASCII letters, digits, whitespace and "+",
 * lower-cased, split on whitespace and counted; every word listed in
 * $stopWords is then removed from the map.
 *
 * @param string $content   Raw file contents.
 * @param array  $stopWords List of words to exclude from the result.
 * @return array Map of word => number of occurrences; empty when no words remain.
 */
function search_count_words($content, array $stopWords)
{
    // Used to replace all illegal characters and spaces etc:
    // strip leading whitespace, collapse whitespace runs, strip trailing whitespace.
    $pat = array("/^\s+/", "/\s{2,}/", "/\s+\$/");
    $rep = array("", " ", "");
    $clean = preg_replace("/[^A-Za-z0-9\s\s+]/", "", $content);
    $clean = preg_replace($pat, $rep, $clean);
    $clean = strtolower($clean);
    preg_match_all('/\S+/', $clean, $matches, PREG_SET_ORDER);

    // Start from an empty map so text without any words still yields an array.
    $counts = array();
    // For each word found, add it to the map;
    // if already added, increment the number of times it appears.
    foreach ($matches as $match) {
        if (!isset($counts[$match[0]])) {
            $counts[$match[0]] = 0;
        }
        $counts[$match[0]]++;
    }

    // Compare list of words to stop words and remove results.
    foreach ($stopWords as $value) {
        if (isset($counts[$value])) {
            unset($counts[$value]);
        }
    }

    return $counts;
}

// Includes external php file that holds list of common/stop words
// ($commonWords is read below, so the include is expected to define it).
include "commonwords.php";
// Fall back to an empty stop-word list when the include did not provide one,
// so a missing or broken commonwords.php does not stop indexing.
if (!isset($commonWords) || !is_array($commonWords)) {
    $commonWords = array();
}

// Initialise index array (one entry per document)
$index = array();
// Initialise words array (filename => word => count)
$words = array();
// Initialise number of docs (in directory)
$num_docs = 0;

// All files with a .txt extension. glob() can return false on error;
// treat that the same as "no files".
$filenames = glob("./files/*.txt");
if ($filenames === false) {
    $filenames = array();
}

foreach ($filenames as $filename) {
    // The second argument makes file_get_contents() also search the include_path.
    $content = file_get_contents($filename, true);
    if ($content === false) {
        // Unreadable file: index it as an empty document instead of failing.
        $content = "";
    }

    // Always an array, even for a file that contains no indexable words.
    $words[$filename] = search_count_words($content, $commonWords);

    // Number of distinct words left in this file after stop-word removal.
    // NOTE(review): this value is used as the TF denominator and is printed as
    // "Total words in file" -- confirm whether the total number of word
    // occurrences was intended instead.
    $totalCount = count($words[$filename]);

    // Add another item to the list
    $index[] = array(
        'filename' => $filename,
        'word' => $words[$filename],
        'all_words_count' => $totalCount
    );

    // Increment the number of documents by 1
    $num_docs++;
}
/**
 * Rank indexed documents for a single search word using TF-IDF.
 *
 * The search word is normalised the same way the index keys were built
 * (punctuation stripped, lower-cased) before it is looked up, so "Hello!"
 * matches the indexed word "hello".
 *
 * @param array  $index      Index entries, each with 'filename', 'word'
 *                           (word => count map) and 'all_words_count'.
 * @param int    $num_docs   Total number of indexed documents.
 * @param string $searchWord Search word as submitted.
 * @return array Matching entries with keys 'filename', 'word', 'count',
 *               'all_words_count', 'TF' and 'TFIDF', highest TF-IDF first;
 *               an empty array when no document contains the word.
 */
function search_rank_results(array $index, $num_docs, $searchWord)
{
    $term = strtolower(trim((string) preg_replace("/[^A-Za-z0-9\s\s+]/", "", $searchWord)));

    // Check index array for search term
    $results = array();
    foreach ($index as $entry) {
        if (array_key_exists($term, $entry['word'])) {
            $results[] = array(
                'filename' => $entry['filename'],
                'word' => $searchWord,
                'count' => $entry['word'][$term],
                'all_words_count' => $entry['all_words_count'],
                'TF' => (float) $entry['word'][$term] / $entry['all_words_count']
            );
        }
    }

    // No matches: return early, otherwise the DF ratio below would divide by zero.
    $num_results = count($results);
    if ($num_results === 0) {
        return array();
    }

    // Total number of documents over the number of documents containing the term,
    // then the IDF as log10 of that ratio.
    $DF = $num_docs / $num_results;
    $IDF = (float) log10($DF);

    // Attach the TF-IDF score to every result and collect the scores for sorting.
    $scores = array();
    foreach ($results as $key => $result) {
        $results[$key]['TFIDF'] = (float) $result['TF'] * $IDF;
        $scores[$key] = $results[$key]['TFIDF'];
    }

    // Sort desc based on TF-IDF score
    array_multisort($scores, SORT_DESC, $results);

    return $results;
}

// Check if search has been performed, then continue.
// is_string() also skips array input (?search[]=x), which cannot be looked up or printed.
if (isset($searchWord) && is_string($searchWord)) {
    $results2 = search_rank_results($index, $num_docs, $searchWord);
    $num_results = count($results2);

    // The query comes from the request, so escape it before writing it into the page.
    $safeWord = htmlspecialchars($searchWord, ENT_QUOTES, 'UTF-8');

    // Echos the number of documents the search term was found in
    echo "<br>Search term found in ".$num_results." documents.<br><br>";

    // Echo out the ranked results
    foreach ($results2 as $result) {
        // The link attribute is single-quoted, hence ENT_QUOTES.
        $safeFile = htmlspecialchars($result['filename'], ENT_QUOTES, 'UTF-8');
        echo "Found ".$safeWord." in <a href='".$safeFile."'>".$safeFile."</a> ".$result['count']." times. Total words in file: ".$result['all_words_count'].". TF-IDF = ".$result['TFIDF']."<br>";
    }

    // Calculates the end time of the page loading and deducts the start time
    // ($starttime is set at the top of the page) to get the time taken.
    $endtime = microtime(true);
    $totaltime = round($endtime - $starttime, 5);
    echo "<div id='timetaken'><p>This page loaded in $totaltime seconds.</p></div>";
}
?>
</div><!-- close wrapper -->
</body>
</html>