Hogging Bellek benim DOMDocument / DOMXPath PHP Senaryo tutun?

0 Cevap

Bu komut bir forumda belli bağlantıları tarama ve kullanıcı adı, sonrası tarihi ve sonrası sayı ayıklamak için yapılmıştır.

Bu harika çalışıyor, tek sorun bu bellek domuzu ve yaklaşık bir buçuk saat sonra önemli ölçüde yavaşlar olmasıdır.

Herkes bunu hızlandırmak için bir öneriniz var mı? Ben senaryoyu başlatmak için benim sunucuda bir WGET çalışan oldum.

Thanks, Nick

   <?
//this php script is going to download pages and tear them apart from ###

/*
Here's the process:

1. prepare url 
2. get new HTML document from the web
3. extract xpath data
4. input in mysql database
*/


$baseURL="http://www.###.com";

//end viewtopic.php?p=357850
for ($post = 325479; $post <= 357850; $post++) {

//connect to mysql
if (!mysql_connect('localhost','###','###')) echo mysql_error;
mysql_select_db('###');

//check to see if the post is already indexed
$result = mysql_query("SELECT postnumber FROM ### WHERE postnumber = '$post'");
if (mysql_num_rows($result) > 0) {
    //echo "Already in the database." . "<br>";
    mysql_close();
    continue;
}

$url=$baseURL."/viewtopic.php?p=".$post;
//echo $url."<br>";

//get new HTML document
$html = new DOMDocument(); 
$html->loadHTMLFile($url);

$xpath = new DOMXpath($html);

//select the page elements that you want
//I want the parent of the TD class = forumRow
$links = $xpath->query( "//td[@class='forumRow']/parent::tr" ); 

    foreach($links as $results){
        $newDom = new DOMDocument;
        $newDom->appendChild($newDom->importNode($results,true));

        $xpath = new DOMXpath ($newDom);

        //which parts of the selection do you want?
        $time_stamp = trim($xpath->query("//td[2]/table/tr/td/span")->item(0)->nodeValue);
        $user_name = trim($xpath->query("//a[@class='genmed']")->item(0)->nodeValue);
        $post_number = trim($xpath->query("//td/a/@name")->item(0)->nodeValue);

        $return[] = array(
            'time_stamp' => $time_stamp,
            'username' => $user_name,
            'post_number' => $post_number,
            );
    }

    foreach ($return as $output) {
        if (strlen($output['time_stamp']) > 0 && strlen($output['username']) > 0) 
          {
          //$timestamp = substr($output['time_stamp'],8,25);
          //echo $timestamp . "<br>";
          //$unixtimestamp = strtotime($timestamp);
          //echo $unixtimestamp;
          //echo $output['time_stamp']."<br>";
          preg_match("/[Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec]{3} \d{1,2}[,] \d{4} \d{1,2}[:]\d{2}/", $output['time_stamp'],$matches). "<br>";
          $unixtimestamp = strtotime($matches[0]);

          //YYYY-MM-DD HH:MM:SS
          $phpdate=date("Y-m-d H:i:s",$unixtimestamp);
          $username=$output['username'];
          $post_number=$output['post_number'];
          //echo $phpdate ." by ". $username . " #" . $post_number ;

          $result = mysql_query("SELECT postnumber FROM ### WHERE postnumber = '$post_number'");
          if (mysql_num_rows($result) == 0) {         
            if (mysql_query("INSERT INTO ### VALUES('','$url','$username','$phpdate','$post_number')")) echo "Y";
            else echo "N";
            mysql_close();
          }
          echo "<br>";
          }
    }
}
?>

0 Cevap