Modify it to fit your own needs (proxies, URL length, number of pages scraped, etc.). Enjoy.
Code:
<?php
/**
 * Minimal search-result scraper built on cURL.
 *
 * go() requests consecutive result pages for a keyword, pulls the link
 * targets out of the returned markup and prints the ones that are long enough.
 */
class scraper
{
    /** cURL handle created by init() and reused for every request. */
    public $ch;

    /** Not used by any method of this class; kept so existing callers that touch it keep working. */
    public $result;

    public function __construct(){}

    /**
     * Creates the cURL handle and applies the options shared by all requests.
     */
    private function init()
    {
        $this->ch = curl_init();

        // Pool of browser identification strings; one is picked at random per run.
        $agents = array(
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
            'Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.1; .NET CLR 1.1.4322)',
            'Opera/9.20 (Windows NT 6.0; U; en)',
            'Opera/9.00 (Windows NT 5.1; U; en)',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.50',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.0',
            'Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Opera 7.02 [en]',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1'
        );

        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);   // return the body instead of printing it
        curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($this->ch, CURLOPT_USERAGENT, $agents[array_rand($agents)]);
        curl_setopt($this->ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($this->ch, CURLOPT_COOKIESESSION, 1);
        // NOTE(review): TLS host and peer verification are switched off here, so the
        // remote end is not authenticated. Left unchanged to preserve behaviour;
        // enable both unless there is a concrete reason not to.
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 0);
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, 0);
    }

    /**
     * Performs a GET request on the shared handle.
     *
     * @param string $url Address to fetch.
     * @return string|false Response body, or false when curl_exec() fails.
     */
    private function get($url)
    {
        curl_setopt($this->ch, CURLOPT_URL, $url);
        curl_setopt($this->ch, CURLOPT_POST, 0);

        return curl_exec($this->ch);
    }

    /* parse related */

    /**
     * Returns every piece of text found between $tag1 and $tag2 in $source.
     *
     * Both delimiters are first swapped for a fixed sentinel element so they can
     * be matched without regex-escaping them. The match does not span newlines.
     *
     * @param string|false $source Markup to search; a non-string or empty value yields no matches.
     * @param string       $tag1   Literal text that opens a match.
     * @param string       $tag2   Literal text that closes a match.
     * @return array List of captured strings (possibly empty).
     */
    private function parse_all($source, $tag1, $tag2)
    {
        if (!is_string($source) || $source === '') {
            return array();
        }

        $source = str_replace($tag1, '<tiny:parse>', $source);
        $source = str_replace($tag2, '</tiny:parse>', $source);
        preg_match_all('#<tiny:parse>(.*?)</tiny:parse>#', $source, $matches);

        return isset($matches[1]) ? $matches[1] : array();
    }

    /**
     * Prints each distinct entry of $urls followed by a line break.
     *
     * Entries come from remote markup, so HTML special characters are escaped
     * before output; entities that are already encoded are left as they are.
     *
     * @param array $urls Collected link targets, duplicates allowed.
     */
    private function print_results($urls)
    {
        foreach (array_unique($urls) as $url) {
            echo htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) . '<br />';
        }
    }

    /**
     * Scrapes result pages for $keyword and prints the distinct links found.
     *
     * Pages are requested at offsets 0, 10, 20, ... for as long as the offset is
     * below $max_start. Pages that fail to download are skipped.
     *
     * @param string $keyword    Search phrase; URL-encoded before use.
     * @param int    $max_start  Exclusive upper bound for the result offset (default 200).
     * @param int    $min_length Only links longer than this many bytes are kept (default 40).
     */
    public function go($keyword, $max_start = 200, $min_length = 40)
    {
        $this->init();

        // Start from an empty list so a run without any hits prints nothing
        // instead of operating on an undefined variable.
        $fin = array();

        for ($start = 0; $start < $max_start; $start += 10) {
            $s = $this->get('google.com/search?hl=en&q=' . urlencode($keyword) . '&start=' . $start . '&sa=N');
            if ($s === false) {
                continue; // transfer failed; move on to the next page
            }

            foreach ($this->parse_all($s, '<h3 class=r><a href="', '" class=l>') as $url) {
                if (strlen($url) > $min_length) {
                    $fin[] = $url;
                }
            }
        }

        $this->print_results($fin);
    }
}
// Entry point: take the search phrase from the "q" query-string parameter.
// A missing or non-string value (e.g. ?q[]=x) is treated like an empty query
// so it reaches the friendly message instead of raising a notice or type error.
$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
if (trim($q) === '') {
    echo 'You must provide a query';
} else {
    $scr = new scraper();
    $scr->go($q);
}
?>