PPV SE Scraper

Discussion in 'Pay Per View' started by StackingDough, Dec 30, 2009.

  1. StackingDough

    StackingDough Newbie

    Joined:
    Dec 30, 2009
    Messages:
    12
    Likes Received:
    41
    Occupation:
    Makin' RAIN
    Modify it to fit your own needs (proxies, URL length, number of pages scraped, etc.). Enjoy.


    Code:
       <?php
    /**
     * Small cURL-based scraper that walks Google result pages for a keyword
     * and prints the result URLs it finds.
     *
     * Usage: $scr = new scraper(); $scr->go('some keyword');
     */
    class scraper
    {
        /** cURL handle created by init() and reused by get(). */
        public $ch;
        /** Not used by this class; kept so existing callers that touch it keep working. */
        public $result;
        
        function __construct(){}
        
        /**
         * Creates the cURL handle and applies the options shared by every request.
         *
         * A user agent is picked at random from a fixed list on each call.
         */
        private function init()
        {    
            $this->ch = curl_init();
            
            $agent = array(    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6',
                            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
                            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
                            'Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.1; .NET CLR 1.1.4322)',
                            'Opera/9.20 (Windows NT 6.0; U; en)',
                            'Opera/9.00 (Windows NT 5.1; U; en)',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.50',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.0',
                            'Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Opera 7.02 [en]',
                            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1' );
                    
            curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
            curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,1);
            // Pick one agent at random without reordering the whole list.
            curl_setopt($this->ch,CURLOPT_USERAGENT,$agent[array_rand($agent)]);
            curl_setopt($this->ch,CURLOPT_TIMEOUT,10);
            // The constant name was masked in the posted code ("CURLOPT_******SESSION"),
            // which does not parse; CURLOPT_COOKIESESSION is the option that fits.
            curl_setopt($this->ch,CURLOPT_COOKIESESSION,1);
            // NOTE(review): certificate and host verification are switched off, so
            // HTTPS responses are not authenticated. Kept as posted; enable both
            // (VERIFYHOST=2, VERIFYPEER=1) if the results need to be trusted.
            curl_setopt($this->ch,CURLOPT_SSL_VERIFYHOST,0);
            curl_setopt($this->ch,CURLOPT_SSL_VERIFYPEER,0);
        }
        
        /**
         * Performs a GET request on the shared handle.
         *
         * @param string $url Address to fetch.
         * @return string|false Response body, or false when curl_exec() fails.
         */
        private function get($url)
        {
            curl_setopt($this->ch,CURLOPT_URL,$url);
            curl_setopt($this->ch,CURLOPT_POST,0);
            
            return curl_exec($this->ch);
        }
        
        /* parse related */
        /**
         * Returns every substring of $source found between $tag1 and $tag2.
         *
         * Both delimiters are matched literally. Matching is non-greedy and does
         * not span line breaks (no "s" modifier), as in the original pattern.
         *
         * @param string|false $source Text to search; non-strings yield no matches.
         * @param string       $tag1   Literal opening delimiter.
         * @param string       $tag2   Literal closing delimiter.
         * @return array List of captured strings, empty when nothing matches.
         */
        private function parse_all($source,$tag1,$tag2)
        {
            // A failed request hands us false; there is nothing to parse then.
            if(!is_string($source) || $source === '')
            {
                return array();
            }
            
            // Quote the delimiters instead of rewriting the source with marker
            // tags, so text that already contains a marker cannot confuse the match.
            $pattern = '#'.preg_quote($tag1,'#').'(.*?)'.preg_quote($tag2,'#').'#';
            
            // preg_match_all() returns 0 for no match and false on error.
            if(!preg_match_all($pattern,$source,$result))
            {
                return array();
            }
    
            return $result[1];
        }
        
        /**
         * Scrapes result pages for $keyword and echoes each distinct URL found,
         * one per line, separated by "<br />".
         *
         * Twenty pages are requested (start offsets 0 through 190 in steps of 10).
         * Only URLs longer than 40 bytes are kept.
         *
         * @param string $keyword Search phrase; it is URL-encoded before use.
         */
        function go($keyword)
        {
            $this->init();
            
            // Start from an empty list so the output loop is safe with no hits.
            $fin = array();
            
            for($start = 0; $start < 200; $start += 10)
            {
                $s = $this->get('google.com/search?hl=en&q='.urlencode($keyword).'&start='.$start.'&sa=N');
                
                // Skip pages that failed to download rather than parsing false.
                if($s === false)
                {
                    continue;
                }
                
                foreach($this->parse_all($s,'<h3 class=r><a href="','" class=l>') as $url)
                {
                    if(strlen($url)>40) $fin[] = $url;
                }
            }
            
            // array_unique() returns the de-duplicated list; it does not modify its argument.
            $fin = array_unique($fin);
            
            // The URLs come from a remote page, so escape them for HTML output.
            // double_encode is off because href values are normally entity-encoded already.
            foreach($fin as $result) echo htmlspecialchars($result,ENT_QUOTES,'UTF-8',false).'<br />';
        }
    }
    
    // Entry point: read the search phrase from the "q" query-string parameter.
    // A missing parameter or a non-string value (e.g. ?q[]=x) is treated as
    // empty instead of raising a notice or passing an array to trim().
    $q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
    
    if(trim($q)==='')
    {
        echo 'You must provide a query';
    }else{
        // Run the scrape; go() echoes the URLs it finds.
        $scr = new scraper();
        $scr->go($q);
    }
    ?> 
    
     
    • Thanks Thanks x 2
  2. fear91

    fear91 Regular Member

    Joined:
    Dec 14, 2007
    Messages:
    366
    Likes Received:
    88
    Good Share!
     
  3. fistuk

    fistuk Newbie

    Joined:
    Dec 2, 2008
    Messages:
    23
    Likes Received:
    68
    Thanks a lot for the share.

    Not that I care but why reinvent the wheel?
    Laser URL is such a great tool and free...
     
  4. terebl7

    terebl7 Newbie

    Joined:
    Jul 26, 2009
    Messages:
    11
    Likes Received:
    1
    How can I use it? Thanks.