1. This site uses cookies. By continuing to use this site, you are agreeing to our use of cookies. Learn More.

PPV SE Scraper

Discussion in 'Pay Per View' started by StackingDough, Dec 30, 2009.

  1. StackingDough

    StackingDough Newbie

    Joined:
    Dec 30, 2009
    Messages:
    12
    Likes Received:
    41
    Occupation:
    Makin' RAIN
    Modify it to fit your own needs (proxies, URL length, number of pages scraped, etc.). Enjoy.


    Code:
       <?php
    /**
     * Small search-result scraper.
     *
     * go($keyword) requests successive result pages (start=0,10,...,190) for
     * the keyword, extracts every href found between the result-link markers,
     * keeps those longer than 40 characters, removes duplicates and echoes
     * each remaining URL followed by "<br />".
     */
    class scraper
    {
        /** cURL handle created by init() and reused for every request. */
        public $ch;

        /** Not written by this class; kept so outside code reading it keeps working. */
        public $result;
        
        function __construct(){}
        
        /**
         * Create the cURL handle and apply the options shared by all requests.
         */
        private function init()
        {    
            $this->ch = curl_init();
            
            // Pool of browser identification strings; one is picked at random per run.
            $agent = array(    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6',
                            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
                            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)',
                            'Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.1; .NET CLR 1.1.4322)',
                            'Opera/9.20 (Windows NT 6.0; U; en)',
                            'Opera/9.00 (Windows NT 5.1; U; en)',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.50',
                            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.0',
                            'Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Opera 7.02 [en]',
                            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1' );
            
            $picked = $agent[array_rand($agent)];
                    
            curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
            curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,1);
            curl_setopt($this->ch,CURLOPT_USERAGENT,$picked);
            curl_setopt($this->ch,CURLOPT_TIMEOUT,10);
            // The posted source had this constant masked as "CURLOPT_******SESSION",
            // which does not parse; CURLOPT_COOKIESESSION is the matching cURL option.
            curl_setopt($this->ch,CURLOPT_COOKIESESSION,1);
            // NOTE(review): certificate checks are switched off here, as in the original.
            curl_setopt($this->ch,CURLOPT_SSL_VERIFYHOST,0);
            curl_setopt($this->ch,CURLOPT_SSL_VERIFYPEER,0);
        }
        
        /**
         * Fetch a URL with a non-POST request on the shared handle.
         *
         * @param string $url
         * @return string|false Response body, or false when curl_exec() fails.
         */
        private function get($url)
        {
            curl_setopt($this->ch,CURLOPT_URL,$url);
            curl_setopt($this->ch,CURLOPT_POST,0);
            
            return curl_exec($this->ch);
        }
        
        /**
         * Return every substring of $source found between $tag1 and $tag2.
         *
         * Both delimiters are first swapped for a private marker pair so that
         * arbitrary delimiter text never has to be escaped inside the regex.
         * The pattern has no "s" modifier, so a match cannot span a newline.
         *
         * @param string $source Text to search.
         * @param string $tag1   Opening delimiter.
         * @param string $tag2   Closing delimiter.
         * @return array List of captured strings (empty when nothing matches).
         */
        private function parse_all($source,$tag1,$tag2)
        {
            $source=str_replace($tag1,'<tiny:parse>',$source);
            $source=str_replace($tag2,'</tiny:parse>',$source);
            
            preg_match_all('#<tiny:parse>(.*?)</tiny:parse>#',$source,$result);
    
            return($result[1]);        
        }
        
        /**
         * Scrape the result pages for $keyword and echo the collected URLs.
         *
         * @param string $keyword Search phrase; it is URL-encoded before use.
         */
        function go($keyword)
        {
            $this->init();
            
            // Start from an empty list: with zero matches the original left
            // $fin undefined and then handed it to array_unique()/foreach.
            $fin = array();
            
            for($start = 0; $start < 200; $start += 10)
            {
                $s = $this->get('google.com/search?hl=en&q='.urlencode($keyword).'&start='.$start.'&sa=N');
                
                // curl_exec() yields false on a failed transfer; nothing to parse then.
                if(!is_string($s)) continue;
                
                $urls = $this->parse_all($s,'<h3 class=r><a href="','" class=l>');
    
                foreach($urls as $url)
                {
                    if(strlen($url)>40) $fin[] = $url;
                }
            }
            
            // array_unique() returns the filtered copy; the original discarded
            // it, so duplicates were never actually removed.
            $fin = array_unique($fin);
            
            foreach($fin as $result) echo $result.'<br />';
        }
    }
    
    // Entry point: take the search phrase from the "q" query-string parameter.
    // The isset()/is_string() guard avoids an undefined-index notice when the
    // parameter is absent and rejects array input such as ?q[]=x, which trim()
    // cannot handle; both cases fall through to the "must provide" message.
    $q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
    
    if(trim($q)=='')
    {
        echo 'You must provide a query';
    }else{
        $scr = new scraper();
        $scr->go($q);
    }
    ?> 
    
     
    • Thanks Thanks x 2
  2. fear91

    fear91 Regular Member

    Joined:
    Dec 14, 2007
    Messages:
    366
    Likes Received:
    88
    Good Share!
     
  3. fistuk

    fistuk Newbie

    Joined:
    Dec 2, 2008
    Messages:
    23
    Likes Received:
    68
    Thanks a lot for the share.

    Not that I care but why reinvent the wheel?
    Laser URL is such a great tool and free...
     
  4. terebl7

    terebl7 Newbie

    Joined:
    Jul 26, 2009
    Messages:
    11
    Likes Received:
    1
    how can i use it? thanks