CURL抓取網頁內容並用正則提取。

守護大白菜發表於2017-06-05

    <?php  
    // Declare the response as UTF-8 HTML. header() has to run before any
    // output is sent, which is why it sits at the very top of the script.
    header("Content-Type:text/html;charset=UTF-8");  
    /**
     * Thin cURL wrapper: fetches a single URL and keeps the response.
     *
     * The option map is public, so callers may adjust it between
     * construction and exec().
     */
    class Curl {
        /** @var array Option map passed verbatim to curl_setopt_array(). */
        public $setopt;

        /** @var mixed Whatever the most recent curl_exec() call returned. */
        public $data;

        /**
         * Prepare the default options for a request.
         *
         * @param mixed $url Target URL; it is converted to a string.
         */
        public function __construct($url) {
            $options = array();
            $options[CURLOPT_URL] = (string) $url;
            // Hand the body back as a return value instead of printing it.
            $options[CURLOPT_RETURNTRANSFER] = true;
            // Follow HTTP redirects.
            $options[CURLOPT_FOLLOWLOCATION] = true;

            $this->setopt = $options;
        }

        /**
         * Run the request with the current options.
         *
         * @return mixed The curl_exec() result, which is also stored in $data.
         */
        public function exec() {
            $handle = curl_init();
            curl_setopt_array($handle, $this->setopt);

            $response = curl_exec($handle);
            curl_close($handle);

            $this->data = $response;

            return $response;
        }
    }
    /**
     * Regex-based lookups over fetched HTML.
     *
     *  - id()        finds the first element carrying a given id attribute
     *  - tagName()   finds the first element with a given tag name
     *  - className() finds the first element carrying a given class
     *
     * All lookups are byte-wise and case-sensitive. Every method returns
     * null when nothing matches. An element is taken to end at the first
     * closing tag of the same name, so same-name nesting is not resolved
     * (a real HTML parser is needed for that).
     */
    class Preg {
        /**
         * Return the first element whose id attribute equals $id.
         *
         * @param mixed $data HTML to search.
         * @param mixed $id   Exact id value (matched literally).
         * @return string|null The whole element, opening tag through closing tag.
         */
        public function id($data, $id) {
            $needle = $this->quote($id);
            if ($needle === null) {
                return null;
            }
            // Accept a quoted or unquoted value, but require the value to end
            // right after the id so that "foo" does not match id="foobar".
            $pattern = '/<([A-Za-z][\w:-]*)(?:\s[^>]*)?\sid\s*=\s*(["\']?)'
                . $needle
                . '\2(?=[\s\/>])[^>]*>.*?<\/\1\s*>/s';

            return $this->firstMatch($pattern, $data, 0);
        }

        /**
         * Return the inner content of the first <$tag> element.
         *
         * @param mixed $data HTML to search.
         * @param mixed $tag  Tag name (matched literally).
         * @return string|null Content between the tags, with surrounding
         *                     whitespace stripped.
         */
        public function tagName($data, $tag) {
            $needle = $this->quote($tag);
            if ($needle === null) {
                return null;
            }
            // The lookahead stops "div" from matching "<divider>".
            $pattern = '/<' . $needle . '(?=[\s>])[^>]*>\s*(.*?)\s*<\/'
                . $needle . '\s*>/s';

            return $this->firstMatch($pattern, $data, 1);
        }

        /**
         * Return the first element whose class attribute contains $class.
         *
         * @param mixed $data  HTML to search.
         * @param mixed $class Single class name (matched literally).
         * @return string|null The whole element, opening tag through closing tag.
         */
        public function className($data, $class) {
            $needle = $this->quote($class);
            if ($needle === null) {
                return null;
            }
            // First branch: quoted attribute, class must be a whole
            // whitespace-separated token. Second branch: unquoted single value.
            $pattern = '/<([A-Za-z][\w:-]*)(?:\s[^>]*)?\sclass\s*=\s*(?:(["\'])(?:[^"\'>]*\s)?'
                . $needle
                . '(?:\s[^"\'>]*)?\2|'
                . $needle
                . '(?=[\s\/>]))[^>]*>.*?<\/\1\s*>/s';

            return $this->firstMatch($pattern, $data, 0);
        }

        /**
         * Escape a caller-supplied name for literal use inside a "/" delimited
         * pattern. Returns null for values that cannot name anything.
         */
        private function quote($name) {
            if (!is_scalar($name) || (string) $name === '') {
                return null;
            }

            return preg_quote((string) $name, '/');
        }

        /**
         * Run $pattern against $data and return capture group $group, or null
         * when the subject is unusable, the match fails, or PCRE errors out.
         */
        private function firstMatch($pattern, $data, $group) {
            // A failed download or conversion may arrive as false.
            if (!is_scalar($data) || $data === false) {
                return null;
            }
            if (preg_match($pattern, (string) $data, $found) !== 1) {
                return null;
            }

            return isset($found[$group]) ? $found[$group] : null;
        }
    }
      
    // Demo: fetch the Baidu home page and print the content of its first <div>.
    $c = new Curl('http://www.baidu.com');
    $data = $c->exec();

    // exec() yields false when the transfer fails; print nothing in that case.
    if (is_string($data) && $data !== '') {
        // Transcode only when the body is not already valid UTF-8. An empty
        // pattern with the "u" modifier matches exactly when the subject is
        // well-formed UTF-8. Converting a UTF-8 page from GB2312 would
        // corrupt or discard it.
        if (preg_match('//u', $data) !== 1) {
            $converted = iconv('gb2312', 'utf-8', $data);
            // Keep the raw bytes if the conversion fails, so the page is not lost.
            if ($converted !== false) {
                $data = $converted;
            }
        }

        $preg = new Preg();
        echo $preg->tagName($data, 'div');
    }

轉自:    http://blog.csdn.net/qq435792305/article/details/8502027




相關文章