/**
 * Minimal cURL-based page fetcher that extracts simple metadata
 * (title, declared charset) from the downloaded HTML using regular expressions.
 *
 * Typical use: construct with a URL, then call getTitle() and/or getEncoding().
 */
class PhpSpiders {
    /** @var string User-Agent header sent with every request. */
    public $useragent;

    /** @var string|null Page title; populated by getTitle(). */
    public $title;

    /** @var string|null Charset name declared in the page; populated by getEncoding(). */
    public $encoding;

    /** @var int|null HTTP status code of the last completed transfer; populated by getData(). */
    public $status;

    /** @var string URL to download. */
    public $url;

    /** @var string|null Text content; not populated by this class. */
    public $text;

    /** @var string|false|null Raw response body; false after a failed transfer, null before any fetch. */
    public $content;

    /**
     * Extraction rules (PCRE patterns); capture group 1 holds the wanted value.
     *
     * @var array
     */
    public $pattern_key = array(
        // Lazy match so only the first <title> element is captured; no "u" flag so
        // pages that are not valid UTF-8 can still be matched.
        'title' => '/<title[^>]*>(.*?)<\/title>/is',
        // Content of <meta name="description" ...>.
        'descriptions' => '/<meta +name="[dD]escription" +content="([^"]*)" *\/?>/',
        // Charset name from a <meta charset=...> or a Content-Type meta tag.
        'charset' => '/charset=["\']?([\w-]+)/i',
    );

    /**
     * @param string $url URL of the page to fetch.
     */
    public function __construct($url) {
        $this->url = $url;
        $this->useragent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';
    }

    /**
     * Downloads $this->url and stores the body in $this->content and the HTTP
     * status code in $this->status.
     *
     * On failure the cURL error message is echoed, $this->content is false.
     *
     * @return bool true on success, false when the transfer failed.
     */
    public function getData() {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $this->url);
        curl_setopt($curl, CURLOPT_USERAGENT, $this->useragent);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
        // AUTOREFERER only has an effect while following redirects, so enable that too.
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
        curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
        // NOTE(review): peer certificate verification is disabled, as in the original;
        // this permits man-in-the-middle attacks on HTTPS URLs.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 0);

        // curl_exec() signals failure by returning false; it does not throw.
        $response = curl_exec($curl);
        $this->content = $response;

        if ($response === false) {
            $error = curl_error($curl);
            curl_close($curl);
            echo $error;
            return false;
        }

        $this->status = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
        curl_close($curl);
        return true;
    }

    /**
     * Finds the charset declared inside the downloaded content.
     *
     * Requires content to be present already (see getData()); it does not fetch.
     *
     * @return string|null Charset name as written in the page, or null when there
     *                     is no content or no charset declaration.
     */
    public function getEncoding() {
        if (!$this->content) {
            return null;
        }
        if (preg_match($this->pattern_key['charset'], $this->content, $matches) !== 1) {
            return null;
        }
        $this->encoding = $matches[1];
        return $this->encoding;
    }

    /**
     * Returns the text of the page's first <title> element.
     *
     * Downloads the page first when no content has been loaded yet, so an earlier
     * getData() call is reused instead of fetching the URL a second time.
     *
     * @return string|null Title text exactly as it appears in the markup, or null
     *                     when the page could not be fetched or has no title.
     */
    public function getTitle() {
        if (!$this->content) {
            $this->getData();
        }
        if (!$this->content) {
            return null;
        }
        if (preg_match($this->pattern_key['title'], $this->content, $keyContent) !== 1) {
            return null;
        }
        $this->title = $keyContent[1];
        return $this->title;
    }
}
// Example usage: download the cURL manual page and print its <title>.
$url = 'http://php.net/manual/en/function.curl-setopt.php';
$spider = new PhpSpiders($url);
// getTitle() downloads the page itself, so a separate getData() call would
// only fetch the same URL twice.
print_r($spider->getTitle());