// Fetch a page through an HTTP proxy with cURL.
// After this runs, $output holds the response body (or false if the transfer
// failed) and $errorinfo holds the cURL error message, which is an empty
// string when no error occurred.
$url = "https://www.xxx.com";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);                 // target URL
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);         // return the body from curl_exec() instead of printing it
curl_setopt($ch, CURLOPT_HEADER, 0);                 // leave response headers out of the returned body
curl_setopt($ch, CURLOPT_PROXY, '183.111.xxx.xxx');  // proxy host (placeholder address)
curl_setopt($ch, CURLOPT_PROXYPORT, '9999');         // proxy port
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);     // NOTE: disables TLS certificate verification
curl_setopt($ch, CURLOPT_POST, 1);                   // issue the request as a POST
// Without timeouts a dead or stalled proxy blocks the script indefinitely.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);         // seconds allowed for establishing the connection
curl_setopt($ch, CURLOPT_TIMEOUT, 30);               // seconds allowed for the whole transfer
$output = curl_exec($ch);
$errorinfo = curl_error($ch);                        // must be read before the handle is closed
curl_close($ch);
话不多说,直接上代码 $url = "https://www.xxx.com"; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); //请求url地址 curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); //设置获取页面内容 curl_setopt($ch, CURLOPT_HEADER, 0); //不设置头部 curl_setopt($ch, CURLOPT_PROXY, '
// Configure the existing cURL handle in one call: target URL, return the body
// as a string, and cap the connection phase at $timeout seconds.
$timeout = 5;
curl_setopt_array($ch, [
    CURLOPT_URL => $requestUrl,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CONNECTTIMEOUT => $timeout,
]);
/**
 * Fetch a URL with cURL while presenting spoofed client details.
 *
 * Sends a fixed browser User-Agent, uses the target URL itself as the
 * Referer, and adds X-FORWARDED-FOR / CLIENT-IP headers carrying a fixed
 * address. Redirects are followed, gzip-encoded responses are requested,
 * and TLS peer/host verification is disabled.
 *
 * @param string $url Address to request.
 *
 * @return string|bool The response body, or false if curl_exec() fails.
 */
public function spider($url)
{
    $ch = curl_init(); // initialise the cURL handle
    $timeout = 30; // connection timeout: 30s
    $ip = "192.168.1.101"; // address reported in the forwarded-for headers
    $ua = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'; // spoofed crawler UA

    curl_setopt($ch, CURLOPT_URL, $url); // request target
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the response instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); // connection timeout
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects
    curl_setopt($ch, CURLOPT_REFERER, $url); // spoofed referer: the target URL itself
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:' . $ip, 'CLIENT-IP:' . $ip)); // spoofed client IP headers
    curl_setopt($ch, CURLOPT_USERAGENT, $ua); // spoofed user agent
    // The original set CURLOPT_ENCODING twice ("" and then 'gzip'); only the
    // last value takes effect, so the single effective setting is kept. This
    // requests gzip-compressed responses (it does not disable compression).
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip');
    // HTTPS requests: do not verify the certificate or the host name.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);

    $content = curl_exec($ch);
    curl_close($ch); // release the handle

    return $content; // response body, or false on failure
}
分享一个PHP脚本,使用代理IP来访问网页,方便抓取数据什么的~
什么情况下会用到代理IP?比如你要抓取一个网站数据,该网站有100万条内容,他们做了IP限制,每个IP每小时只能抓1000条,如果单个IP去抓因为受限,需要40天左右才能采集完,如果用了代理IP,不停的切换IP,就可以突破每小时1000条的频率限制,从而提高效率。
脚本开始:
<?php
$gourl = "[https://www.juwf.cn/xy/286062883](https://www.juwf.cn/xy/2860628
'sleep' => 120 ,
'random_ua' => true ,
'
curl_request' => "'http://www.google.com' -X POST -H 'Origin: http://www.google.com'" ,
'
curl_continue' => function () { return true ; },
'
curl_continue_per_
ip' => function ( $ resp ) {
return !! preg_matc