├── README.md
└── dynamic_ip.php

/README.md:
--------------------------------------------------------------------------------
# 爬虫动态防屏蔽之动态IP更新策略

本程序为爬虫抓取相关数据,若被屏蔽动态更换IP的解决方案,程序为PHP版本。

DEMO程序可用!

--------------------------------------------------------------------------------
/dynamic_ip.php:
--------------------------------------------------------------------------------
<?php
/**
 * Dynamic-IP refresh strategy for a crawler.
 *
 * Probes a test URL, and when too many probes are judged to have failed,
 * asks the router to reboot (to obtain a new public IP), waits for the
 * network to come back, logs the new IP and starts crawling.
 *
 * @version v1.0
 * @copyright https://hizdm.cn
 */
class dynamic
{
    /** URL fetched to probe whether the crawler is still allowed (placeholder). */
    const PROBE_URL = '测试网址';

    /** Number of probe requests sent per check. */
    const PROBE_COUNT = 10;

    /** More than this many failed probes is treated as "blocked" (freely adjustable). */
    const ERROR_THRESHOLD = 3;

    /** Router credentials and reboot endpoint (placeholders). */
    const ROUTER_USER = '路由器用户名';
    const ROUTER_PASSWORD = '路由器密码';
    const ROUTER_REBOOT_URL = '路由器重启地址';

    /** Host pinged to decide whether the network is back. */
    const PING_TARGET = '211.149.206.77';

    /** How many times to ping after a reboot, and the pause before each attempt. */
    const PING_ATTEMPTS = 3;
    const PING_WAIT_SECONDS = 10;

    /** Service that returns the caller's public IP as plain text. */
    const IP_LOOKUP_URL = 'http://myip.ipip.net/s';

    /** File that receives one record per successful IP change. */
    const LOG_FILE = 'operate.log';

    public function __construct()
    {
        // Nothing to initialise.
    }

    /**
     * Check the network and, if it looks blocked, rotate the IP and crawl.
     *
     * Progress messages are echoed to stdout. When the network does not come
     * back within PING_ATTEMPTS pings, the whole procedure is started again
     * and the script exits once that nested run returns.
     *
     * NOTE(review): the response-analysis step is still a placeholder, so
     * nothing increments $errNum yet and the reboot branch is never reached
     * until that analysis is implemented.
     *
     * @return void
     */
    public function testNet()
    {
        $errNum = 0; // number of probes judged to have failed

        for ($i = 0; $i < self::PROBE_COUNT; $i++) {
            $body = $this->fetchProbe();

            // TODO: analyse $body (false means the transfer itself failed) and
            // increment $errNum when the response shows the crawler is blocked.
        }

        $stepProbe = "Step1:ERROR-number: $errNum \n";
        echo $stepProbe;

        // Few enough errors: assume the network is fine and do nothing.
        if ($errNum <= self::ERROR_THRESHOLD) {
            return;
        }

        $stepReboot = "Step2:Reboot Router Begin! \n";
        echo $stepReboot;

        if ($this->resetIp() !== 'reset-ok') {
            return;
        }

        $rebootOk = "Reboot Router Success! \n";
        echo $rebootOk;
        $stepPing = "Step3:Ping Networks Begin! \n";
        echo $stepPing;

        for ($j = 1; $j <= self::PING_ATTEMPTS; $j++) {
            // Give the router time to come back before each check.
            sleep(self::PING_WAIT_SECONDS);
            $pingRes = $this->pingIp();
            $pingTry = "Ping Networks $j \n";
            echo $pingTry;

            // Strict comparison: a round-trip time of 0 must not be mistaken
            // for the 'no' failure marker (0 == 'no' is true before PHP 8).
            if ($pingRes !== 'no') {
                $pingOk = "Ping Networks Success! \n";
                echo $pingOk;
                $stepCollect = "Step4:Collect Begin! \n";
                echo $stepCollect;

                // The record holds every step message plus timestamp and new IP;
                // only the latest "Ping Networks N" line is included.
                $content = $stepProbe . $stepReboot . $rebootOk . $stepPing . $pingTry . $pingOk . $stepCollect
                    . date('Y-m-d H:i:s') . '-' . $this->getIp() . "\n";
                if (file_put_contents(self::LOG_FILE, $content, FILE_APPEND | LOCK_EX) === false) {
                    // A failed log write must not stop the crawl.
                    error_log('dynamic::testNet: unable to append to ' . self::LOG_FILE);
                }

                $this->crawl();
                return;
            }
        }

        // Every ping attempt failed: run the whole check again, then stop.
        // NOTE(review): this recursion is unbounded while the network stays down.
        $this->testNet();
        exit;
    }

    /**
     * Fetch PROBE_URL once with browser-like headers.
     *
     * @return string|false Response body, or false when the transfer failed.
     */
    private function fetchProbe()
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => self::PROBE_URL,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_TIMEOUT        => 30,
            // Return the body instead of printing it, so no output buffering is needed.
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER     => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36',
                'Connection: Keep-Alive',
            ],
        ]);
        $body = curl_exec($ch);
        curl_close($ch);

        return $body;
    }

    /**
     * Ask the router to reboot via its HTTP endpoint (basic auth).
     *
     * @return string Always 'reset-ok'; the HTTP result is not inspected.
     */
    private function resetIp()
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => self::ROUTER_REBOOT_URL,
            CURLOPT_USERPWD        => self::ROUTER_USER . ':' . self::ROUTER_PASSWORD,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT        => 70,
        ]);
        // NOTE(review): the result is ignored, presumably because a rebooting
        // router may drop the connection before it answers — confirm.
        curl_exec($ch);
        curl_close($ch);

        return 'reset-ok';
    }

    /**
     * Ping PING_TARGET four times to check whether the network is usable.
     *
     * @return string|float 'no' when the ping output is too short or cannot be
     *                      parsed; otherwise the rounded average round-trip
     *                      time (Unix-like systems) or the text after the last
     *                      '=' of the final output line (Windows).
     */
    private function pingIp()
    {
        $target = escapeshellarg(self::PING_TARGET);
        $info = [];

        if (PATH_SEPARATOR === ':') {
            // Unix-like system. Options go before the host so that ping
            // implementations which stop option parsing at the host also work.
            exec("ping -c 4 $target", $info);
            if (count($info) < 9) {
                return 'no';
            }

            // Last line looks like "rtt min/avg/max/mdev = 0.045/0.052/0.060/0.006 ms";
            // the second figure after '=' is the average.
            $summary = $info[count($info) - 1];
            if (!preg_match('~=\s*[\d.]+/([\d.]+)/~', $summary, $match)) {
                return 'no';
            }

            return round((float) $match[1]);
        }

        // Windows.
        exec("ping -n 4 $target", $info);
        if (count($info) < 10) {
            return 'no';
        }

        // The last line ends with the average time after its final '='.
        $summary = $info[count($info) - 1];
        $equalsPos = strrpos($summary, '=');
        if ($equalsPos === false) {
            return 'no';
        }

        return substr($summary, $equalsPos + 1);
    }

    /**
     * Look up the current public IP from IP_LOOKUP_URL.
     *
     * @return string Trimmed response text, or '' when the lookup fails.
     */
    private function getIp()
    {
        // Bound the wait so a dead network cannot stall the script for long.
        $context = stream_context_create(['http' => ['timeout' => 10]]);
        $ip = file_get_contents(self::IP_LOOKUP_URL, false, $context);

        return $ip === false ? '' : trim($ip);
    }

    /**
     * Start crawling.
     *
     * @return void
     */
    private function crawl()
    {
        // Crawling logic goes here.
    }
}

$dynamic_obj = new dynamic();
$dynamic_obj->testNet();