├── .gitignore ├── examples └── test.php ├── composer.json ├── src ├── HttpFuture.php ├── future.php ├── task.php └── TaskManager.php ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | -------------------------------------------------------------------------------- /examples/test.php: -------------------------------------------------------------------------------- 1 | fetch(); 10 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "calabashdoll/curl-future", 3 | "description": "Parallel CURL Requests with PHP", 4 | "type": "library", 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "leo", 9 | "email": "263068280@qq.com" 10 | } 11 | ], 12 | "autoload":{ 13 | "psr-4":{"CurlFuture\\":"src/"} 14 | }, 15 | "require": {} 16 | } 17 | -------------------------------------------------------------------------------- /src/HttpFuture.php: -------------------------------------------------------------------------------- 1 | addTask($url, $options); 26 | 27 | $this->callback = function($data)use($mt, $ch){ 28 | return $mt->fetch($ch); 29 | }; 30 | } 31 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 zhangyue 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/future.php: -------------------------------------------------------------------------------- 1 | callback = $callback; 21 | } 22 | 23 | /** 24 | * 链式执行的函数,避免大量回调,上一个future执行的结果会作为下一个future执行结果的参数来执行 25 | * @param @callback 一个可执行的函数 26 | * @author fang 27 | * @version 2015年11月25日09:52:00 28 | */ 29 | public function then($callback){ 30 | if($this->nextFuture){ 31 | $this->nextFuture->then($callback); 32 | }else{ 33 | $this->nextFuture = new self($callback); 34 | } 35 | 36 | return $this; 37 | } 38 | 39 | /** 40 | * future真正执行的方法,一直执行到future链到最后一个,并返回最后一个的执行结果 41 | * @param @input 初始输入参数 42 | * @author fang 43 | * @version 2015年11月25日09:52:00 44 | */ 45 | public function fetch($input = null){ 46 | $ret = call_user_func_array($this->callback, array($input)); 47 | if($this->nextFuture){ 48 | return $this->nextFuture->fetch($ret); 49 | }else{ 50 | return $ret; 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/task.php: -------------------------------------------------------------------------------- 1 | url = $url; 20 | $ch = curl_init(); 21 | 22 | 23 | $curlOptions = array( 24 | CURLOPT_TIMEOUT => 1, 25 | CURLOPT_RETURNTRANSFER => 1, 26 | CURLOPT_URL => $url, 27 | 28 | ); 29 | 30 | //这个地方需要合并cat的头信息 31 | $headers = isset($options['header'])?$options['header']:array(); 32 | $curlOptions[CURLOPT_HTTPHEADER] = $headers; 33 | 34 | if(isset($options['proxy_url']) && $options['proxy_url']){ 35 | $curlOptions[CURLOPT_PROXY] = $options['proxy_url']; 36 | } 37 | 38 | //设置超时时间 39 | $timeout = isset($options['timeout']) ? $options['timeout'] : 1; 40 | if($timeout<1){ 41 | $curlOptions[CURLOPT_TIMEOUT_MS] = intval($timeout * 1000); 42 | $curlOptions[CURLOPT_NOSIGNAL] = 1; 43 | }else{ 44 | $curlOptions[CURLOPT_TIMEOUT] = $timeout; 45 | } 46 | 47 | // 如果需要post数据 48 | if (isset($options['post_data']) && $options['post_data']) { 49 | $curlOptions[CURLOPT_POST] = true; 50 | 51 | curl_setopt($ch, CURLOPT_POST, true); 52 | $postData = $options['post_data']; 53 | if (is_array($options['post_data'])) { 54 | $postData = http_build_query($options['post_data']); 55 | } 56 | $curlOptions[CURLOPT_POSTFIELDS] = $postData; 57 | } 58 | 59 | curl_setopt_array($ch, $curlOptions); 60 | 61 | $this->ch = $ch; 62 | } 63 | 64 | 65 | /** 66 | * 请求完成后调用,可以在这个函数里面加入日志与统计布点,返回http返回结果 67 | * @author fang 68 | * @version 2015年11月25日09:52:00 69 | * @return 成功string,失败false 70 | */ 71 | public function complete(){ 72 | return $this->getContent(); 73 | } 74 | 75 | 76 | /** 77 | * 如果curl已经完成,通过这个函数读取内容 78 | * @author fang 79 | * @version 2015年11月25日09:52:00 80 | * @return 成功string,失败false 81 | */ 82 | private function getContent(){ 83 | $error = curl_errno($this->ch); 84 | if($error !== 0){ 85 | return false; 86 | } 87 | 88 | return curl_multi_getcontent($this->ch); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/TaskManager.php: -------------------------------------------------------------------------------- 1 | multiHandle = curl_multi_init(); 28 | } 29 | 30 | function __destruct(){ 31 | curl_multi_close($this->multiHandle); 32 | } 33 | 34 | /** 35 | * 添加curl任务,options参考HttpFuture::__construct 36 | * @author fang 37 | * @version 2015年11月25日09:52:00 38 | * @return curl_handle 39 | */ 40 | public function addTask($url, $options){ 41 | $req = new Task($url, $options); 42 | $ch = $req->ch; 43 | 44 | $this->runningTasks[(int)$ch] = array( 45 | 'return' => false, 46 | 'req' => $req, 47 | 'ch' => $ch, 48 | ); 49 | 50 | curl_multi_add_handle($this->multiHandle, $ch); 51 | 52 | return $ch; 53 | } 54 | 55 | /** 56 | * 如果ch未完成,阻塞并且并行执行curl请求,直到对应ch完成,返回对应结果 57 | * @author fang 58 | * @version 2015年11月25日09:52:00 59 | * @return string 60 | */ 61 | public function fetch($ch){ 62 | $chKey = (int)$ch; 63 | $this->debug("fetch ".(int)$ch); 64 | 65 | //如果两个队列里面都没有,那么退出 66 | if(!array_key_exists($chKey, $this->runningTasks) && !array_key_exists($chKey, $this->finishedTasks) )return false; 67 | 68 | $active = 1; 69 | do{ 70 | //如果任务完成了,那么退出 71 | if(array_key_exists($chKey, $this->finishedTasks))break; 72 | 73 | //执行multiLoop,直到该任务完成 74 | $active = $this->multiLoop(); 75 | //如果执行出错,那么停止循环 76 | if($active === false)break; 77 | }while(1); 78 | 79 | return $this->finishTask($ch); 80 | } 81 | 82 | /** 83 | * 循环一次multi任务 84 | * @author fang 85 | * @version 2015年11月25日09:52:00 86 | * @return bool true:可以继续执行 false:已经循环结束,无法继续执行 87 | */ 88 | protected function multiLoop(){ 89 | //echo '.'; 90 | $active = 1; 91 | 92 | // fix for https://bugs.php.net/bug.php?id=63411 93 | // see https://github.com/petewarden/ParallelCurl/blob/master/parallelcurl.php 94 | // see http://blog.marchtea.com/archives/109 95 | while(curl_multi_exec($this->multiHandle, $active) === CURLM_CALL_MULTI_PERFORM); 96 | 97 | $ret = 0; 98 | //等待socket操作 99 | $ret = curl_multi_select($this->multiHandle, self::SELECT_TIMEOUT); 100 | 101 | //处理已经完成的句柄 102 | while ($info = curl_multi_info_read($this->multiHandle)) { 103 | $ch = $info['handle']; 104 | $this->debug('get content'.(int)$ch); 105 | 106 | $task = $this->runningTasks[(int)$ch]; 107 | $task['return'] = $task['req']->complete(); 108 | 109 | unset($this->runningTasks[(int)$ch]); 110 | $this->finishedTasks[(int)$ch] = $task; 111 | curl_multi_remove_handle($this->multiHandle, $ch); 112 | 113 | } 114 | 115 | return $active; 116 | } 117 | 118 | /** 119 | * 完成任务,执行任务回调 120 | * @author fang 121 | * @version 2015年11月25日09:52:00 122 | * @return mixed 输出该http请求的内容 123 | */ 124 | protected function finishTask($ch){ 125 | $this->debug("finishTask ".(int)$ch); 126 | 127 | $ch = (int)$ch; 128 | $task = $this->finishedTasks[$ch]; 129 | unset($this->finishedTasks[$ch]); 130 | return $task['return']; 131 | } 132 | 133 | protected function debug($s){ 134 | //echo time()." {$s}\n"; 135 | } 136 | 137 | static protected $instance; 138 | /** 139 | * 获得TaskManager单例 140 | * @author fang 141 | * @version 2015年11月25日09:52:00 142 | * @return TaskManager 143 | */ 144 | static public function getInstance(){ 145 | if(!self::$instance){ 146 | self::$instance = new self(); 147 | } 148 | return self::$instance; 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CurlFuture: PHP Curl并行轮转请求库 2 | 3 | multicurl系列方法是提高php请求后端http接口的一种途径。但是直接使用的话,存在几方面问题: 4 | 5 | - 部分版本的curl扩展有bug,需要用特定的方式来调用([Rolling cURL: PHP并发最佳实践](http://www.searchtb.com/2012/06/rolling-curl-best-practices.html)) 6 | - 网上流传的CurlRolling库都只支持前面加入,最后一并执行这种使用模式。而最理想的是随时加入,需要的时候从里面取出所需的结果,且不需等待其他请求返回 7 | - 为了提升效率,大部分库选择使用回调函数的方式来执行,对已有程序改造成本较高 8 | 9 | 为了解决这些问题,开发了CurlFuture库,实现了并行请求,先到先取,链式执行的特性。 10 | 11 | ## 应用场景 12 | 13 | 对于一些大型公司,PHP作为接口聚合层来使用,而接口通过HTTP协议给出。对于一些复杂的页面,可能需要请求几十个相互独立的接口, 14 | 如果使用并行模式,则可以极大的提升性能。 15 | 16 | ## 安装方法 17 | 引入入口php文件即可:`include __DIR__.'/curl_future.php';` 18 | 19 | ## 使用方法 20 | 21 | ```php 22 | /** 23 | * 获得一个延迟执行curl的类 24 | * @param $url 请求url地址 25 | * @param $options = array(), 26 | * header:头信息(Array), 27 | * proxy_url:代理服务器地址, 28 | * timeout:超时时间,可以小于1 29 | * post_data: string|array post数据 30 | * @return CurlFuture\HttpFuture 31 | */ 32 | function curl_future($url, $options = array()); 33 | 34 | echo curl_future("http://s.newhua.com/2015/1113/304528.shtml?4", array()) 35 | ->fetch(); 36 | ``` 37 | 38 | ## 并行请求的实例(async.php) 39 | 40 | ```php 41 | include __DIR__.'/curl_future.php'; 42 | 43 | $f4 = curl_future("http://s.newhua.com/2015/1113/304528.shtml?4"); 44 | $f5 = curl_future("http://s.newhua.com/2015/1113/304528.shtml?5"); 45 | 46 | echo strlen($f1->fetch()); //这个地方会并行执行 47 | echo "\n"; 48 | echo strlen($f2->fetch()); 49 | echo "\n"; 50 | ``` 51 | 52 | ## 链式执行的示例(then.php) 53 | 54 | ```php 55 | include __DIR__.'/curl_future.php'; 56 | 57 | echo curl_future("http://s.newhua.com/2015/1113/304528.shtml") 58 | ->then(function($data){ 59 | return strlen($data); 60 | }) 61 | ->then(function($len){ 62 | return "Length: $len"; 63 | }) 64 | ->fetch(); 65 | ``` 66 | 67 | ## 和Model/Service结合的示例(model.php) 68 | 69 | ```php 70 | include __DIR__.'/curl_future.php'; 71 | 72 | class BookModel{ 73 | //接口串行调用的示例,通过then函数将处理过程串联起来 74 | static public function getTitleFuture($id){ 75 | return curl_future("http://111.202.7.252/{$id}") 76 | ->then(function($data){ 77 | return strlen($data); 78 | }) 79 | ->then(function($data){ 80 | $url = "http://111.202.7.252/{$data}"; 81 | $html = curl_future($url)->fetch(); 82 | preg_match('/title(.+?)\/title/is', $html, $matches); 83 | return $matches[1]; 84 | }); 85 | } 86 | 87 | //普通接口调用+后续处理的示例 88 | static public function getContentFuture($id){ 89 | return curl_future("http://111.202.7.252/{$id}") 90 | ->then(function($data){ 91 | return substr($data, 0, 100); 92 | }); 93 | } 94 | } 95 | 96 | //多个请求并行发出示例,这个地方用Model封装起来,便于和不同框架相结合 97 | $t1 = BookModel::getTitleFuture('111'); 98 | $t2 = BookModel::getTitleFuture('222'); 99 | $t3 = BookModel::getTitleFuture('333'); 100 | 101 | $c1 = BookModel::getContentFuture('111'); 102 | $c2 = BookModel::getContentFuture('222'); 103 | $c3 = BookModel::getContentFuture('333'); 104 | 105 | //fetch函数会阻塞住,这个地方会把所有队列里面的请求发出,直到需要获取的t1的请求执行完再返回 106 | var_dump($t1->fetch()); 107 | //由于上个fetch已经阻塞过了,下面的这个fetch很可能无需阻塞直接返回,也有可能上面的fetch没有执行完,此处阻塞住继续执行请求,直到拿到t2的数据 108 | var_dump($t2->fetch()); 109 | var_dump($c3->fetch()); 110 | ``` 111 | 112 | ## 原理 113 | 114 | 在每次fetch的时候,开始事件循环。当所需http返回后,结束循环。继续执行php逻辑。 115 | ```php 116 | //task_manager.php 117 | public function fetch($ch){ 118 | $chKey = (int)$ch; 119 | 120 | //如果两个队列里面都没有,那么退出 121 | if(!array_key_exists($chKey, $this->runningTasks) && !array_key_exists($chKey, $this->finishedTasks) )return false; 122 | 123 | $active = 1; 124 | do{ 125 | //如果任务完成了,那么退出 126 | if(array_key_exists($chKey, $this->finishedTasks))break; 127 | 128 | //执行multiLoop,直到该任务完成 129 | $active = $this->multiLoop(); 130 | //如果执行出错,那么停止循环 131 | if($active === false)break; 132 | }while(1); 133 | 134 | return $this->finishTask($ch); 135 | } 136 | ``` 137 | 138 | ## 性能测试 139 | 140 | 请求本机接口200次,nginx默认页面,同步、异步与file_get_contents对比 141 | 142 | /example/bench.php 143 | 144 | curl_future sync:384 ms 145 | file_get_contents:390 ms 146 | curl_futhre async:68 ms 147 | 148 | curl_future sync:624 ms 149 | file_get_contents:460 ms 150 | curl_futhre async:69 ms 151 | 152 | curl_future sync:463 ms 153 | file_get_contents:355 ms 154 | curl_futhre async:70 ms 155 | 156 | curl_future sync:447 ms 157 | file_get_contents:409 ms 158 | curl_futhre async:66 ms 159 | 160 | 同步方式没有file_get_contents稳定,但是异步批量方式性能提升很明显。 161 | 162 | ## 参考项目 163 | 164 | - [Client URL Library](http://www.php.net/manual/en/book.curl.php) 165 | - [Parallel CURL Requests with PHP](http://blog.rob.cx/multi-curl) 166 | - [A more efficient multi-curl library for PHP (non-blocking)](http://code.google.com/p/rolling-curl/) 167 | - [PHP: Parallel cURL Performance](http://stackoverflow.com/questions/10485199/php-parallel-curl-performance-rollingcurl-vs-parallelcurl) --------------------------------------------------------------------------------