├── ChangeLog ├── MultiCurl.class.php ├── README └── example.php /ChangeLog: -------------------------------------------------------------------------------- 1 | ChangeLog (MultiCurl class) 2 | ========================================= 3 | 4 | 2010-11-06 - version 1.07 5 | 6 | [*] Small improvements. 7 | 8 | 2008-01-04 - version 1.06 9 | 10 | [*] Improve closeSession method and fix session destruction logic. 11 | Therefore cookiejar works correctly now. Thanks to Mavook (ICQ 272990). 12 | 13 | 2007-08-22 - version 1.05 14 | 15 | [+] MultiCurl class is abstract now. 16 | [+] Add abstract method OnLoad to MultiCurl class. 17 | Required parameter $onLoadFunc is deleted from constructor now. 18 | [*] Change example file and move to ./examples directory. 19 | 20 | 2007-08-18 - version 1.04 21 | 22 | [*] Improve destruction logic. 23 | 24 | 2007-08-18 - version 1.03 25 | 26 | [*] Change source code documentation. 27 | [*] Change example file example.php. 28 | 29 | 2007-08-13 - version 1.02 30 | 31 | [+] Add destructor, it closes correctly all opened sessions. 32 | Thanks to Stanislav Malkin (from http://weblancer.net/users/ArtLab/). 33 | 34 | 2007-08-13 - version 1.01 35 | 36 | [+] Add possibility to set maximal size limit for downloaded content. 37 | [+] Add setter and getter for maxSessions parameter. 38 | [+] Add TODO text file. 39 | [*] Migrate to license LGPL 3.0. 40 | [*] Fix defect in function addUrl for arrays. 41 | Thanks to lyoxa (from http://weblancer.net/users/lyoxa/). 42 | 43 | 2007-08-12 - version 1.00 44 | 45 | [+] Add primary documentation. 46 | [*] Code refactoring, some minor changes. 47 | 48 | 2007-08-12 - version 0.02 49 | 50 | [+] Add possibility to setup common CURL options for all CURL multi sessions. 51 | [+] Setup separate CURL options for different CURL multi sessions. 52 | 53 | 2007-08-11 - version 0.01 54 | 55 | [+] Primary version. 56 | Thanks to Pavel Kudrin (from http://weblancer.net/users/pk2002/) for 57 | initial discussion. 
58 | [+] Set query for CURL multi sessions. The main idea you can use only 59 | limited number of parallel requests. If you add next request, and 60 | the query is fully filled, the script waits while one or some previous 61 | CURL multi sessions will be completed. 62 | [+] Set maximal size limit for downloaded content. Please note: it is 63 | possible to download rather more bytes than the limit because download 64 | operation uses internal buffer. 65 | [+] Set common CURL options for all requests. -------------------------------------------------------------------------------- /MultiCurl.class.php: --------------------------------------------------------------------------------
/**
 * MultiCurl class library.
 *
 * Runs several HTTP(S) downloads in parallel through the PHP cURL "multi"
 * API while capping both the number of simultaneously open sessions and the
 * number of bytes downloaded per session. Every finished (or aborted)
 * download is handed to the subclass through onLoad().
 *
 * Typical use: extend the class, implement onLoad(), call addUrl() for each
 * URL and finally wait() to drain the sessions that are still running.
 *
 * @author    Vadym Timofeyev http://weblancer.net/users/tvv/
 * @copyright 2007-2010 Vadym Timofeyev
 * @license   http://www.gnu.org/licenses/lgpl-3.0.txt
 * @version   1.07
 * @since     PHP 5.0
 * @example   examples/example.php How to use MultiCurl class library.
 */
abstract class MultiCurl {
    /**
     * Pause, in microseconds, inserted after a polling pass that closed no
     * session, so that waiting for transfers does not spin the CPU.
     */
    const IDLE_SLEEP_USEC = 1000;

    /**
     * Maximal number of CURL multi sessions. Default: 10 sessions.
     *
     * @var integer
     */
    private $maxSessions = 10;

    /**
     * Maximal size of downloaded content. Default: 10 Mb (10 * 1024 * 1024).
     *
     * @var integer
     */
    private $maxSize = 10485760;

    /**
     * Common CURL options (used for all requests).
     *
     * @var array
     */
    private $curlOptions;

    /**
     * Current CURL multi sessions, keyed by session id.
     * Each entry is array(multi handle, easy handle, URL).
     *
     * @var array
     */
    private $sessions = array();

    /**
     * Class constructor. Setup primary parameters.
     *
     * When no options are given, a default set (response headers included
     * in the content, a fixed request header list, a fixed user agent and
     * timeouts) is installed.
     *
     * @param array $curlOptions Common CURL options.
     * @throws Exception If $curlOptions is neither empty nor an array.
     */
    public function __construct($curlOptions = array()) {
        if (empty($curlOptions)) {
            $curlOptions = array(
                CURLOPT_HEADER => true,
                CURLOPT_HTTPHEADER => array(
                    'Accept: */*',
                    'Cache-Control: max-age=0',
                    'Accept-Charset: utf-8;q=0.7,*;q=0.7',
                    'Accept-Language: en-us,en;q=0.5',
                    'Pragma: ',
                ),
                CURLOPT_USERAGENT => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
                // NOTE(review): the overall timeout (10) is smaller than the
                // connect timeout (20); presumably the connect timeout can
                // then never be the one that fires - confirm the intent.
                CURLOPT_CONNECTTIMEOUT => 20,
                CURLOPT_TIMEOUT => 10
            );
        }
        $this->setCurlOptions($curlOptions);
    }

    /**
     * Class destructor. Close opened sessions.
     *
     * Sessions that are still running are destroyed without calling onLoad().
     */
    public function __destruct() {
        foreach (array_keys($this->sessions) as $i) {
            $this->destroySession($i);
        }
    }

    /**
     * Adds new URL to the queue and starts its download.
     *
     * If the queue is full, blocks (polling the running sessions) until at
     * least one slot is free. The download itself continues in the
     * background; call wait() to drain everything that is still running.
     *
     * @param mixed $url         URL for downloading, or an array of URLs.
     * @param array $curlOptions CURL options for current request; they
     *                           override the common options.
     * @throws Exception If the URL is empty, the CURL session cannot be
     *                   created, or polling reports a loop.
     */
    public function addUrl($url, $curlOptions = array()) {
        // Check URL
        if (!$url) {
            throw new Exception('URL is empty!');
        }

        // Check array of URLs
        if (is_array($url)) {
            foreach ($url as $s) {
                $this->addUrl($s, $curlOptions);
            }
            return;
        }

        // Wait for a free slot. ">=" (not "==") so the limit still holds
        // after setMaxSessions() lowered it below the number of open sessions.
        while (count($this->sessions) >= $this->maxSessions) {
            $this->checkSessions();
        }

        // Init new CURL session
        $ch = curl_init($url);
        if ($ch === false) {
            throw new Exception('Unable to initialize CURL session!');
        }
        // Common options first, request-specific options second (they win).
        foreach (array($this->curlOptions, $curlOptions) as $options) {
            foreach ($options as $option => $value) {
                curl_setopt($ch, $option, $value);
            }
        }

        // Init new CURL multi session
        $mh = curl_multi_init();
        curl_multi_add_handle($mh, $ch);
        $this->sessions[] = array($mh, $ch, $url);

        // Kick the transfer off; the key just appended is the last one.
        $keys = array_keys($this->sessions);
        $this->execSession(end($keys));
    }

    /**
     * Waits until all CURL multi sessions are completed.
     *
     * @throws Exception If polling reports a loop.
     */
    public function wait() {
        while (count($this->sessions)) {
            $this->checkSessions();
        }
    }

    /**
     * Performs one polling pass over all active CURL multi sessions.
     *
     * @throws Exception If multiSelect() reports a loop for a session.
     */
    protected function checkSessions() {
        $pending = count($this->sessions);

        foreach (array_keys($this->sessions) as $i) {
            // onLoad() may re-enter addUrl()/checkSessions() and close
            // sessions that were still open when this pass started.
            if (!isset($this->sessions[$i])) {
                continue;
            }

            list($mh) = $this->sessions[$i];
            if ($this->multiSelect($mh) == -1) {
                throw new Exception('Multicurl loop detected!');
            }
            $this->execSession($i);
        }

        // Nothing finished during this pass: yield briefly instead of
        // spinning at full speed while the transfers make progress.
        if ($pending > 0 && count($this->sessions) >= $pending) {
            usleep(self::IDLE_SLEEP_USEC);
        }
    }

    /**
     * Advances a CURL multi session by one non-blocking step, then closes it
     * when the transfer is finished, failed, or exceeded the size limit.
     *
     * The session is deliberately NOT driven to completion here: looping
     * until the transfer ends would serialize all downloads and evaluate the
     * size limit only after everything had already been downloaded.
     *
     * @param integer $i A session id.
     */
    protected function execSession($i) {
        if (!isset($this->sessions[$i])) {
            return;
        }
        list($mh, $ch) = $this->sessions[$i];

        do {
            $mrc = curl_multi_exec($mh, $act);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM);

        if (!$act || $mrc !== CURLM_OK || curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD) >= $this->maxSize) {
            $this->closeSession($i);
        }
    }

    /**
     * Replace curl_multi_select.
     *
     * Drives the multi handle while libcurl keeps asking for an immediate
     * further call, giving up once that has lasted for $timeout seconds.
     *
     * @see http://php.net/manual/en/function.curl-multi-select.php#110869
     * @param resource $mh      A cURL multi handle returned by curl_multi_init().
     * @param float    $timeout Maximal time, in seconds, to keep re-invoking
     *                          curl_multi_exec().
     * @return integer CURLM_CALL_MULTI_PERFORM (-1) when the timeout expired
     *                 while libcurl still requested another call; otherwise
     *                 the last status returned by curl_multi_exec().
     */
    protected function multiSelect($mh, $timeout = 1.0) {
        $started = microtime(true);

        // The timeout is only consulted while libcurl requests another call,
        // so a single slow curl_multi_exec() that ended normally is not
        // mistaken for a loop.
        do {
            $mrc = curl_multi_exec($mh, $act);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM && (microtime(true) - $started) < $timeout);

        return $mrc;
    }

    /**
     * Closes session: collects content and transfer information, releases
     * the handles and reports the result through onLoad().
     *
     * @param integer $i A session id.
     */
    protected function closeSession($i) {
        list($mh, $ch, $url) = $this->sessions[$i];

        $failed = (bool)curl_error($ch);
        // Drain the multi handle's message queue as well, so a transfer that
        // ended with an error is recognised even if the easy handle's error
        // string has not been populated.
        while (($msg = curl_multi_info_read($mh)) !== false) {
            if ($msg['msg'] === CURLMSG_DONE && $msg['result'] !== CURLE_OK) {
                $failed = true;
            }
        }

        $content = !$failed ? curl_multi_getcontent($ch) : null;
        $info = curl_getinfo($ch);

        // Free the slot before the callback so onLoad() may add new URLs.
        $this->destroySession($i);
        $this->onLoad($url, $content, $info);
    }

    /**
     * Destroys session: detaches and closes its handles and forgets it.
     * Does nothing if the session does not exist (any more).
     *
     * @param integer $i A session id.
     */
    protected function destroySession($i) {
        if (!isset($this->sessions[$i])) {
            return;
        }
        list($mh, $ch) = $this->sessions[$i];

        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
        curl_multi_close($mh);

        unset($this->sessions[$i]);
    }

    /**
     * Gets maximal number of CURL multi sessions.
     *
     * @return integer Maximal number of CURL multi sessions.
     */
    public function getMaxSessions() {
        return $this->maxSessions;
    }

    /**
     * Sets maximal number of CURL multi sessions.
     *
     * @param integer $maxSessions Maximal number of CURL multi sessions.
     * @throws Exception If the value, cast to integer, is not positive.
     */
    public function setMaxSessions($maxSessions) {
        $maxSessions = (int)$maxSessions;
        if ($maxSessions <= 0) {
            throw new Exception('Max sessions number must be bigger then zero!');
        }

        $this->maxSessions = $maxSessions;
    }

    /**
     * Gets maximal size limit for downloaded content.
     *
     * @return integer Maximal size limit for downloaded content.
     */
    public function getMaxSize() {
        return $this->maxSize;
    }

    /**
     * Sets maximal size limit for downloaded content.
     *
     * @param integer $maxSize Maximal size limit for downloaded content.
     * @throws Exception If the value, cast to integer, is not positive.
     */
    public function setMaxSize($maxSize) {
        $maxSize = (int)$maxSize;
        if ($maxSize <= 0) {
            throw new Exception('Max size limit must be bigger then zero!');
        }

        $this->maxSize = $maxSize;
    }

    /**
     * Gets CURL options for all requests.
     *
     * @return array CURL options.
     */
    public function getCurlOptions() {
        return $this->curlOptions;
    }

    /**
     * Sets CURL options for all requests.
     *
     * CURLOPT_FOLLOWLOCATION is switched on unless the caller specified it,
     * and CURLOPT_RETURNTRANSFER is always forced on because the content is
     * collected with curl_multi_getcontent().
     *
     * @param array $curlOptions CURL options.
     * @throws Exception If $curlOptions is not an array.
     */
    public function setCurlOptions($curlOptions) {
        if (!is_array($curlOptions)) {
            throw new Exception('CURL options must be an array!');
        }

        if (!array_key_exists(CURLOPT_FOLLOWLOCATION, $curlOptions)) {
            $curlOptions[CURLOPT_FOLLOWLOCATION] = 1;
        }
        $curlOptions[CURLOPT_RETURNTRANSFER] = 1;
        $this->curlOptions = $curlOptions;
    }

    /**
     * OnLoad callback event. Called once for every closed session.
     *
     * @param string      $url     URL for downloading.
     * @param string|null $content Downloaded content (possibly truncated by
     *                             the size limit), or null if the transfer
     *                             reported an error.
     * @param array       $info    CURL session information (curl_getinfo()).
     */
    protected abstract function onLoad($url, $content, $info);

    /**
     * Checks CURL extension, etc.
     *
     * @throws Exception If the CURL extension is not loaded.
     */
    public static function checkEnvironment() {
        if (!extension_loaded('curl')) {
            throw new Exception('CURL extension not loaded');
        }
    }
}
-------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README (MultiCurl class) 2 | ========================================= 3 | 4 | NAME: MultiCurl class 5 | 6 | VERSION: 1.07 7 | 8 | AUTHOR: Vadym Timofeyev http://weblancer.net/users/tvv/ 9 | 10 | DESCRIPTION: 11 | 12 | MultiCurl class provides a convenient way to execute parallel HTTP(S) 13 | requests via PHP MULTI CURL extension with additional restrictions. 14 | For example: start 100 downloads with 2 parallel sessions, and get only 15 | first 100 Kb per session. 16 | 17 | Supported features: 18 | 19 | - Set query for CURL multi sessions. The main idea you can use only 20 | limited number of parallel requests. If you add next request and 21 | the query is fully filled the script waits while one or some previous 22 | CURL multi sessions will be completed. 23 | 24 | - Set maximal size limit for downloaded content. Please note: it is 25 | possible to download rather more bytes than the limit because download 26 | operation uses internal buffer. 
27 | 28 | - Set common CURL options for all requests. 29 | 30 | - Set separate CURL options for different requests if it is necessary. 31 | 32 | SYNOPSIS: 33 | 34 | setMaxSessions(2); // limit 2 parallel sessions (by default 10) 47 | $mc->setMaxSize(10240); // limit 10 Kb per session (by default 10 Mb) 48 | $mc->addUrl('http://google.com'); 49 | $mc->addUrl('http://yahoo.com'); 50 | $mc->addUrl('http://altavista.com'); 51 | $mc->wait(); 52 | } catch (Exception $e) { 53 | die($e->getMessage()); 54 | } 55 | ?> 56 | 57 | COPYRIGHT: 58 | 59 | Copyright (c) 2007 Vadym Timofeyev. All rights reserved. 60 | This software is released under the GNU Lesser General Public License. 61 | Please read the disclaimer at the top of the MultiCurl.class.php file. 62 | -------------------------------------------------------------------------------- /example.php: -------------------------------------------------------------------------------- 1 | setMaxSessions(2); // limit 2 parallel sessions (by default 10) 14 | $mc->setMaxSize(10240); // limit 10 Kb per session (by default 10 Mb) 15 | $mc->addUrl('http://google.com'); 16 | $mc->addUrl('http://yahoo.com'); 17 | $mc->addUrl('http://altavista.com'); 18 | $mc->wait(); 19 | } catch (Exception $e) { 20 | die($e->getMessage()); 21 | } --------------------------------------------------------------------------------