├── .gitignore ├── composer.json ├── examples ├── arrays.php ├── queue_from_callback.php ├── prefix_and_append.php ├── simple_urls.php ├── objects.php └── next_url_callback.php ├── LICENSE ├── README.md └── Crimp.php /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | /composer.lock 3 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xpaw/crimp", 3 | "description": "A simple multi curl implementation, optimized for high concurrency.", 4 | "license": "MIT", 5 | "homepage": "https://github.com/xPaw/Crimp.php", 6 | "keywords": 7 | [ 8 | "curl", 9 | "async", 10 | "asynchronous", 11 | "multi", 12 | "parallel", 13 | "rolling", 14 | "guzzle" 15 | ], 16 | "require": 17 | { 18 | "php": ">=8.1", 19 | "ext-curl": "*" 20 | }, 21 | "require-dev": 22 | { 23 | "phpstan/phpstan": "^2.1" 24 | }, 25 | "scripts": 26 | { 27 | "test": "phpstan analyse Crimp.php examples --level max" 28 | }, 29 | "autoload": 30 | { 31 | "files": 32 | [ 33 | "Crimp.php" 34 | ] 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /examples/arrays.php: -------------------------------------------------------------------------------- 1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 7 | 8 | // When queueing arrays, they must contain a `Url` key 9 | $Crimp->Add( [ 'Url' => 'https://example.com' ] ); 10 | 11 | $Crimp->Go(); 12 | 13 | /** @param array{Url: string} $Request */ 14 | function ArrayCallback( CurlHandle $Handle, string $Data, array $Request ) : void 15 | { 16 | preg_match( '/]*>(.*?)<\/title>/', $Data, $Title ); 17 | 18 | $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME ); 19 | 20 | printf( 21 | "%.4f | %-30s | %s (original: %s)\n", 22 | $Time, 23 | substr( $Title[ 1 ] ?? 
'', 0, 30 ), 24 | curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ), 25 | $Request[ 'Url' ] 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /examples/queue_from_callback.php: -------------------------------------------------------------------------------- 1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 7 | 8 | $Crimp->Add( [ 9 | 'Url' => 'https://example.com/?v=1', 10 | 'Count' => 1, 11 | ] ); 12 | 13 | $Crimp->Go(); 14 | 15 | // $OriginalArray here will be the original array that was passed to Add() 16 | /** @param array{Url: string, Count: int} $OriginalArray */ 17 | function QueueCallback( CurlHandle $Handle, string $Data, array $OriginalArray ) : void 18 | { 19 | /** @var Crimp $Crimp */ 20 | global $Crimp; 21 | 22 | echo curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ) . PHP_EOL; 23 | 24 | // Queue again 25 | if( $OriginalArray[ 'Count' ] < 4 ) 26 | { 27 | $NewCount = $OriginalArray[ 'Count' ] + 1; 28 | 29 | $Crimp->Add( [ 30 | 'Url' => 'https://example.com/?v=' . 
$NewCount, 31 | 'Count' => $NewCount, 32 | ] ); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /examples/prefix_and_append.php: -------------------------------------------------------------------------------- 1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 7 | 8 | // String to prepend to all URLs 9 | $Crimp->UrlPrefix = 'https://example.com/?v='; 10 | 11 | // String to append to all URLs 12 | $Crimp->UrlAppend = '&another=value'; 13 | 14 | // These two properties allow saving memory when queueing many requests 15 | $Crimp->Add( '1' ); 16 | $Crimp->Add( '2' ); 17 | $Crimp->Add( '3' ); 18 | $Crimp->Add( '4' ); 19 | 20 | $Crimp->Go(); 21 | 22 | function PrefixAppendCallback( CurlHandle $Handle, string $Data ) : void 23 | { 24 | preg_match( '/]*>(.*?)<\/title>/', $Data, $Title ); 25 | 26 | $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME ); 27 | 28 | printf( 29 | "%.4f | %-30s | %s\n", 30 | $Time, 31 | substr( $Title[ 1 ] ?? '', 0, 30 ), 32 | curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ) 33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Pavel Djundik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/simple_urls.php: -------------------------------------------------------------------------------- 1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 10 | 11 | $Crimp->Urls = 12 | [ 13 | 'https://cloudflare.com', 14 | 'https://news.ycombinator.com', 15 | 'https://www.google.com', 16 | 'https://www.yahoo.com', 17 | ]; 18 | 19 | $Crimp->Go(); 20 | 21 | $FinalTime = microtime( true ) - $StartTime; 22 | 23 | printf( "\nExecution time: %.4f\n", $FinalTime ); 24 | printf( "Total cURL time: %.4f\n", $TotalTime ); 25 | 26 | // $Url here will be the original string that was passed to $Crimp->Urls 27 | function SimpleUrlCallback( CurlHandle $Handle, string $Data, string $Url ) : void 28 | { 29 | /** @var float $TotalTime */ 30 | global $TotalTime; 31 | 32 | preg_match( '/]*>(.*?)<\/title>/', $Data, $Title ); 33 | 34 | $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME ); 35 | $TotalTime += $Time; 36 | 37 | printf( 38 | "%.4f | %-30s | %s (original: %s)\n", 39 | $Time, 40 | substr( $Title[ 1 ] ?? 
'', 0, 30 ), 41 | curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ), 42 | $Url 43 | ); 44 | } 45 | -------------------------------------------------------------------------------- /examples/objects.php: -------------------------------------------------------------------------------- 1 | Url = $url; 14 | } 15 | } 16 | 17 | $Crimp = new Crimp( 'ObjectCallback' ); 18 | $Crimp->CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 19 | 20 | $Crimp->Add( new RequestUrl( 'https://cloudflare.com' ) ); 21 | $Crimp->Add( new RequestUrl( 'https://news.ycombinator.com' ) ); 22 | $Crimp->Add( new RequestUrl( 'https://www.google.com' ) ); 23 | $Crimp->Add( new RequestUrl( 'https://www.yahoo.com' ) ); 24 | 25 | $Crimp->Go(); 26 | 27 | // $Request here will be the RequestUrl object that was queued 28 | function ObjectCallback( CurlHandle $Handle, string $Data, RequestUrl $Request ) : void 29 | { 30 | preg_match( '/]*>(.*?)<\/title>/', $Data, $Title ); 31 | 32 | $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME ); 33 | 34 | printf( 35 | "%.4f | %-30s | %s (original: %s)\n", 36 | $Time, 37 | substr( $Title[ 1 ] ?? '', 0, 30 ), 38 | curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ), 39 | $Request->Url 40 | ); 41 | } 42 | -------------------------------------------------------------------------------- /examples/next_url_callback.php: -------------------------------------------------------------------------------- 1 | NextUrlCallback = 'NextUrlCallback'; 9 | 10 | $Crimp->Add( '1' ); 11 | $Crimp->Add( '2' ); 12 | $Crimp->Add( '3' ); 13 | $Crimp->Add( '4' ); 14 | 15 | $Crimp->Go(); 16 | 17 | function NextUrlExampleCallback( CurlHandle $Handle, string $Data, string $Request ) : void 18 | { 19 | preg_match( '/]*>(.*?)<\/title>/', $Data, $Title ); 20 | 21 | $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME ); 22 | 23 | printf( 24 | "%.4f | %-30s | %s\n", 25 | $Time, 26 | substr( $Title[ 1 ] ?? 
'', 0, 30 ), 27 | curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ) 28 | ); 29 | } 30 | 31 | function NextUrlCallback( CurlHandle $Handle, string $Request ) : void 32 | { 33 | // Use this callback to set any unique options for the request, for example when rotating proxies 34 | // Do keep in mind that because cURL handles are rotated, if one option is set, 35 | // it must be (re)set for every request 36 | 37 | if( $Request === '3' ) 38 | { 39 | curl_setopt( $Handle, CURLOPT_URL, 'https://example.com/test/path' ); 40 | } 41 | else 42 | { 43 | curl_setopt( $Handle, CURLOPT_URL, 'https://example.com/?v=' . $Request ); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Crimp [![Packagist](https://img.shields.io/packagist/dt/xpaw/crimp.svg)](https://packagist.org/packages/xpaw/crimp) 2 | 3 | A simple multi curl implementation, optimized for high concurrency. 4 | 5 | This is practically a bare-bones implementation. 6 | Retrying, HTTP code checking and other stuff is up to the user. 
7 | 8 | Usage: 9 | 10 | ```php 11 | $Crimp = new Crimp( function( CurlHandle $Handle, string $Data, $Request ) : void 12 | { 13 | // $Handle is the cURL handle 14 | // $Data is the content of a cURL handle 15 | // $Request is whatever was queued 16 | } ); 17 | 18 | // How many concurrent threads to use 19 | $Crimp->Threads = 10; 20 | 21 | // Set any cURL options that are needed 22 | $Crimp->CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1; 23 | 24 | // Queue urls 25 | $Crimp->Add( 'https://example.com/?v=1' ); 26 | $Crimp->Add( 'https://example.com/?v=2' ); 27 | 28 | // Queue an array, it must contain a `Url` key 29 | $Crimp->Add( [ 'Url' => 'https://example.com/?v=3' ] ); 30 | 31 | // Queue an object, it must contain a `Url` property 32 | class RequestUrl { public string $Url; } 33 | $request = new RequestUrl(); 34 | $request->Url = 'https://example.com/?v=4'; 35 | $Crimp->Add( $request ); 36 | 37 | // Execute the requests 38 | $Crimp->Go(); 39 | ``` 40 | 41 | `CURLOPT_RETURNTRANSFER` is enabled by default. See the [examples](examples/) folder for more. 42 | 43 | If you need a fully featured multi cURL implementation, take a look at 44 | [Zebra_cURL](https://github.com/stefangabos/Zebra_cURL) or [Guzzle](https://github.com/guzzle/guzzle) instead. 45 | -------------------------------------------------------------------------------- /Crimp.php: -------------------------------------------------------------------------------- 1 | Links to fetch. 38 | * 39 | * Any value in this array will be queued when `Go()` is called. 40 | * This array is a leftover kept for simplicity. Consider using the `Add()` method instead. 41 | */ 42 | public array $Urls = []; 43 | 44 | /** 45 | * @var callable(CurlHandle, string, T): void Callback to be called with the data of executed request 46 | * 47 | * Callback to be called with the data of executed request. 
48 | */ 49 | public $Callback; 50 | 51 | /** 52 | * @var null|callable(CurlHandle, T): void Callback to be called on every executed url 53 | * 54 | * Callback to be called on every executed url. 55 | */ 56 | public $NextUrlCallback; 57 | 58 | /** 59 | * @var array cURL options to be set on each handle 60 | * 61 | * @see https://php.net/curl_setopt 62 | */ 63 | public array $CurlOptions = 64 | [ 65 | CURLOPT_RETURNTRANSFER => 1, // Always return a string instead of directly outputting. 66 | CURLOPT_ENCODING => '', // Empty string tells cURL to send a header containing all supported encoding types. 67 | CURLOPT_TIMEOUT => 30, 68 | CURLOPT_CONNECTTIMEOUT => 10, 69 | ]; 70 | 71 | /** @var SplQueue */ 72 | private readonly SplQueue $Queue; 73 | 74 | /** @var WeakMap */ 75 | private WeakMap $CurrentHandles; 76 | 77 | /** 78 | * Initializes a new instance of the Crimp class. 79 | * 80 | * @param callable(CurlHandle, string, T): void $Callback 81 | */ 82 | public function __construct( callable $Callback ) 83 | { 84 | $this->Callback = $Callback; 85 | $this->CurrentHandles = new WeakMap(); 86 | $this->Queue = new SplQueue(); 87 | } 88 | 89 | /** @param callable(CurlHandle, T): void $Callback */ 90 | public function SetNextUrlCallback( callable $Callback ) : void 91 | { 92 | $this->NextUrlCallback = $Callback; 93 | } 94 | 95 | /** @param T $Url */ 96 | public function Add( string|int|array|object $Url ) : void 97 | { 98 | $this->Queue->enqueue( $Url ); 99 | } 100 | 101 | /** 102 | * Runs the multi curl. 103 | */ 104 | public function Go() : void 105 | { 106 | if( isset( $this->CurlOptions[ CURLOPT_URL ] ) ) 107 | { 108 | throw new InvalidArgumentException( 'cURL options must not contain CURLOPT_URL, it is set during run time' ); 109 | } 110 | 111 | // Legacy? 
112 | foreach( $this->Urls as $Url ) 113 | { 114 | $this->Queue->enqueue( $Url ); 115 | } 116 | 117 | $this->Urls = []; 118 | 119 | $Count = $this->Queue->count(); 120 | 121 | if( $Count === 0 ) 122 | { 123 | throw new InvalidArgumentException( 'No URLs to fetch' ); 124 | } 125 | 126 | $Threads = $this->Threads; 127 | 128 | if( $Threads > $Count || $Threads <= 0 ) 129 | { 130 | $Threads = $Count; 131 | } 132 | 133 | $ShareHandle = curl_share_init(); 134 | curl_share_setopt( $ShareHandle, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL ); 135 | curl_share_setopt( $ShareHandle, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS ); 136 | 137 | $MultiHandle = curl_multi_init( ); 138 | 139 | while( $Threads-- > 0 ) 140 | { 141 | // Note: If curl objects are queued, then this handle is pointless and will not be used 142 | $Handle = curl_init( ); 143 | 144 | curl_setopt( $Handle, CURLOPT_SHARE, $ShareHandle ); 145 | curl_setopt_array( $Handle, $this->CurlOptions ); 146 | 147 | $this->NextUrl( $MultiHandle, $Handle ); 148 | 149 | // Move things along while creating handles, otherwise with many threads there may be issues with SSL connections 150 | if( $Threads % 20 === 0 ) 151 | { 152 | curl_multi_exec( $MultiHandle, $Running ); 153 | } 154 | } 155 | 156 | unset( $Threads, $Count, $Url ); 157 | 158 | $Repeats = 0; 159 | 160 | do 161 | { 162 | curl_multi_exec( $MultiHandle, $Running ); // libcurl = curl_multi_perform 163 | 164 | while( $Done = curl_multi_info_read( $MultiHandle ) ) 165 | { 166 | /** @var CurlHandle $Handle */ 167 | $Handle = $Done[ 'handle' ]; 168 | $Data = curl_multi_getcontent( $Handle ); 169 | 170 | call_user_func( $this->Callback, $Handle, $Data, $this->CurrentHandles[ $Handle ] ); 171 | 172 | curl_multi_remove_handle( $MultiHandle, $Handle ); 173 | 174 | if( !$this->Queue->isEmpty() ) 175 | { 176 | $this->NextUrl( $MultiHandle, $Handle ); 177 | $Running = 1; 178 | } 179 | else 180 | { 181 | unset( $this->CurrentHandles[ $Handle ] ); 182 | } 183 | } 184 | 185 | if( $Running ) 186 
| { 187 | $Descriptors = curl_multi_select( $MultiHandle, 0.1 ); // libcurl = curl_multi_wait 188 | 189 | // count number of repeated zero numfds 190 | if( $Descriptors === 0 ) 191 | { 192 | if( ++$Repeats > 1 ) 193 | { 194 | usleep( 100 ); 195 | } 196 | } 197 | else 198 | { 199 | $Repeats = 0; 200 | } 201 | } 202 | } 203 | while( $Running ); 204 | } 205 | 206 | private function NextUrl( CurlMultiHandle $MultiHandle, CurlHandle $Handle ) : void 207 | { 208 | $Obj = $this->Queue->dequeue(); 209 | $Url = null; 210 | 211 | switch( gettype( $Obj ) ) 212 | { 213 | case 'array': 214 | /** @var array{Url: string} $Obj */ 215 | $Url = $Obj[ 'Url' ]; 216 | break; 217 | 218 | case 'object': 219 | if( $Obj instanceof \CurlHandle ) 220 | { 221 | unset( $this->CurrentHandles[ $Handle ] ); 222 | 223 | $Handle = $Obj; 224 | } 225 | else 226 | { 227 | $Url = $Obj->Url; 228 | } 229 | 230 | break; 231 | 232 | default: 233 | $Url = $Obj; 234 | } 235 | 236 | if( $Url !== null ) 237 | { 238 | curl_setopt( $Handle, CURLOPT_URL, $this->UrlPrefix . $Url . $this->UrlAppend ); 239 | } 240 | 241 | if( $this->NextUrlCallback !== null ) 242 | { 243 | call_user_func( $this->NextUrlCallback, $Handle, $Obj ); 244 | } 245 | 246 | curl_multi_add_handle( $MultiHandle, $Handle ); 247 | 248 | /** @var T $Obj */ 249 | $this->CurrentHandles[ $Handle ] = $Obj; 250 | } 251 | } 252 | --------------------------------------------------------------------------------