├── .gitignore
├── composer.json
├── examples
│   ├── arrays.php
│   ├── queue_from_callback.php
│   ├── prefix_and_append.php
│   ├── simple_urls.php
│   ├── objects.php
│   └── next_url_callback.php
├── LICENSE
├── README.md
└── Crimp.php
/.gitignore:
--------------------------------------------------------------------------------
1 | /vendor/
2 | /composer.lock
3 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "xpaw/crimp",
3 | "description": "A simple multi curl implementation, optimized for high concurrency.",
4 | "license": "MIT",
5 | "homepage": "https://github.com/xPaw/Crimp.php",
6 | "keywords":
7 | [
8 | "curl",
9 | "async",
10 | "asynchronous",
11 | "multi",
12 | "parallel",
13 | "rolling",
14 | "guzzle"
15 | ],
16 | "require":
17 | {
18 | "php": ">=8.1",
19 | "ext-curl": "*"
20 | },
21 | "require-dev":
22 | {
23 | "phpstan/phpstan": "^2.1"
24 | },
25 | "scripts":
26 | {
27 | "test": "phpstan analyse Crimp.php examples --level max"
28 | },
29 | "autoload":
30 | {
31 | "files":
32 | [
33 | "Crimp.php"
34 | ]
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/examples/arrays.php:
--------------------------------------------------------------------------------
1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
7 |
8 | // When queueing arrays, they must contain a `Url` key
9 | $Crimp->Add( [ 'Url' => 'https://example.com' ] );
10 |
11 | $Crimp->Go();
12 |
/**
 * Prints one result row for a completed request that was queued as an array.
 *
 * The row shows the total transfer time, the first 30 bytes of the page title,
 * the effective URL reported by cURL and the URL that was originally queued.
 *
 * @param CurlHandle         $Handle  Handle of the finished transfer
 * @param string             $Data    Response body
 * @param array{Url: string} $Request The array that was passed to `Add()`
 */
function ArrayCallback( CurlHandle $Handle, string $Data, array $Request ) : void
{
    // Capture the text between <title ...> and </title>.
    // `i` accepts any tag casing, `s` lets the title span multiple lines.
    preg_match( '/<title[^>]*>(.*?)<\/title>/is', $Data, $Title );

    $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME );

    printf(
        "%.4f | %-30s | %s (original: %s)\n",
        $Time,
        // No <title> found means there is no capture group, fall back to an empty column
        substr( $Title[ 1 ] ?? '', 0, 30 ),
        curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ),
        $Request[ 'Url' ]
    );
}
28 |
--------------------------------------------------------------------------------
/examples/queue_from_callback.php:
--------------------------------------------------------------------------------
1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
7 |
8 | $Crimp->Add( [
9 | 'Url' => 'https://example.com/?v=1',
10 | 'Count' => 1,
11 | ] );
12 |
13 | $Crimp->Go();
14 |
/**
 * Prints the effective URL of a finished request and queues a follow-up request
 * until the counter carried in the queued array reaches 4.
 *
 * @param CurlHandle                     $Handle        Handle of the finished transfer
 * @param string                         $Data          Response body (unused here)
 * @param array{Url: string, Count: int} $OriginalArray The array that was passed to `Add()`
 */
function QueueCallback( CurlHandle $Handle, string $Data, array $OriginalArray ) : void
{
    /** @var Crimp $Crimp */
    global $Crimp;

    echo curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ), PHP_EOL;

    // Stop chaining once the fourth request has been seen
    if( $OriginalArray[ 'Count' ] >= 4 )
    {
        return;
    }

    $Next = $OriginalArray[ 'Count' ] + 1;

    // Queueing from inside the callback is picked up by the `Go()` call that is still running
    $Crimp->Add( [
        'Url' => 'https://example.com/?v=' . $Next,
        'Count' => $Next,
    ] );
}
35 |
--------------------------------------------------------------------------------
/examples/prefix_and_append.php:
--------------------------------------------------------------------------------
1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
7 |
8 | // String to prepend to all URLs
9 | $Crimp->UrlPrefix = 'https://example.com/?v=';
10 |
11 | // String to append to all URLs
12 | $Crimp->UrlAppend = '&another=value';
13 |
14 | // These two properties allow saving memory when queueing many requests
15 | $Crimp->Add( '1' );
16 | $Crimp->Add( '2' );
17 | $Crimp->Add( '3' );
18 | $Crimp->Add( '4' );
19 |
20 | $Crimp->Go();
21 |
/**
 * Prints one result row for a completed request: total transfer time,
 * the first 30 bytes of the page title and the effective URL reported by cURL.
 *
 * @param CurlHandle $Handle Handle of the finished transfer
 * @param string     $Data   Response body
 */
function PrefixAppendCallback( CurlHandle $Handle, string $Data ) : void
{
    // Capture the text between <title ...> and </title>.
    // `i` accepts any tag casing, `s` lets the title span multiple lines.
    preg_match( '/<title[^>]*>(.*?)<\/title>/is', $Data, $Title );

    $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME );

    printf(
        "%.4f | %-30s | %s\n",
        $Time,
        // No <title> found means there is no capture group, fall back to an empty column
        substr( $Title[ 1 ] ?? '', 0, 30 ),
        curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL )
    );
}
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Pavel Djundik
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/examples/simple_urls.php:
--------------------------------------------------------------------------------
1 | CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
10 |
11 | $Crimp->Urls =
12 | [
13 | 'https://cloudflare.com',
14 | 'https://news.ycombinator.com',
15 | 'https://www.google.com',
16 | 'https://www.yahoo.com',
17 | ];
18 |
19 | $Crimp->Go();
20 |
21 | $FinalTime = microtime( true ) - $StartTime;
22 |
23 | printf( "\nExecution time: %.4f\n", $FinalTime );
24 | printf( "Total cURL time: %.4f\n", $TotalTime );
25 |
/**
 * Prints one result row for a completed request and adds its transfer time
 * to the global `$TotalTime` accumulator.
 *
 * @param CurlHandle $Handle Handle of the finished transfer
 * @param string     $Data   Response body
 * @param string     $Url    The original string that was placed in `$Crimp->Urls`
 */
function SimpleUrlCallback( CurlHandle $Handle, string $Data, string $Url ) : void
{
    /** @var float $TotalTime */
    global $TotalTime;

    // Capture the text between <title ...> and </title>.
    // `i` accepts any tag casing, `s` lets the title span multiple lines.
    preg_match( '/<title[^>]*>(.*?)<\/title>/is', $Data, $Title );

    $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME );
    $TotalTime += $Time;

    printf(
        "%.4f | %-30s | %s (original: %s)\n",
        $Time,
        // No <title> found means there is no capture group, fall back to an empty column
        substr( $Title[ 1 ] ?? '', 0, 30 ),
        curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ),
        $Url
    );
}
45 |
--------------------------------------------------------------------------------
/examples/objects.php:
--------------------------------------------------------------------------------
1 | Url = $url;
14 | }
15 | }
16 |
17 | $Crimp = new Crimp( 'ObjectCallback' );
18 | $Crimp->CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
19 |
20 | $Crimp->Add( new RequestUrl( 'https://cloudflare.com' ) );
21 | $Crimp->Add( new RequestUrl( 'https://news.ycombinator.com' ) );
22 | $Crimp->Add( new RequestUrl( 'https://www.google.com' ) );
23 | $Crimp->Add( new RequestUrl( 'https://www.yahoo.com' ) );
24 |
25 | $Crimp->Go();
26 |
/**
 * Prints one result row for a completed request that was queued as an object.
 *
 * The row shows the total transfer time, the first 30 bytes of the page title,
 * the effective URL reported by cURL and the URL held by the queued object.
 *
 * @param CurlHandle $Handle  Handle of the finished transfer
 * @param string     $Data    Response body
 * @param RequestUrl $Request The RequestUrl object that was queued
 */
function ObjectCallback( CurlHandle $Handle, string $Data, RequestUrl $Request ) : void
{
    // Capture the text between <title ...> and </title>.
    // `i` accepts any tag casing, `s` lets the title span multiple lines.
    preg_match( '/<title[^>]*>(.*?)<\/title>/is', $Data, $Title );

    $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME );

    printf(
        "%.4f | %-30s | %s (original: %s)\n",
        $Time,
        // No <title> found means there is no capture group, fall back to an empty column
        substr( $Title[ 1 ] ?? '', 0, 30 ),
        curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL ),
        $Request->Url
    );
}
42 |
--------------------------------------------------------------------------------
/examples/next_url_callback.php:
--------------------------------------------------------------------------------
1 | NextUrlCallback = 'NextUrlCallback';
9 |
10 | $Crimp->Add( '1' );
11 | $Crimp->Add( '2' );
12 | $Crimp->Add( '3' );
13 | $Crimp->Add( '4' );
14 |
15 | $Crimp->Go();
16 |
/**
 * Prints one result row for a completed request: total transfer time,
 * the first 30 bytes of the page title and the effective URL reported by cURL.
 *
 * @param CurlHandle $Handle  Handle of the finished transfer
 * @param string     $Data    Response body
 * @param string     $Request The string that was passed to `Add()` (unused here)
 */
function NextUrlExampleCallback( CurlHandle $Handle, string $Data, string $Request ) : void
{
    // Capture the text between <title ...> and </title>.
    // `i` accepts any tag casing, `s` lets the title span multiple lines.
    preg_match( '/<title[^>]*>(.*?)<\/title>/is', $Data, $Title );

    $Time = curl_getinfo( $Handle, CURLINFO_TOTAL_TIME );

    printf(
        "%.4f | %-30s | %s\n",
        $Time,
        // No <title> found means there is no capture group, fall back to an empty column
        substr( $Title[ 1 ] ?? '', 0, 30 ),
        curl_getinfo( $Handle, CURLINFO_EFFECTIVE_URL )
    );
}
30 |
/**
 * Chooses the URL for a queued request right before its handle is started.
 *
 * Use this callback to set any options that are unique to a request, for example when rotating proxies.
 * Because cURL handles are rotated between requests, an option set here for one request
 * must be (re)set for every request.
 *
 * @param CurlHandle $Handle  Handle that is about to be used for this request
 * @param string     $Request The string that was passed to `Add()`
 */
function NextUrlCallback( CurlHandle $Handle, string $Request ) : void
{
    // Request '3' goes to a fixed path, everything else is turned into a query string value
    $Target = $Request === '3'
        ? 'https://example.com/test/path'
        : 'https://example.com/?v=' . $Request;

    curl_setopt( $Handle, CURLOPT_URL, $Target );
}
46 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Crimp [Packagist](https://packagist.org/packages/xpaw/crimp)
2 |
3 | A simple multi curl implementation, optimized for high concurrency.
4 |
5 | This is practically a bare-bones implementation.
6 | Retrying, HTTP code checking and other stuff is up to the user.
7 |
8 | Usage:
9 |
10 | ```php
11 | $Crimp = new Crimp( function( CurlHandle $Handle, string $Data, $Request ) : void
12 | {
13 | // $Handle is the cURL handle
14 | // $Data is the content of a cURL handle
15 | // $Request is whatever was queued
16 | } );
17 |
18 | // How many concurrent threads to use
19 | $Crimp->Threads = 10;
20 |
21 | // Set any cURL options that are needed
22 | $Crimp->CurlOptions[ CURLOPT_FOLLOWLOCATION ] = 1;
23 |
24 | // Queue urls
25 | $Crimp->Add( 'https://example.com/?v=1' );
26 | $Crimp->Add( 'https://example.com/?v=2' );
27 |
28 | // Queue an array, it must contain a `Url` key
29 | $Crimp->Add( [ 'Url' => 'https://example.com/?v=3' ] );
30 |
31 | // Queue an object, it must contain a `Url` property
32 | class RequestUrl { public string $Url; }
33 | $request = new RequestUrl();
34 | $request->Url = 'https://example.com/?v=4';
35 | $Crimp->Add( $request );
36 |
37 | // Execute the requests
38 | $Crimp->Go();
39 | ```
40 |
41 | `CURLOPT_RETURNTRANSFER` is enabled by default. See [examples](examples/) folder for more.
42 |
43 | If you need a fully featured multi cURL implementation, take a look at
44 | [Zebra_cURL](https://github.com/stefangabos/Zebra_cURL) or [Guzzle](https://github.com/guzzle/guzzle) instead.
45 |
--------------------------------------------------------------------------------
/Crimp.php:
--------------------------------------------------------------------------------
1 | Links to fetch.
38 | *
39 | * Any value in this array will be queued when `Go()` is called.
40 | * This array is a leftover kept for simplicity. Consider using the `Add()` method instead.
41 | */
42 | public array $Urls = [];
43 |
44 | /**
45 | * @var callable(CurlHandle, string, T): void Callback to be called with the data of executed request
46 | *
47 | * Callback to be called with the data of executed request.
48 | */
49 | public $Callback;
50 |
51 | /**
52 | * @var null|callable(CurlHandle, T): void Callback to be called on every executed url
53 | *
54 | * Callback to be called on every executed url.
55 | */
56 | public $NextUrlCallback;
57 |
58 | /**
59 | * @var array<int, mixed> cURL options to be set on each handle
60 | *
61 | * @see https://php.net/curl_setopt
62 | */
63 | public array $CurlOptions =
64 | [
65 | CURLOPT_RETURNTRANSFER => 1, // Always return a string instead of directly outputting.
66 | CURLOPT_ENCODING => '', // Empty string tells cURL to send a header containing all supported encoding types.
67 | CURLOPT_TIMEOUT => 30,
68 | CURLOPT_CONNECTTIMEOUT => 10,
69 | ];
70 |
71 | /** @var SplQueue<T> */
72 | private readonly SplQueue $Queue;
73 |
74 | /** @var WeakMap<CurlHandle, T> */
75 | private WeakMap $CurrentHandles;
76 |
/**
 * Creates a Crimp instance with an empty request queue.
 *
 * @param callable(CurlHandle, string, T): void $Callback Called for every finished request with
 *                                                        the handle, the response body and the queued item
 */
public function __construct( callable $Callback )
{
    // Internal state first: the pending queue and the handle => queued item map
    $this->Queue = new SplQueue();
    $this->CurrentHandles = new WeakMap();

    $this->Callback = $Callback;
}
88 |
/**
 * Registers the callback that `NextUrl()` invokes for every dequeued item,
 * right before its handle is attached to the multi handle.
 *
 * @param callable(CurlHandle, T): void $Callback Receives the handle and the queued item
 */
public function SetNextUrlCallback( callable $Callback ) : void
{
    $this->NextUrlCallback = $Callback;
}
94 |
/**
 * Appends a request to the end of the queue.
 *
 * @param T $Url A URL, an array with a `Url` key, an object with a `Url` property, or a `CurlHandle`
 */
public function Add( string|int|array|object $Url ) : void
{
    // enqueue() is an alias of push() on SplQueue
    $this->Queue->push( $Url );
}
100 |
/**
 * Runs the multi curl: processes every queued request and returns once nothing is left running.
 *
 * Values left in `$this->Urls` are moved into the queue first. At most `$this->Threads` handles
 * are created (one per queued item when `Threads` is not positive or exceeds the queue size),
 * and each handle is reused for the next queued item after its request completes.
 * `$this->Callback` is invoked for every completed request with the handle, the response
 * content and the item that was queued. Items queued from inside the callback are processed too.
 *
 * @throws InvalidArgumentException If `CurlOptions` contains `CURLOPT_URL`, or if there is nothing to fetch.
 */
public function Go() : void
{
    if( isset( $this->CurlOptions[ CURLOPT_URL ] ) )
    {
        throw new InvalidArgumentException( 'cURL options must not contain CURLOPT_URL, it is set during run time' );
    }

    // Move anything assigned to the legacy `Urls` property into the queue
    foreach( $this->Urls as $Url )
    {
        $this->Queue->enqueue( $Url );
    }

    $this->Urls = [];

    $Count = $this->Queue->count();

    if( $Count === 0 )
    {
        throw new InvalidArgumentException( 'No URLs to fetch' );
    }

    $Threads = $this->Threads;

    // Never create more handles than there are queued items
    if( $Threads > $Count || $Threads <= 0 )
    {
        $Threads = $Count;
    }

    // One share handle for all easy handles (PSL and DNS data)
    $ShareHandle = curl_share_init();
    curl_share_setopt( $ShareHandle, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL );
    curl_share_setopt( $ShareHandle, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS );

    $MultiHandle = curl_multi_init( );

    while( $Threads-- > 0 )
    {
        // Note: If curl objects are queued, then this handle is pointless and will not be used
        $Handle = curl_init( );

        curl_setopt( $Handle, CURLOPT_SHARE, $ShareHandle );
        curl_setopt_array( $Handle, $this->CurlOptions );

        $this->NextUrl( $MultiHandle, $Handle );

        // Move things along while creating handles, otherwise with many threads there may be issues with SSL connections
        if( $Threads % 20 === 0 )
        {
            curl_multi_exec( $MultiHandle, $Running );
        }
    }

    unset( $Threads, $Count, $Url );

    $Repeats = 0;

    do
    {
        curl_multi_exec( $MultiHandle, $Running ); // libcurl = curl_multi_perform

        while( $Done = curl_multi_info_read( $MultiHandle ) )
        {
            /** @var CurlHandle $Handle */
            $Handle = $Done[ 'handle' ];
            $Data = curl_multi_getcontent( $Handle );

            call_user_func( $this->Callback, $Handle, $Data, $this->CurrentHandles[ $Handle ] );

            curl_multi_remove_handle( $MultiHandle, $Handle );

            // The callback may have queued more work, so the queue is checked only after it ran
            if( !$this->Queue->isEmpty() )
            {
                $this->NextUrl( $MultiHandle, $Handle );

                // A handle was just (re)added, keep the outer loop going
                $Running = 1;
            }
            else
            {
                unset( $this->CurrentHandles[ $Handle ] );
            }
        }

        if( $Running )
        {
            $Descriptors = curl_multi_select( $MultiHandle, 0.1 ); // libcurl = curl_multi_wait

            if( $Descriptors === -1 )
            {
                // select failed: back off briefly instead of spinning on curl_multi_exec
                $Repeats = 0;

                usleep( 100 );
            }
            elseif( $Descriptors === 0 )
            {
                // count number of repeated zero numfds
                if( ++$Repeats > 1 )
                {
                    usleep( 100 );
                }
            }
            else
            {
                $Repeats = 0;
            }
        }
    }
    while( $Running );
}
205 |
/**
 * Dequeues the next item, prepares a cURL handle for it and attaches that handle to the multi handle.
 *
 * Arrays supply the URL through their `Url` key, objects through their `Url` property, and any other
 * value is used as the URL itself; `UrlPrefix` and `UrlAppend` are wrapped around it.
 * A queued `CurlHandle` replaces `$Handle` and no URL is set on it here.
 * The `NextUrlCallback`, when set, runs after the URL has been applied.
 *
 * @param CurlMultiHandle $MultiHandle Multi handle the request is attached to
 * @param CurlHandle      $Handle      Handle to reuse for the dequeued item
 */
private function NextUrl( CurlMultiHandle $MultiHandle, CurlHandle $Handle ) : void
{
    $Item = $this->Queue->dequeue();

    if( is_array( $Item ) )
    {
        /** @var array{Url: string} $Item */
        $Target = $Item[ 'Url' ];
    }
    elseif( $Item instanceof \CurlHandle )
    {
        // The caller's handle is not going to be used, forget whatever it was tracking
        unset( $this->CurrentHandles[ $Handle ] );

        $Handle = $Item;
        $Target = null;
    }
    elseif( is_object( $Item ) )
    {
        $Target = $Item->Url;
    }
    else
    {
        $Target = $Item;
    }

    if( $Target !== null )
    {
        curl_setopt( $Handle, CURLOPT_URL, $this->UrlPrefix . $Target . $this->UrlAppend );
    }

    if( $this->NextUrlCallback !== null )
    {
        call_user_func( $this->NextUrlCallback, $Handle, $Item );
    }

    curl_multi_add_handle( $MultiHandle, $Handle );

    // Remember what was queued so `Go()` can hand it to the callback once the transfer finishes
    /** @var T $Item */
    $this->CurrentHandles[ $Handle ] = $Item;
}
251 | }
252 |
--------------------------------------------------------------------------------