├── .github └── FUNDING.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── composer.json └── src └── Transformer.php /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: clue 2 | custom: https://clue.engineering/support 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.4.0 (2023-04-21) 4 | 5 | * Feature: Forward compatibility with upcoming Promise v3. 6 | (#26 by @clue) 7 | 8 | * Feature: Full support for PHP 8.2 and update test environment. 9 | (#25 by @clue) 10 | 11 | * Update documentation and simplify examples by updating to new default loop. 12 | (#23 and #24 by @PaulRotmann) 13 | 14 | * Improve test suite, ensure 100% code coverage and use GitHub actions for continuous integration (CI). 15 | (#21 and #27 by @clue) 16 | 17 | ## 1.3.0 (2020-10-16) 18 | 19 | * Enhanced documentation for ReactPHP's new HTTP client. 20 | (#20 by @SimonFrings) 21 | 22 | * Improve test suite and add `.gitattributes` to exclude dev files from exports. 23 | Prepare PHP 8 support, update to PHPUnit 9 and simplify test matrix. 24 | (#16, #18 and #19 by @SimonFrings) 25 | 26 | ## 1.2.0 (2020-04-17) 27 | 28 | * Feature: Add `any()` helper to await first successful fulfillment of operations. 29 | (#15 by @clue) 30 | 31 | ```php 32 | // new: limit concurrency while awaiting first operation to complete successfully 33 | $promise = Transformer::any($input, 3, function ($data) use ($browser, $url) { 34 | return $browser->post($url, [], json_encode($data)); 35 | }); 36 | 37 | $promise->then(function (ResponseInterface $response) { 38 | echo 'First successful response: ' . $response->getBody() . PHP_EOL; 39 | }); 40 | ``` 41 | 42 | * Improve test suite to run tests on PHP 7.4 and simplify test matrix 43 | and add support / sponsorship info. 
44 | (#13 and #14 by @clue) 45 | 46 | ## 1.1.0 (2018-08-13) 47 | 48 | * Feature: Add `all()` helper to await successful fulfillment of all operations. 49 | (#11 by @clue) 50 | 51 | ```php 52 | // new: limit concurrency while awaiting all operations to complete 53 | $promise = Transformer::all($input, 3, function ($data) use ($browser, $url) { 54 | return $browser->post($url, [], json_encode($data)); 55 | }); 56 | 57 | $promise->then(function ($count) { 58 | echo 'All ' . $count . ' jobs successful!' . PHP_EOL; 59 | }); 60 | ``` 61 | 62 | * Feature: Forward compatibility with stable Stream v1.0 LTS. 63 | (#10 by @clue) 64 | 65 | ## 1.0.0 (2018-05-25) 66 | 67 | * First stable release, following SemVer 68 | 69 | I'd like to thank [@geertvanbommel](https://github.com/geertvanbommel), 70 | a fellow software architect specializing in database batch processing and 71 | API development, for sponsoring the first release! 🎉 72 | Thanks to sponsors like this, who understand the importance of open source 73 | development, I can justify spending time and focus on open source development 74 | instead of traditional paid work. 75 | 76 | > Did you know that I offer custom development services and issuing invoices for 77 | sponsorships of releases and for contributions? Contact me (@clue) for details. 
78 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Christian Lück 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is furnished 10 | to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clue/reactphp-flux 2 | 3 | [![CI status](https://github.com/clue/reactphp-flux/actions/workflows/ci.yml/badge.svg)](https://github.com/clue/reactphp-flux/actions) 4 | [![code coverage](https://img.shields.io/badge/code%20coverage-100%25-success)](#tests) 5 | [![installs on Packagist](https://img.shields.io/packagist/dt/clue/reactphp-flux?color=blue&label=installs%20on%20Packagist)](https://packagist.org/packages/clue/reactphp-flux) 6 | 7 | Flux, the lightweight stream processor to concurrently do many (but not too many) things at once, 8 | built on top of [ReactPHP](https://reactphp.org/). 9 | 10 | Let's say you have a large list of users or products that you want to process 11 | by individually sending a (RESTful) HTTP API request to some third party API 12 | for each record. Estimating each call to take around `0.3s` means that having 13 | `10000` users processed sequentially, you would have to wait around 50 minutes 14 | for all jobs to complete. This works perfectly fine for a small number of 15 | operations, but keeping thousands of jobs in memory at once may easily take up 16 | all resources on your side. 17 | Instead, you can use this library to stream your arbitrarily large input list 18 | as individual records to a non-blocking (async) transformation handler. It uses 19 | [ReactPHP](https://reactphp.org/) to enable you to concurrently process multiple 20 | records at once. You can control the concurrency limit, so that by allowing 21 | it to process 10 operations at the same time, you can thus process this large 22 | input list around 10 times faster and at the same time you're no longer limited 23 | how many records this list may contain (think processing millions of records). 
24 | This library provides a simple API that is easy to use in order to manage any 25 | kind of async operation without having to mess with most of the low-level details. 26 | You can use this to throttle multiple HTTP requests, database queries or pretty 27 | much any API that already uses Promises. 28 | 29 | * **Async execution of operations** - 30 | Choose how many async operations should be processed at once (concurrently). 31 | Process their results as soon as responses come in. 32 | The Promise-based design provides a *sane* interface to working with out of order results. 33 | * **Standard interfaces** - 34 | Allows easy integration with existing higher-level components by implementing 35 | ReactPHP's standard [promises](#promises) and [streaming interfaces](#streaming). 36 | * **Lightweight, SOLID design** - 37 | Provides a thin abstraction that is [*just good enough*](https://en.wikipedia.org/wiki/Principle_of_good_enough) 38 | and does not get in your way. 39 | Builds on top of well-tested components and well-established concepts instead of reinventing the wheel. 40 | * **Good test coverage** - 41 | Comes with an [automated tests suite](#tests) and is regularly tested in the *real world*. 42 | 43 | **Table of contents** 44 | 45 | * [Support us](#support-us) 46 | * [Quickstart example](#quickstart-example) 47 | * [Usage](#usage) 48 | * [Transformer](#transformer) 49 | * [Promises](#promises) 50 | * [Timeout](#timeout) 51 | * [Streaming](#streaming) 52 | * [all()](#all) 53 | * [any()](#any) 54 | * [Install](#install) 55 | * [Tests](#tests) 56 | * [License](#license) 57 | * [More](#more) 58 | 59 | ## Support us 60 | 61 | We invest a lot of time developing, maintaining and updating our awesome 62 | open-source projects. You can help us sustain this high-quality of our work by 63 | [becoming a sponsor on GitHub](https://github.com/sponsors/clue). 
Sponsors get 64 | numerous benefits in return, see our [sponsoring page](https://github.com/sponsors/clue) 65 | for details. 66 | 67 | Let's take these projects to the next level together! 🚀 68 | 69 | ## Quickstart example 70 | 71 | Once [installed](#install), you can use the following code to process an example 72 | user list by sending a (RESTful) HTTP API request for each user record: 73 | 74 | ```php 75 | <?php 76 | 77 | require __DIR__ . '/vendor/autoload.php'; 78 | 79 | $browser = new React\Http\Browser(); 80 | 81 | $concurrency = isset($argv[1]) ? $argv[1] : 3; 82 | 83 | // each job should use the browser to GET a certain URL 84 | // limit number of concurrent jobs here 85 | $transformer = new Clue\React\Flux\Transformer($concurrency, function ($user) use ($browser) { 86 | // skip users that do not have an IP address listed 87 | if (!isset($user['ip'])) { 88 | return React\Promise\resolve($user); 89 | } 90 | 91 | // look up country for this IP 92 | return $browser->get("https://ipapi.co/$user[ip]/country_name/")->then( 93 | function (Psr\Http\Message\ResponseInterface $response) use ($user) { 94 | // response successfully received 95 | // add country to user array and return updated user 96 | $user['country'] = (string)$response->getBody(); 97 | 98 | return $user; 99 | } 100 | ); 101 | }); 102 | 103 | // load a huge number of users to process from NDJSON file 104 | $input = new Clue\React\NDJson\Decoder( 105 | new React\Stream\ReadableResourceStream( 106 | fopen(__DIR__ . '/users.ndjson', 'r') 107 | ), 108 | true 109 | ); 110 | 111 | // process all users by piping through transformer 112 | $input->pipe($transformer); 113 | 114 | // log transformed output results 115 | $transformer->on('data', function ($user) { 116 | echo $user['name'] . ' is from ' . $user['country'] . PHP_EOL; 117 | }); 118 | $transformer->on('end', function () { 119 | echo '[DONE]' . PHP_EOL; 120 | }); 121 | $transformer->on('error', function (Exception $e) { 122 | echo 'Error: ' . $e->getMessage() . PHP_EOL; 123 | }); 124 | 125 | ``` 126 | 127 | See also the [examples](examples/). 128 | 129 | By changing the `$concurrency` parameter, you can see how processing this list 130 | without concurrency takes near `4s`, while using a concurrency setting of `5` 131 | takes near just `1s` (YMMV obviously). 132 | 133 | ## Usage 134 | 135 | ### Transformer 136 | 137 | The `Transformer` passes all input data through its transformation handler 138 | and forwards the resulting output data.
139 | 140 | It uses ReactPHP's standard [streaming interfaces](#streaming) which allow 141 | to process huge inputs without having to store everything in memory at once 142 | and instead allows you to efficiently process its input in small chunks. 143 | Any data you write to this stream will be passed through its transformation 144 | handler which is responsible for processing and transforming this data and 145 | also takes care of managing streaming throughput and back-pressure. 146 | 147 | The transformation handler can be any non-blocking (async) callable that uses 148 | [promises](#promises) to signal its eventual results. This callable receives 149 | a single data argument as passed to the writable side and must return a 150 | promise. A successful fulfillment value will be forwarded to the readable end 151 | of the stream, while an unsuccessful rejection value will emit an `error` 152 | event and then `close()` the stream. 153 | 154 | The `new Transformer(int $concurrency, callable $handler)` call 155 | can be used to create a new transformer instance. 156 | You can create any number of transformation streams, for example when you 157 | want to apply different transformations to different kinds of streams. 158 | 159 | The `$concurrency` parameter sets a new soft limit for the maximum number 160 | of jobs to handle concurrently. Finding a good concurrency limit depends 161 | on your particular use case. It's common to limit concurrency to a rather 162 | small value, as doing more than a dozen of things at once may easily 163 | overwhelm the receiving side. Using a `1` value will ensure that all jobs 164 | are processed one after another, effectively creating a "waterfall" of 165 | jobs. Using a value less than 1 will throw an `InvalidArgumentException`.
166 | 167 | ```php 168 | // handle up to 10 jobs concurrently 169 | $transformer = new Transformer(10, $handler); 170 | ``` 171 | 172 | ```php 173 | // handle each job after another without concurrency (waterfall) 174 | $transformer = new Transformer(1, $handler); 175 | ``` 176 | 177 | The `$handler` parameter must be a valid callable that accepts your job 178 | parameter (the data from its writable side), invokes the appropriate 179 | operation and returns a Promise as a placeholder for its future result 180 | (which will be made available on its readable side). 181 | 182 | ```php 183 | // using a Closure as handler is usually recommended 184 | $transformer = new Transformer(10, function ($url) use ($browser) { 185 | return $browser->get($url); 186 | }); 187 | ``` 188 | 189 | ```php 190 | // accepts any callable, so PHP's array notation is also supported 191 | $transformer = new Transformer(10, array($browser, 'get')); 192 | ``` 193 | 194 | *Continue with reading more about [promises](#promises).* 195 | 196 | #### Promises 197 | 198 | This library works under the assumption that you want to concurrently handle 199 | async operations that use a [Promise](https://github.com/reactphp/promise)-based API. 200 | You can use this to concurrently run multiple HTTP requests, database queries 201 | or pretty much any API that already uses Promises. 202 | 203 | For demonstration purposes, the examples in this documentation use 204 | [ReactPHP's async HTTP client](https://github.com/reactphp/http#client-usage). 
205 | Its API can be used like this: 206 | 207 | ```php 208 | $browser = new React\Http\Browser(); 209 | 210 | $promise = $browser->get($url); 211 | ``` 212 | 213 | If you wrap this in a `Transformer` instance as given above, this code will look 214 | like this: 215 | 216 | ```php 217 | $browser = new React\Http\Browser(); 218 | 219 | $transformer = new Transformer(10, function ($url) use ($browser) { 220 | return $browser->get($url); 221 | }); 222 | 223 | $transformer->write($url); 224 | ``` 225 | 226 | The `$transformer` instance is a `WritableStreamInterface`, so that writing to it 227 | with `write($data)` will actually be forwarded as `$browser->get($data)` as 228 | given in the `$handler` argument (more about this in the following section about 229 | [streaming](#streaming)). 230 | 231 | Each operation is expected to be async (non-blocking), so you may actually 232 | invoke multiple handlers concurrently (send multiple requests in parallel). 233 | The `$handler` is responsible for responding to each request with a resolution 234 | value, the order is not guaranteed. 235 | These operations use a [Promise](https://github.com/reactphp/promise)-based 236 | interface that makes it easy to react to when an operation is completed (i.e. 237 | either successfully fulfilled or rejected with an error): 238 | 239 | ```php 240 | $transformer = new Transformer(10, function ($url) use ($browser) { 241 | $promise = $browser->get($url); 242 | 243 | return $promise->then( 244 | function ($response) { 245 | var_dump('Result received', $response); 246 | 247 | return json_decode($response->getBody()); 248 | }, 249 | function (Exception $e) { 250 | echo 'Error: ' . $e->getMessage() . PHP_EOL; 251 | 252 | throw $e; 253 | } 254 | ); 255 | }); 256 | ``` 257 | 258 | Each operation may take some time to complete, but due to its async nature you 259 | can actually start any number of (queued) operations.
Once the concurrency limit 260 | is reached, this invocation will simply be queued and this stream will signal 261 | to the writing side that it should pause writing, thus effectively throttling 262 | the writable side (back-pressure). It will automatically start the next 263 | operation once another operation is completed and signal to the writable side 264 | that it may resume writing. This means that this is handled entirely 265 | transparently and you do not need to worry about this concurrency limit 266 | yourself. 267 | 268 | This example expects URI strings as input, sends a simple HTTP GET request 269 | and returns the JSON-decoded HTTP response body. You can transform your 270 | fulfillment value to anything that should be made available on the readable 271 | end of your stream. Similar logic may be used to filter your input stream, 272 | such as skipping certain input values or rejecting it by returning a rejected 273 | promise. Accordingly, returning a rejected promise (the equivalent of 274 | throwing an `Exception`) will result in an `error` event that tries to 275 | `cancel()` all pending operations and then `close()` the stream. 276 | 277 | #### Timeout 278 | 279 | By default, this library does not limit how long a single operation can take, 280 | so that the transformation handler may stay pending for a long time. 281 | Many use cases involve some kind of "timeout" logic so that an operation is 282 | cancelled after a certain threshold is reached. 283 | 284 | You can simply use [react/promise-timer](https://github.com/reactphp/promise-timer) 285 | which helps taking care of this through a simple API.
286 | 287 | The resulting code with timeouts applied looks something like this: 288 | 289 | ```php 290 | use React\Promise\Timer; 291 | 292 | $transformer = new Transformer(10, function ($uri) use ($browser) { 293 | return Timer\timeout($browser->get($uri), 2.0); 294 | }); 295 | 296 | $transformer->write($uri); 297 | ``` 298 | 299 | The resulting stream can be consumed as usual and the above code will ensure 300 | that execution of this operation can not take longer than the given timeout 301 | (i.e. after it is actually started). 302 | 303 | Please refer to [react/promise-timer](https://github.com/reactphp/promise-timer) 304 | for more details. 305 | 306 | #### Streaming 307 | 308 | The `Transformer` implements the [`DuplexStreamInterface`](https://github.com/reactphp/stream#duplexstreaminterface) 309 | and as such allows you to write to its writable input side and to consume 310 | from its readable output side. Any data you write to this stream will be 311 | passed through its transformation handler which is responsible for processing 312 | and transforming this data (see above for more details). 313 | 314 | The `Transformer` takes care of passing data you pass on its writable side to 315 | the transformation handler argument and forwarding resulting data to its 316 | readable end. 317 | Each operation may take some time to complete, but due to its async nature you 318 | can actually start any number of (queued) operations. Once the concurrency limit 319 | is reached, this invocation will simply be queued and this stream will signal 320 | to the writing side that it should pause writing, thus effectively throttling 321 | the writable side (back-pressure). It will automatically start the next 322 | operation once another operation is completed and signal to the writable side 323 | that it may resume writing. This means that this is handled entirely 324 | transparently and you do not need to worry about this concurrency limit 325 | yourself.
326 | 327 | The following examples use an async (non-blocking) transformation handler as 328 | given above: 329 | 330 | ```php 331 | $browser = new React\Http\Browser(); 332 | 333 | $transformer = new Transformer(10, function ($url) use ($browser) { 334 | return $browser->get($url); 335 | }); 336 | ``` 337 | 338 | The `write(mixed $data): bool` method can be used to 339 | transform data through the transformation handler like this: 340 | 341 | ```php 342 | $transformer->on('data', function (ResponseInterface $response) { 343 | var_dump($response); 344 | }); 345 | 346 | $transformer->write('http://example.com/'); 347 | ``` 348 | 349 | This handler receives a single data argument as passed to the writable side 350 | and must return a promise. A successful fulfillment value will be forwarded to 351 | the readable end of the stream, while an unsuccessful rejection value will 352 | emit an `error` event, try to `cancel()` all pending operations and then 353 | `close()` the stream. 354 | 355 | Note that this class makes no assumptions about any data types. Whatever is 356 | written to it, will be processed by the transformation handler. Whatever the 357 | transformation handler yields will be forwarded to its readable end. 358 | 359 | The `end(mixed $data = null): void` method can be used to 360 | soft-close the stream once all transformation handlers are completed. 361 | It will close the writable side, wait for all outstanding transformation 362 | handlers to complete and then emit an `end` event and then `close()` the stream. 363 | You may optionally pass a (non-null) `$data` argument which will be processed 364 | just like a `write($data)` call immediately followed by an `end()` call. 365 | 366 | ```php 367 | $transformer->on('data', function (ResponseInterface $response) { 368 | var_dump($response); 369 | }); 370 | $transformer->on('end', function () { 371 | echo '[DONE]' . 
PHP_EOL; 372 | }); 373 | 374 | $transformer->end('http://example.com/'); 375 | ``` 376 | 377 | The `close(): void` method can be used to 378 | forcefully close the stream. It will try to `cancel()` all pending transformation 379 | handlers and then immediately close the stream and emit a `close` event. 380 | 381 | ```php 382 | $transformer->on('data', $this->expectCallableNever()); 383 | $transformer->on('close', function () { 384 | echo '[CLOSED]' . PHP_EOL; 385 | }); 386 | 387 | $transformer->write('http://example.com/'); 388 | $transformer->close(); 389 | ``` 390 | 391 | The `pipe(WritableStreamInterface $dest): WritableStreamInterface` method can be used to 392 | forward an input stream into the transformer and/or to forward the resulting 393 | output stream to another stream. 394 | 395 | ```php 396 | $source->pipe($transformer)->pipe($dest); 397 | ``` 398 | 399 | This piping context is particularly powerful because it will automatically 400 | throttle the incoming source stream and wait for the transformation handler 401 | to complete before resuming work (back-pressure). Any additional data events 402 | will be queued in-memory and resumed as appropriate. As such, it allows you 403 | to limit how many operations are processed at once. 404 | 405 | Because streams are one of the core abstractions of ReactPHP, a large number 406 | of stream implementations are available for many different use cases. For 407 | example, this allows you to use the following pseudo code to send an HTTP 408 | request for each JSON object in a compressed NDJSON file: 409 | 410 | ```php 411 | $transformer = new Transformer(10, function ($data) use ($http) { 412 | return $http->post('https://example.com/?id=' . 
$data['id'])->then( 413 | function ($response) use ($data) { 414 | return array('done' => $data['id']); 415 | } 416 | ); 417 | }); 418 | 419 | $source->pipe($gunzip)->pipe($ndjson)->pipe($transformer)->pipe($dest); 420 | 421 | $transformer->on('error', function (Exception $e) { 422 | echo 'Error: ' . $e->getMessage() . PHP_EOL; 423 | }); 424 | ``` 425 | 426 | Keep in mind that the transformation handler may return a rejected promise. 427 | In this case, the stream will emit an `error` event and then `close()` the 428 | stream. If you do not want the stream to end in this case, you explicitly 429 | have to handle any rejected promises and return some placeholder value 430 | instead, for example like this: 431 | 432 | ```php 433 | $uploader = new Transformer(10, function ($data) use ($http) { 434 | return $http->post('https://example.com/?id=' . $data['id'])->then( 435 | function ($response) use ($data) { 436 | return array('done' => $data['id']); 437 | }, 438 | function ($error) use ($data) { 439 | // HTTP request failed => return dummy indicator 440 | return array( 441 | 'failed' => $data['id'], 442 | 'reason' => $error->getMessage() 443 | ); 444 | } 445 | ); 446 | }); 447 | ``` 448 | 449 | #### all() 450 | 451 | The static `all(ReadableStreamInterface $input, int $concurrency, callable $handler): PromiseInterface` method can be used to 452 | concurrently process all jobs from the input stream through the given `$handler`. 453 | 454 | This is a convenience method which uses the `Transformer` internally to 455 | schedule all jobs from the input stream while limiting concurrency to 456 | ensure no more than `$concurrency` jobs ever run at once. It will return 457 | a promise which resolves with the total number of all successful jobs 458 | on success. 
459 | 460 | ```php 461 | $browser = new React\Http\Browser(); 462 | 463 | $promise = Transformer::all($input, 3, function ($data) use ($browser, $url) { 464 | return $browser->post($url, [], json_encode($data)); 465 | }); 466 | 467 | $promise->then(function ($count) { 468 | echo 'All ' . $count . ' jobs successful!' . PHP_EOL; 469 | }, function (Exception $e) { 470 | echo 'Error: ' . $e->getMessage() . PHP_EOL; 471 | }); 472 | ``` 473 | 474 | If either of the jobs fail, it will reject the resulting promise, will 475 | `close()` the input stream and will try to cancel all outstanding jobs. 476 | Calling `cancel()` on the pending promise will `close()` the input stream 477 | and will try to cancel all outstanding jobs. Similarly, if the `$input` 478 | stream emits an `error` event, it will reject the resulting promise and 479 | will try to cancel all outstanding jobs. 480 | 481 | The `$input` parameter must be a `ReadableStreamInterface` which emits 482 | one `data` event for each job to process. Each element will be passed to 483 | the `$handler` to start one job. The fulfillment value for each job will 484 | be ignored, so for best performance it's recommended to not return any 485 | excessive data structures. When the stream emits an `end` or `close` 486 | event, this method will wait for all outstanding jobs to complete and 487 | then resolve with the number of successful jobs. If this stream is 488 | already closed or does not emit any `data` events, this method will 489 | resolve with a `0` value without processing any jobs. 490 | 491 | ```php 492 | $input = new ThroughStream(); 493 | 494 | $promise = Transformer::all($input, 2, $handler); 495 | 496 | $input->write('a'); 497 | $input->write('b'); 498 | $input->write('c'); 499 | $input->end(); 500 | ``` 501 | 502 | Because streams are one of the core abstractions of ReactPHP, a large number 503 | of stream implementations are available for many different use cases. 
For 504 | example, this allows you to use [clue/reactphp-ndjson](https://github.com/clue/reactphp-ndjson) 505 | or [clue/reactphp-csv](https://github.com/clue/reactphp-csv) to process 506 | large lists of structured input data. See also [streaming](#streaming) for 507 | more details. 508 | 509 | The `$concurrency` parameter sets a new soft limit for the maximum number 510 | of jobs to handle concurrently. Finding a good concurrency limit depends 511 | on your particular use case. It's common to limit concurrency to a rather 512 | small value, as doing more than a dozen of things at once may easily 513 | overwhelm the receiving side. Using a `1` value will ensure that all jobs 514 | are processed one after another, effectively creating a "waterfall" of 515 | jobs. Using a value less than 1 will reject with an 516 | `InvalidArgumentException` without processing any jobs. 517 | 518 | ```php 519 | // handle up to 10 jobs concurrently 520 | $promise = Transformer::all($stream, 10, $handler); 521 | ``` 522 | 523 | ```php 524 | // handle each job after another without concurrency (waterfall) 525 | $promise = Transformer::all($stream, 1, $handler); 526 | ``` 527 | 528 | The `$handler` parameter must be a valid callable that accepts your job 529 | parameter (the data from the `$input` stream), invokes the appropriate 530 | operation and returns a Promise as a placeholder for its future result. 531 | The fulfillment value for each job will be ignored, so for best 532 | performance it's recommended to not return any excessive data structures. 533 | If the given argument is not a valid callable, this method will reject 534 | with an `InvalidArgumentException` without processing any jobs. 
535 | 536 | ```php 537 | // using a Closure as handler is usually recommended 538 | $promise = Transformer::all($stream, 10, function ($url) use ($browser) { 539 | return $browser->get($url); 540 | }); 541 | ``` 542 | 543 | ```php 544 | // accepts any callable, so PHP's array notation is also supported 545 | $promise = Transformer::all($stream, 10, array($browser, 'get')); 546 | ``` 547 | 548 | Note that this method returns a promise that resolves with the total 549 | number of successful operations only if all operations succeed. This 550 | is mostly a convenience method that uses the [`Transformer`](#transformer) 551 | under the hood. If your input data is small enough to fit into memory 552 | (a few dozens or hundreds of operations), you may want to use 553 | [clue/reactphp-mq](https://github.com/clue/reactphp-mq) instead and keep 554 | all operations in memory without using a streaming approach. 555 | 556 | #### any() 557 | 558 | The static `any(ReadableStreamInterface $input, int $concurrency, callable $handler): PromiseInterface` method can be used to 559 | concurrently process some jobs from the input stream through the given `$handler`. 560 | 561 | This is a convenience method which uses the `Transformer` internally to 562 | schedule the jobs from the input stream while limiting concurrency to 563 | ensure no more than `$concurrency` jobs ever run at once. It will return 564 | a promise which resolves with the first successful resolution value on 565 | success. 566 | 567 | ```php 568 | $browser = new React\Http\Browser(); 569 | 570 | $promise = Transformer::any($input, 3, function ($data) use ($browser, $url) { 571 | return $browser->post($url, [], json_encode($data)); 572 | }); 573 | 574 | $promise->then(function (ResponseInterface $response) { 575 | echo 'First successful job: ' . $response->getBody() . PHP_EOL; 576 | }, function (Exception $e) { 577 | echo 'Error: ' . $e->getMessage() . 
PHP_EOL; 578 | }); 579 | ``` 580 | 581 | If the first job succeeds, it will resolve the resulting promise with its 582 | resolution value, `close()` the input stream and will try to cancel all 583 | other outstanding jobs. 584 | 585 | If either of the jobs fails, it will stay in a pending state and will 586 | wait for one of the other jobs to succeed. If all jobs fail, it will 587 | reject the resulting promise. Calling `cancel()` on the pending promise 588 | will `close()` the input stream and will try to cancel all outstanding 589 | jobs. Similarly, if the `$input` stream emits an `error` event, it will 590 | reject the resulting promise and will try to cancel all outstanding jobs. 591 | 592 | The `$input` parameter must be a `ReadableStreamInterface` which emits 593 | one `data` event for each job to process. Each element will be passed to 594 | the `$handler` to start one job. The fulfillment value for the first 595 | successful job will be used to fulfill the resulting promise. When the 596 | stream emits an `end` or `close` event, this method will wait for all 597 | outstanding jobs to complete and then resolve or reject accordingly. If 598 | this stream is already closed or does not emit any `data` events, this 599 | method will reject with an `UnderflowException` without processing any 600 | jobs. 601 | 602 | ```php 603 | $input = new ThroughStream(); 604 | 605 | $promise = Transformer::any($input, 2, $handler); 606 | 607 | $input->write('a'); 608 | $input->write('b'); 609 | $input->write('c'); 610 | $input->end(); 611 | ``` 612 | 613 | Because streams are one of the core abstractions of ReactPHP, a large number 614 | of stream implementations are available for many different use cases. For 615 | example, this allows you to use [clue/reactphp-ndjson](https://github.com/clue/reactphp-ndjson) 616 | or [clue/reactphp-csv](https://github.com/clue/reactphp-csv) to process 617 | large lists of structured input data. 
See also [streaming](#streaming) for 618 | more details. 619 | 620 | The `$concurrency` parameter sets a new soft limit for the maximum number 621 | of jobs to handle concurrently. Finding a good concurrency limit depends 622 | on your particular use case. It's common to limit concurrency to a rather 623 | small value, as doing more than a dozen of things at once may easily 624 | overwhelm the receiving side. Using a `1` value will ensure that all jobs 625 | are processed one after another, effectively creating a "waterfall" of 626 | jobs. Using a value less than 1 will reject with an 627 | `InvalidArgumentException` without processing any jobs. 628 | 629 | ```php 630 | // handle up to 10 jobs concurrently 631 | $promise = Transformer::any($stream, 10, $handler); 632 | ``` 633 | 634 | ```php 635 | // handle each job after another without concurrency (waterfall) 636 | $promise = Transformer::any($stream, 1, $handler); 637 | ``` 638 | 639 | The `$handler` parameter must be a valid callable that accepts your job 640 | parameter (the data from the `$input` stream), invokes the appropriate 641 | operation and returns a Promise as a placeholder for its future result. 642 | The fulfillment value for the first successful job will be used to 643 | fulfill the resulting promise. If the given argument is not a valid 644 | callable, this method will reject with an `InvalidArgumentException` 645 | without processing any jobs. 646 | 647 | ```php 648 | // using a Closure as handler is usually recommended 649 | $promise = Transformer::any($stream, 10, function ($url) use ($browser) { 650 | return $browser->get($url); 651 | }); 652 | ``` 653 | 654 | ```php 655 | // accepts any callable, so PHP's array notation is also supported 656 | $promise = Transformer::any($stream, 10, array($browser, 'get')); 657 | ``` 658 | 659 | Note that this method returns a promise that resolves with the first 660 | successful resolution value only if any operation succeeds. 
This is 661 | mostly a convenience method that uses the [`Transformer`](#transformer) 662 | under the hood. If your input data is small enough to fit into memory 663 | (a few dozens or hundreds of operations), you may want to use 664 | [clue/reactphp-mq](https://github.com/clue/reactphp-mq) instead and keep 665 | all operations in memory without using a streaming approach. 666 | 667 | ## Install 668 | 669 | The recommended way to install this library is [through Composer](https://getcomposer.org/). 670 | [New to Composer?](https://getcomposer.org/doc/00-intro.md) 671 | 672 | This project follows [SemVer](https://semver.org/). 673 | This will install the latest supported version: 674 | 675 | ```bash 676 | composer require clue/reactphp-flux:^1.4 677 | ``` 678 | 679 | See also the [CHANGELOG](CHANGELOG.md) for details about version upgrades. 680 | 681 | This project aims to run on any platform and thus does not require any PHP 682 | extensions and supports running on legacy PHP 5.3 through current PHP 8+ and 683 | HHVM. 684 | It's *highly recommended to use the latest supported PHP version* for this project. 685 | 686 | ## Tests 687 | 688 | To run the test suite, you first need to clone this repo and then install all 689 | dependencies [through Composer](https://getcomposer.org/): 690 | 691 | ```bash 692 | composer install 693 | ``` 694 | 695 | To run the test suite, go to the project root and run: 696 | 697 | ```bash 698 | vendor/bin/phpunit 699 | ``` 700 | 701 | The test suite is set up to always ensure 100% code coverage across all 702 | supported environments. If you have the Xdebug extension installed, you can also 703 | generate a code coverage report locally like this: 704 | 705 | ```bash 706 | XDEBUG_MODE=coverage vendor/bin/phpunit --coverage-text 707 | ``` 708 | 709 | ## License 710 | 711 | This project is released under the permissive [MIT license](LICENSE). 
712 | 713 | > Did you know that I offer custom development services and issuing invoices for 714 | sponsorships of releases and for contributions? Contact me (@clue) for details. 715 | 716 | ## More 717 | 718 | * If you want to learn more about processing streams of data, refer to the documentation of 719 | the underlying [react/stream](https://github.com/reactphp/stream) component. 720 | 721 | * If you only want to process a few dozens or hundreds of operations, 722 | you may want to use [clue/reactphp-mq](https://github.com/clue/reactphp-mq) 723 | instead and keep all operations in memory without using a streaming approach. 724 | 725 | * If you want to process structured NDJSON files (`.ndjson` file extension), 726 | you may want to use [clue/reactphp-ndjson](https://github.com/clue/reactphp-ndjson) 727 | on the input stream before passing the decoded stream to the transformer. 728 | 729 | * If you want to process compressed GZIP files (`.gz` file extension), 730 | you may want to use [clue/reactphp-zlib](https://github.com/clue/reactphp-zlib) 731 | on the compressed input stream before passing the decompressed stream to the 732 | decoder (such as NDJSON). 
733 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "clue/reactphp-flux", 3 | "description": "Flux, the lightweight stream processor to concurrently do many (but not too many) things at once, built on top of ReactPHP.", 4 | "keywords": ["flux", "streaming", "non-blocking", "concurrency", "message queue", "ReactPHP", "async"], 5 | "homepage": "https://github.com/clue/reactphp-flux", 6 | "license": "MIT", 7 | "authors": [ 8 | { 9 | "name": "Christian Lück", 10 | "email": "christian@clue.engineering" 11 | } 12 | ], 13 | "require": { 14 | "php": ">=5.3", 15 | "react/promise": "^3 || ^2.9 || ^1.2.1", 16 | "react/stream": "^1.2" 17 | }, 18 | "require-dev": { 19 | "clue/ndjson-react": "^1.0", 20 | "phpunit/phpunit": "^9.6 || ^5.7 || ^4.8.36", 21 | "react/http": "^1.8" 22 | }, 23 | "autoload": { 24 | "psr-4": { 25 | "Clue\\React\\Flux\\": "src/" 26 | } 27 | }, 28 | "autoload-dev": { 29 | "psr-4": { 30 | "Clue\\Tests\\React\\Flux\\": "tests/" 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/Transformer.php: -------------------------------------------------------------------------------- 1 | get($url); 50 | * ``` 51 | * 52 | * If you wrap this in a `Transformer` instance as given above, this code will look 53 | * like this: 54 | * 55 | * ```php 56 | * $browser = new React\Http\Browser(); 57 | * 58 | * $transformer = new Transformer(10, function ($url) use ($browser) { 59 | * return $browser->get($url); 60 | * }); 61 | * 62 | * $transformer->write($url); 63 | * ``` 64 | * 65 | * The `$transformer` instance is a `WritableStreaminterface`, so that writing to it 66 | * with `write($data)` will actually be forwarded as `$browser->get($data)` as 67 | * given in the `$handler` argument (more about this in the following section about 68 | * [streaming](#streaming)). 
69 | * 70 | * Each operation is expected to be async (non-blocking), so you may actually 71 | * invoke multiple handlers concurrently (send multiple requests in parallel). 72 | * The `$handler` is responsible for responding to each request with a resolution 73 | * value, the order is not guaranteed. 74 | * These operations use a [Promise](https://github.com/reactphp/promise)-based 75 | * interface that makes it easy to react to when an operation is completed (i.e. 76 | * either successfully fulfilled or rejected with an error): 77 | * 78 | * ```php 79 | * $transformer = new Transformer(10, function ($url) use ($browser) { 80 | * $promise = $browser->get($url); 81 | * 82 | * return $promise->then( 83 | * function ($response) { 84 | * var_dump('Result received', $response); 85 | * 86 | * return json_decode($response->getBody()); 87 | * }, 88 | * function (Exception $e) { 89 | * echo 'Error: ' . $e->getMessage() . PHP_EOL; 90 | * 91 | * throw $e; 92 | * } 93 | * ); 94 | * }); 95 | * ``` 96 | * 97 | * Each operation may take some time to complete, but due to its async nature you 98 | * can actually start any number of (queued) operations. Once the concurrency limit 99 | * is reached, this invocation will simply be queued and this stream will signal 100 | * to the writing side that it should pause writing, thus effectively throttling 101 | * the writable side (back-pressure). It will automatically start the next 102 | * operation once another operation is completed and signal to the writable side 103 | * that it may resume writing. This means that this is handled entirely 104 | * transparently and you do not need to worry about this concurrency limit 105 | * yourself. 106 | * 107 | * This example expects URI strings as input, sends a simple HTTP GET request 108 | * and returns the JSON-decoded HTTP response body. You can transform your 109 | * fulfillment value to anything that should be made available on the readable 110 | * end of your stream.
Similar logic may be used to filter your input stream, 111 | * such as skipping certain input values or rejecting it by returning a rejected 112 | * promise. Accordingly, returning a rejected promise (the equivalent of 113 | * throwing an `Exception`) will result in an `error` event that tries to 114 | * `cancel()` all pending operations and then `close()` the stream. 115 | * 116 | * #### Timeout 117 | * 118 | * By default, this library does not limit how long a single operation can take, 119 | * so that the transformation handler may stay pending for a long time. 120 | * Many use cases involve some kind of "timeout" logic so that an operation is 121 | * cancelled after a certain threshold is reached. 122 | * 123 | * You can simply use [react/promise-timer](https://github.com/reactphp/promise-timer) 124 | * which helps take care of this through a simple API. 125 | * 126 | * The resulting code with timeouts applied looks something like this: 127 | * 128 | * ```php 129 | * use React\Promise\Timer; 130 | * 131 | * $transformer = new Transformer(10, function ($uri) use ($browser) { 132 | * return Timer\timeout($browser->get($uri), 2.0); 133 | * }); 134 | * 135 | * $transformer->write($uri); 136 | * ``` 137 | * 138 | * The resulting stream can be consumed as usual and the above code will ensure 139 | * that execution of this operation can not take longer than the given timeout 140 | * (i.e. after it is actually started). 141 | * 142 | * Please refer to [react/promise-timer](https://github.com/reactphp/promise-timer) 143 | * for more details. 144 | * 145 | * #### Streaming 146 | * 147 | * The `Transformer` implements the [`DuplexStreamInterface`](https://github.com/reactphp/stream#duplexstreaminterface) 148 | * and as such allows you to write to its writable input side and to consume 149 | * from its readable output side.
Any data you write to this stream will be 150 | * passed through its transformation handler which is responsible for processing 151 | * and transforming this data (see above for more details). 152 | * 153 | * The `Transformer` takes care of passing data you pass on its writable side to 154 | * the transformation handler argument and forwarding resulting data to its 155 | * readable end. 156 | * Each operation may take some time to complete, but due to its async nature you 157 | * can actually start any number of (queued) operations. Once the concurrency limit 158 | * is reached, this invocation will simply be queued and this stream will signal 159 | * to the writing side that it should pause writing, thus effectively throttling 160 | * the writable side (back-pressure). It will automatically start the next 161 | * operation once another operation is completed and signal to the writable side 162 | * that it may resume writing. This means that this is handled entirely 163 | * transparently and you do not need to worry about this concurrency limit 164 | * yourself. 165 | * 166 | * The following examples use an async (non-blocking) transformation handler as 167 | * given above: 168 | * 169 | * ```php 170 | * $browser = new React\Http\Browser(); 171 | * 172 | * $transformer = new Transformer(10, function ($url) use ($browser) { 173 | * return $browser->get($url); 174 | * }); 175 | * ``` 176 | * 177 | * The `write(mixed $data): bool` method can be used to 178 | * transform data through the transformation handler like this: 179 | * 180 | * ```php 181 | * $transformer->on('data', function (ResponseInterface $response) { 182 | * var_dump($response); 183 | * }); 184 | * 185 | * $transformer->write('http://example.com/'); 186 | * ``` 187 | * 188 | * The handler receives a single data argument as passed to the writable side 189 | * and must return a promise.
A successful fulfillment value will be forwarded to 190 | * the readable end of the stream, while an unsuccessful rejection value will 191 | * emit an `error` event, try to `cancel()` all pending operations and then 192 | * `close()` the stream. 193 | * 194 | * Note that this class makes no assumptions about any data types. Whatever is 195 | * written to it, will be processed by the transformation handler. Whatever the 196 | * transformation handler yields will be forwarded to its readable end. 197 | * 198 | * The `end(mixed $data = null): void` method can be used to 199 | * soft-close the stream once all transformation handlers are completed. 200 | * It will close the writable side, wait for all outstanding transformation 201 | * handlers to complete and then emit an `end` event and then `close()` the stream. 202 | * You may optionally pass a (non-null) `$data` argument which will be processed 203 | * just like a `write($data)` call immediately followed by an `end()` call. 204 | * 205 | * ```php 206 | * $transformer->on('data', function (ResponseInterface $response) { 207 | * var_dump($response); 208 | * }); 209 | * $transformer->on('end', function () { 210 | * echo '[DONE]' . PHP_EOL; 211 | * }); 212 | * 213 | * $transformer->end('http://example.com/'); 214 | * ``` 215 | * 216 | * The `close(): void` method can be used to 217 | * forcefully close the stream. It will try to `cancel()` all pending transformation 218 | * handlers and then immediately close the stream and emit a `close` event. 219 | * 220 | * ```php 221 | * $transformer->on('data', $this->expectCallableNever()); 222 | * $transformer->on('close', function () { 223 | * echo '[CLOSED]' . 
PHP_EOL; 224 | * }); 225 | * 226 | * $transformer->write('http://example.com/'); 227 | * $transformer->close(); 228 | * ``` 229 | * 230 | * The `pipe(WritableStreamInterface $dest): WritableStreamInterface` method can be used to 231 | * forward an input stream into the transformer and/or to forward the resulting 232 | * output stream to another stream. 233 | * 234 | * ```php 235 | * $source->pipe($transformer)->pipe($dest); 236 | * ``` 237 | * 238 | * This piping context is particularly powerful because it will automatically 239 | * throttle the incoming source stream and wait for the transformation handler 240 | * to complete before resuming work (back-pressure). Any additional data events 241 | * will be queued in-memory and resumed as appropriate. As such, it allows you 242 | * to limit how many operations are processed at once. 243 | * 244 | * Because streams are one of the core abstractions of ReactPHP, a large number 245 | * of stream implementations are available for many different use cases. For 246 | * example, this allows you to use the following pseudo code to send an HTTP 247 | * request for each JSON object in a compressed NDJSON file: 248 | * 249 | * ```php 250 | * $transformer = new Transformer(10, function ($data) use ($http) { 251 | * return $http->post('https://example.com/?id=' . $data['id'])->then( 252 | * function ($response) use ($data) { 253 | * return array('done' => $data['id']); 254 | * } 255 | * ); 256 | * }); 257 | * 258 | * $source->pipe($gunzip)->pipe($ndjson)->pipe($transformer)->pipe($dest); 259 | * 260 | * $transformer->on('error', function (Exception $e) { 261 | * echo 'Error: ' . $e->getMessage() . PHP_EOL; 262 | * }); 263 | * ``` 264 | * 265 | * Keep in mind that the transformation handler may return a rejected promise. 266 | * In this case, the stream will emit an `error` event and then `close()` the 267 | * stream. 
If you do not want the stream to end in this case, you explicitly 268 | * have to handle any rejected promises and return some placeholder value 269 | * instead, for example like this: 270 | * 271 | * ```php 272 | * $uploader = new Transformer(10, function ($data) use ($http) { 273 | * return $http->post('https://example.com/?id=' . $data['id'])->then( 274 | * function ($response) use ($data) { 275 | * return array('done' => $data['id']); 276 | * }, 277 | * function ($error) use ($data) { 278 | * // HTTP request failed => return dummy indicator 279 | * return array( 280 | * 'failed' => $data['id'], 281 | * 'reason' => $error->getMessage() 282 | * ); 283 | * } 284 | * ); 285 | * }); 286 | * ``` 287 | * 288 | * @see DuplexStreamInterface 289 | */ 290 | final class Transformer extends EventEmitter implements DuplexStreamInterface 291 | { 292 | private $readable = true; 293 | private $writable = true; 294 | private $closed = false; 295 | private $paused = false; 296 | private $drain = false; 297 | private $concurrency; 298 | private $callback; 299 | 300 | private $promises = array(); 301 | private $queued = array(); 302 | 303 | /** 304 | * Concurrently process all jobs from the input stream through the given `$handler`. 305 | * 306 | * This is a convenience method which uses the `Transformer` internally to 307 | * schedule all jobs from the input stream while limiting concurrency to 308 | * ensure no more than `$concurrency` jobs ever run at once. It will return 309 | * a promise which resolves with the total number of all successful jobs 310 | * on success. 311 | * 312 | * ```php 313 | * $browser = new React\Http\Browser(); 314 | * 315 | * $promise = Transformer::all($input, 3, function ($data) use ($browser, $url) { 316 | * return $browser->post($url, [], json_encode($data)); 317 | * }); 318 | * 319 | * $promise->then(function ($count) { 320 | * echo 'All ' . $count . ' jobs successful!' . PHP_EOL; 321 | * }, function (Exception $e) { 322 | * echo 'Error: ' . 
$e->getMessage() . PHP_EOL; 323 | * }); 324 | * ``` 325 | * 326 | * If either of the jobs fail, it will reject the resulting promise, will 327 | * `close()` the input stream and will try to cancel all outstanding jobs. 328 | * Calling `cancel()` on the pending promise will `close()` the input stream 329 | * and will try to cancel all outstanding jobs. Similarly, if the `$input` 330 | * stream emits an `error` event, it will reject the resulting promise and 331 | * will try to cancel all outstanding jobs. 332 | * 333 | * The `$input` parameter must be a `ReadableStreamInterface` which emits 334 | * one `data` event for each job to process. Each element will be passed to 335 | * the `$handler` to start one job. The fulfillment value for each job will 336 | * be ignored, so for best performance it's recommended to not return any 337 | * excessive data structures. When the stream emits an `end` or `close` 338 | * event, this method will wait for all outstanding jobs to complete and 339 | * then resolve with the number of successful jobs. If this stream is 340 | * already closed or does not emit any `data` events, this method will 341 | * resolve with a `0` value without processing any jobs. 342 | * 343 | * ```php 344 | * $input = new ThroughStream(); 345 | * 346 | * $promise = Transformer::all($input, 2, $handler); 347 | * 348 | * $input->write('a'); 349 | * $input->write('b'); 350 | * $input->write('c'); 351 | * $input->end(); 352 | * ``` 353 | * 354 | * Because streams are one of the core abstractions of ReactPHP, a large number 355 | * of stream implementations are available for many different use cases. For 356 | * example, this allows you to use [clue/reactphp-ndjson](https://github.com/clue/reactphp-ndjson) 357 | * or [clue/reactphp-csv](https://github.com/clue/reactphp-csv) to process 358 | * large lists of structured input data. See also [streaming](#streaming) for 359 | * more details. 
360 | * 361 | * The `$concurrency` parameter sets a new soft limit for the maximum number 362 | * of jobs to handle concurrently. Finding a good concurrency limit depends 363 | * on your particular use case. It's common to limit concurrency to a rather 364 | * small value, as doing more than a dozen of things at once may easily 365 | * overwhelm the receiving side. Using a `1` value will ensure that all jobs 366 | * are processed one after another, effectively creating a "waterfall" of 367 | * jobs. Using a value less than 1 will reject with an 368 | * `InvalidArgumentException` without processing any jobs. 369 | * 370 | * ```php 371 | * // handle up to 10 jobs concurrently 372 | * $promise = Transformer::all($stream, 10, $handler); 373 | * ``` 374 | * 375 | * ```php 376 | * // handle each job after another without concurrency (waterfall) 377 | * $promise = Transformer::all($stream, 1, $handler); 378 | * ``` 379 | * 380 | * The `$handler` parameter must be a valid callable that accepts your job 381 | * parameter (the data from the `$input` stream), invokes the appropriate 382 | * operation and returns a Promise as a placeholder for its future result. 383 | * The fulfillment value for each job will be ignored, so for best 384 | * performance it's recommended to not return any excessive data structures. 385 | * If the given argument is not a valid callable, this method will reject 386 | * with an `InvalidArgumentException` without processing any jobs. 
/**
 * Runs every job emitted by `$input` through `$callback` and counts the successes.
 *
 * A `Transformer` is created internally and `$input` is piped into it, so
 * the concurrency limit and back-pressure are handled by that instance.
 * The returned promise
 *
 * - fulfills with `0` right away if `$input` is not readable,
 * - fulfills with the number of `data` events the transformer emitted once
 *   the transformer emits `end`,
 * - rejects with an `InvalidArgumentException` if the `Transformer`
 *   constructor refuses `$concurrency` or `$callback`,
 * - rejects with the emitted error if either `$input` or the transformer
 *   emits `error` (the respective other side is closed in that case),
 * - rejects with a `RuntimeException` when cancelled (both sides are closed).
 *
 * @param ReadableStreamInterface $input       stream emitting one `data` event per job
 * @param int                     $concurrency maximum number of jobs to run at once
 * @param callable                $callback    handler returning a promise for each job
 * @return PromiseInterface Returns a Promise
 */
public static function all(ReadableStreamInterface $input, $concurrency, $callback)
{
    // a closed input can never deliver a job => report zero successful jobs
    if (!$input->isReadable()) {
        return Promise\resolve(0);
    }

    // constructor validation errors are reported through the promise instead of thrown
    try {
        $transformer = new self($concurrency, $callback);
    } catch (\InvalidArgumentException $invalid) {
        return Promise\reject($invalid);
    }

    $successful = 0;
    $cancel = function ($_, $reject) use ($input, $transformer) {
        $reject(new \RuntimeException('Transformer cancelled'));
        $input->close();
        $transformer->close();
    };
    $result = new Deferred($cancel);

    // input side: feed the transformer, end it when the input closes and
    // abort everything when the input fails
    $input->pipe($transformer);
    $input->on('close', array($transformer, 'end'));
    $input->on('error', function ($reason) use ($result, $transformer) {
        $result->reject($reason);
        $transformer->close();
    });

    // transformer side: every emitted value is one successful job, `end`
    // reports the final count and a failed job rejects and stops the input
    $transformer->on('data', function () use (&$successful) {
        $successful++;
    });
    $transformer->on('end', function () use (&$successful, $result) {
        $result->resolve($successful);
    });
    $transformer->on('error', function ($reason) use ($result, $input) {
        $result->reject($reason);
        $input->close();
    });

    return $result->promise();
}
Calling `cancel()` on the pending promise 487 | * will `close()` the input stream and will try to cancel all outstanding 488 | * jobs. Similarly, if the `$input` stream emits an `error` event, it will 489 | * reject the resulting promise and will try to cancel all outstanding jobs. 490 | * 491 | * The `$input` parameter must be a `ReadableStreamInterface` which emits 492 | * one `data` event for each job to process. Each element will be passed to 493 | * the `$handler` to start one job. The fulfillment value for the first 494 | * successful job will be used to fulfill the resulting promise. When the 495 | * stream emits an `end` or `close` event, this method will wait for all 496 | * outstanding jobs to complete and then resolve or reject accordingly. If 497 | * this stream is already closed or does not emit any `data` events, this 498 | * method will reject with an `UnderflowException` without processing any 499 | * jobs. 500 | * 501 | * ```php 502 | * $input = new ThroughStream(); 503 | * 504 | * $promise = Transformer::any($input, 2, $handler); 505 | * 506 | * $input->write('a'); 507 | * $input->write('b'); 508 | * $input->write('c'); 509 | * $input->end(); 510 | * ``` 511 | * 512 | * Because streams are one of the core abstractions of ReactPHP, a large number 513 | * of stream implementations are available for many different use cases. For 514 | * example, this allows you to use [clue/reactphp-ndjson](https://github.com/clue/reactphp-ndjson) 515 | * or [clue/reactphp-csv](https://github.com/clue/reactphp-csv) to process 516 | * large lists of structured input data. See also [streaming](#streaming) for 517 | * more details. 518 | * 519 | * The `$concurrency` parameter sets a new soft limit for the maximum number 520 | * of jobs to handle concurrently. Finding a good concurrency limit depends 521 | * on your particular use case. 
It's common to limit concurrency to a rather 522 | * small value, as doing more than a dozen of things at once may easily 523 | * overwhelm the receiving side. Using a `1` value will ensure that all jobs 524 | * are processed one after another, effectively creating a "waterfall" of 525 | * jobs. Using a value less than 1 will reject with an 526 | * `InvalidArgumentException` without processing any jobs. 527 | * 528 | * ```php 529 | * // handle up to 10 jobs concurrently 530 | * $promise = Transformer::any($stream, 10, $handler); 531 | * ``` 532 | * 533 | * ```php 534 | * // handle each job after another without concurrency (waterfall) 535 | * $promise = Transformer::any($stream, 1, $handler); 536 | * ``` 537 | * 538 | * The `$handler` parameter must be a valid callable that accepts your job 539 | * parameter (the data from the `$input` stream), invokes the appropriate 540 | * operation and returns a Promise as a placeholder for its future result. 541 | * The fulfillment value for the first successful job will be used to 542 | * fulfill the resulting promise. If the given argument is not a valid 543 | * callable, this method will reject with an `InvalidArgumentException` 544 | * without processing any jobs. 545 | * 546 | * ```php 547 | * // using a Closure as handler is usually recommended 548 | * $promise = Transformer::any($stream, 10, function ($url) use ($browser) { 549 | * return $browser->get($url); 550 | * }); 551 | * ``` 552 | * 553 | * ```php 554 | * // accepts any callable, so PHP's array notation is also supported 555 | * $promise = Transformer::any($stream, 10, array($browser, 'get')); 556 | * ``` 557 | * 558 | * Note that this method returns a promise that resolves with the first 559 | * successful resolution value only if any operation succeeds. This is 560 | * mostly a convenience method that uses the [`Transformer`](#transformer) 561 | * under the hood. 
/**
 * Concurrently processes jobs from `$input` and settles with the first success.
 *
 * Each `data` event of `$input` is passed to `$callback` through an internal
 * `Transformer`. Rejections of individual jobs are replaced by a private
 * marker value so that one failing job does not abort the remaining ones.
 * The returned promise
 *
 * - fulfills with the value of the first job that fulfills, after which
 *   both `$input` and the transformer are closed,
 * - rejects with an `UnderflowException` if `$input` is not readable or if
 *   the transformer emits `end` without any successful job,
 * - rejects with an `InvalidArgumentException` if the `Transformer`
 *   constructor refuses `$concurrency` or `$callback`,
 * - rejects with the emitted error if `$input` emits `error`,
 * - rejects with a `RuntimeException` when cancelled (both sides are closed).
 *
 * @param ReadableStreamInterface $input       stream emitting one `data` event per job
 * @param int                     $concurrency maximum number of jobs to run at once
 * @param callable                $callback    handler returning a promise for each job
 * @return PromiseInterface Returns a Promise
 */
public static function any(ReadableStreamInterface $input, $concurrency, $callback)
{
    if (!$input->isReadable()) {
        return Promise\reject(new \UnderflowException('Input stream already closed'));
    }

    // unique marker object: compared by identity below, so no job result can collide with it
    $ignore = new \stdClass();

    // only wrap valid callables; anything else is passed on untouched so the
    // constructor below rejects it with its usual InvalidArgumentException
    if (is_callable($callback)) {
        $handler = $callback;
        $callback = function ($data) use ($handler, $ignore) {
            // call_user_func() rather than $handler($data): the variable-function
            // syntax does not accept array callables before PHP 5.4 nor
            // 'Class::method' strings before PHP 7.0, both of which pass is_callable()
            return call_user_func($handler, $data)->then(null, function ($e) use ($ignore) {
                // operation failed => ignore by returning ignore marker
                return $ignore;
            });
        };
    }

    try {
        $stream = new self($concurrency, $callback);
    } catch (\InvalidArgumentException $e) {
        return Promise\reject($e);
    }

    $deferred = new Deferred(function ($_, $reject) use ($input, $stream) {
        $reject(new \RuntimeException('Transformer cancelled'));
        $input->close();
        $stream->close();
    });

    // forward input data through transformer until input stream ends/closes
    $input->pipe($stream);
    $input->on('close', array($stream, 'end'));

    // resolve promise when first successful transformation completes
    $stream->on('data', function ($result) use ($ignore, $deferred, $input, $stream) {
        if ($result !== $ignore) {
            $deferred->resolve($result);
            $input->close();
            $stream->close();
        }
    });

    // reject promise when all transformations are done without any successful transformation above
    $stream->on('end', function () use ($deferred) {
        $deferred->reject(new \UnderflowException('Stream ended without any successful transformation'));
    });

    // input error should reject result
    $input->on('error', function ($error) use ($deferred, $stream) {
        $deferred->reject($error);
        $stream->close();
    });

    return $deferred->promise();
}
655 | * 656 | * ```php 657 | * // using a Closure as handler is usually recommended 658 | * $transformer = new Transformer(10, function ($url) use ($browser) { 659 | * return $browser->get($url); 660 | * }); 661 | * ``` 662 | * 663 | * ```php 664 | * // accepts any callable, so PHP's array notation is also supported 665 | * $transformer = new Transformer(10, array($browser, 'get')); 666 | * ``` 667 | * 668 | * @param int $concurrency 669 | * @param callable $handler 670 | * @throws InvalidArgumentException 671 | */ 672 | public function __construct($concurrency, $handler) 673 | { 674 | if ($concurrency < 1) { 675 | throw new InvalidArgumentException('Invalid concurrency limit given'); 676 | } 677 | if (!is_callable($handler)) { 678 | throw new InvalidArgumentException('Invalid transformation handler given'); 679 | } 680 | 681 | $this->concurrency = $concurrency; 682 | $this->callback = $handler; 683 | } 684 | 685 | public function pause() 686 | { 687 | $this->paused = true; 688 | } 689 | 690 | public function resume() 691 | { 692 | if ($this->drain) { 693 | $this->drain = false; 694 | $this->emit('drain'); 695 | } 696 | $this->paused = false; 697 | } 698 | 699 | public function pipe(WritableStreamInterface $dest, array $options = array()) 700 | { 701 | return Util::pipe($this, $dest, $options); 702 | } 703 | 704 | public function isReadable() 705 | { 706 | return $this->readable; 707 | } 708 | 709 | public function isWritable() 710 | { 711 | return $this->writable; 712 | } 713 | 714 | public function write($data) 715 | { 716 | if (!$this->writable) { 717 | return false; 718 | } 719 | 720 | if (count($this->promises) >= $this->concurrency) { 721 | $this->queued[] = $data; 722 | return false; 723 | } 724 | 725 | $this->processData($data); 726 | 727 | if (!$this->writable) { 728 | return false; 729 | } 730 | 731 | // stream explicitly in paused state or pending promises still above limit 732 | if ($this->paused || count($this->promises) >= $this->concurrency) { 733 | 
$this->drain = true; 734 | return false; 735 | } 736 | 737 | return true; 738 | } 739 | 740 | public function end($data = null) 741 | { 742 | if (!$this->writable) { 743 | return; 744 | } 745 | 746 | $this->writable = false; 747 | $this->drain = false; 748 | 749 | if (null !== $data) { 750 | if (count($this->promises) >= $this->concurrency) { 751 | $this->queued[] = $data; 752 | } else { 753 | $this->processData($data); 754 | } 755 | } 756 | 757 | // either already closed or awaiting any pending promises 758 | if ($this->closed || $this->promises) { 759 | return; 760 | } 761 | 762 | $this->readable = false; 763 | $this->emit('end'); 764 | $this->close(); 765 | } 766 | 767 | public function close() 768 | { 769 | if ($this->closed) { 770 | return; 771 | } 772 | 773 | $this->readable = false; 774 | $this->writable = false; 775 | $this->closed = true; 776 | $this->drain = false; 777 | $this->callback = null; 778 | $this->queued = array(); 779 | 780 | foreach ($this->promises as $promise) { 781 | if ($promise instanceof PromiseInterface && \method_exists($promise, 'cancel')) { 782 | $promise->cancel(); 783 | } 784 | } 785 | $this->promises = array(); 786 | 787 | $this->emit('close'); 788 | $this->removeAllListeners(); 789 | } 790 | 791 | private function processData($data) 792 | { 793 | $handler = $this->callback; 794 | $this->promises[] = $promise = $handler($data); 795 | end($this->promises); 796 | $id = key($this->promises); 797 | 798 | $that = $this; 799 | $promise->then( 800 | function ($result) use ($that, $id) { 801 | $that->handleResult($result, $id); 802 | }, 803 | function ($error) use ($that, $id) { 804 | $that->handleError( 805 | new \RuntimeException('Handler rejected', 0, $error), 806 | $id 807 | ); 808 | } 809 | ); 810 | } 811 | 812 | /** @internal */ 813 | public function handleResult($result, $id) 814 | { 815 | if ($this->closed) { 816 | return; 817 | } 818 | 819 | unset($this->promises[$id]); 820 | $this->emit('data', array($result)); 821 | 822 | // 
process next queued item if still below concurrency limit 823 | if (count($this->promises) < $this->concurrency && $this->queued) { 824 | $data = array_shift($this->queued); 825 | $this->processData($data); 826 | return; 827 | } 828 | 829 | // end and close stream if this is the final end write 830 | if (!$this->writable && !$this->promises) { 831 | $this->readable = false; 832 | $this->emit('end'); 833 | $this->close(); 834 | return; 835 | } 836 | 837 | // nothing left to do? signal source stream to continue writing to this stream 838 | if ($this->writable && $this->drain) { 839 | $this->drain = false; 840 | $this->emit('drain'); 841 | } 842 | } 843 | 844 | /** @internal */ 845 | public function handleError(\Exception $e, $id) 846 | { 847 | if ($this->closed) { 848 | return; 849 | } 850 | 851 | unset($this->promises[$id]); 852 | $this->emit('error', array($e)); 853 | $this->close(); 854 | } 855 | } 856 | --------------------------------------------------------------------------------