├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .php-cs-fixer.dist.php ├── LICENSE ├── README.md ├── composer.json ├── doc ├── advanced_usage.md ├── getting-started.md └── recipes.md ├── phpstan.dist.neon ├── phpunit.xml.dist ├── src ├── EtlConfiguration.php ├── EtlExecutor.php ├── EtlState.php ├── EventDispatcher │ ├── Event │ │ ├── BeforeLoadEvent.php │ │ ├── EndEvent.php │ │ ├── Event.php │ │ ├── ExtractEvent.php │ │ ├── ExtractExceptionEvent.php │ │ ├── FlushEvent.php │ │ ├── FlushExceptionEvent.php │ │ ├── InitEvent.php │ │ ├── LoadEvent.php │ │ ├── LoadExceptionEvent.php │ │ ├── StartEvent.php │ │ ├── TransformEvent.php │ │ └── TransformExceptionEvent.php │ ├── EventDispatcher.php │ ├── PrioritizedListenerProvider.php │ └── StoppableEventTrait.php ├── Exception │ ├── EtlException.php │ ├── ExtractException.php │ ├── FlushException.php │ ├── LoadException.php │ ├── SkipRequest.php │ ├── StopRequest.php │ └── TransformException.php ├── Extractor │ ├── CSVExtractor.php │ ├── CallableExtractor.php │ ├── ChainExtractor.php │ ├── ExtractorInterface.php │ ├── FileExtractor.php │ ├── IterableExtractor.php │ ├── IterableExtractorInterface.php │ ├── JSONExtractor.php │ ├── ReactStreamExtractor.php │ ├── STDINExtractor.php │ └── TextLinesExtractor.php ├── Internal │ ├── ClonableTrait.php │ ├── ConditionalLoaderTrait.php │ ├── DispatchEventsTrait.php │ ├── EtlBuilderTrait.php │ ├── EtlEventListenersTrait.php │ ├── StateHolder.php │ └── TransformResult.php ├── Iterator │ ├── CSVIterator.php │ ├── ConsumableIterator.php │ ├── FileIterator.php │ ├── IteratorStream.php │ ├── PregSplitIterator.php │ └── StrTokIterator.php ├── Loader │ ├── CSVLoader.php │ ├── CallableLoader.php │ ├── ChainLoader.php │ ├── ConditionalLoaderInterface.php │ ├── DoctrineORMLoader.php │ ├── InMemoryLoader.php │ ├── JSONLoader.php │ ├── LoaderInterface.php │ └── STDOUTLoader.php ├── Normalizer │ ├── EmptyStringToNullNormalizer.php │ ├── NumericStringToNumberNormalizer.php │ └── 
ValueNormalizerInterface.php ├── Processor │ ├── IterableProcessor.php │ ├── ProcessorInterface.php │ └── ReactStreamProcessor.php ├── Recipe │ ├── FilterRecipe.php │ ├── FilterRecipeMode.php │ ├── LoggerRecipe.php │ └── Recipe.php ├── Transformer │ ├── CallableTransformer.php │ ├── ChainTransformer.php │ ├── NullTransformer.php │ └── TransformerInterface.php └── functions.php └── tests ├── Behavior ├── Events │ ├── BeforeLoadEventTest.php │ ├── EndEventTest.php │ ├── ExtractEventTest.php │ ├── ExtractExceptionEventTest.php │ ├── FlushEventTest.php │ ├── FlushExceptionEventTest.php │ ├── InitEventTest.php │ ├── LoadEventTest.php │ ├── LoadExceptionEventTest.php │ ├── StartEventTest.php │ ├── TransformEventTest.php │ └── TransformExceptionEventTest.php ├── ExtractExceptionTest.php ├── FlushExceptionTest.php ├── FlushTest.php ├── LoadExceptionTest.php ├── NextTickTest.php ├── ReactStreamProcessorTest.php ├── SkipTest.php ├── StopTest.php └── TransformExceptionTest.php ├── Data ├── 10-biggest-cities.csv ├── 10-biggest-cities.json └── 10-biggest-cities.php ├── Stubs ├── InMemoryLoader.php ├── STDINStub.php ├── STDOUTStub.php └── WritableStreamStub.php └── Unit ├── ContextTest.php ├── EtlConfigurationTest.php ├── EtlExecutorTest.php ├── EventDispatcher └── EventDispatcherTest.php ├── Extractor ├── CSVExtractorTest.php ├── CallableExtractorTest.php ├── ChainExtractorTest.php ├── IterableExtractorTest.php ├── JSONExtractorTest.php ├── ReactStreamExtractorTest.php ├── STDINExtractorTest.php └── TextLinesExtractorTest.php ├── FunctionsTest.php ├── Iterator ├── CSVIteratorTest.php ├── IteratorStreamTest.php ├── PregSplitIteratorTest.php └── StrTokIteratorTest.php ├── Loader ├── CSVLoaderTest.php ├── CallableLoaderTest.php ├── ChainLoaderTest.php ├── Doctrine │ └── Book.php ├── DoctrineORMLoaderTest.php ├── JSONLoaderTest.php └── STDOUTLoaderTest.php ├── Normalizer ├── EmptyStringToNullNormalizerTest.php └── NumericStringToNumberNormalizerTest.php ├── Recipe ├── 
FilterRecipeTest.php ├── LoggerRecipeTest.php └── RecipeTest.php └── Transformer ├── CallableTransformerTest.php ├── ChainTransformerTest.php └── NullTransformerTest.php /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI Workflow 2 | on: 3 | push: 4 | branches: [ master, '4.0' ] 5 | pull_request: 6 | 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-22.04 10 | strategy: 11 | matrix: 12 | php: 13 | - 8.2 14 | - 8.3 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Setup PHP 19 | uses: shivammathur/setup-php@v2 20 | with: 21 | php-version: ${{ matrix.php }} 22 | 23 | - name: Install dependencies 24 | run: composer install --prefer-dist --no-progress 25 | 26 | - name: Check types 27 | run: vendor/bin/phpstan analyse 28 | 29 | - name: Run test suite 30 | run: vendor/bin/pest --coverage --coverage-clover=coverage.xml 31 | 32 | - name: Upload coverage to Codecov 33 | uses: codecov/codecov-action@v1 34 | with: 35 | token: ${{ secrets.CODECOV_TOKEN }} 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | composer.lock 3 | .php-cs-fixer.cache 4 | -------------------------------------------------------------------------------- /.php-cs-fixer.dist.php: -------------------------------------------------------------------------------- 1 | in(__DIR__) 5 | ; 6 | 7 | return (new PhpCsFixer\Config()) 8 | ->setRules([ 9 | '@Symfony' => true, 10 | 'global_namespace_import' => [ 11 | 'import_functions' => true, 12 | 'import_constants' => true, 13 | ], 14 | ]) 15 | ->setFinder($finder) 16 | ; 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2023 Beno!t POLASZEK 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Latest Stable Version](https://poser.pugx.org/bentools/etl/v/stable)](https://packagist.org/packages/bentools/etl) 2 | [![License](https://poser.pugx.org/bentools/etl/license)](https://packagist.org/packages/bentools/etl) 3 | [![CI Workflow](https://github.com/bpolaszek/bentools-etl/actions/workflows/ci.yml/badge.svg)](https://github.com/bpolaszek/bentools-etl/actions/workflows/ci.yml) 4 | [![Coverage](https://codecov.io/gh/bpolaszek/bentools-etl/branch/master/graph/badge.svg?token=L5ulTaymbt)](https://codecov.io/gh/bpolaszek/bentools-etl) 5 | [![Total Downloads](https://poser.pugx.org/bentools/etl/downloads)](https://packagist.org/packages/bentools/etl) 6 | 7 | Okay, so you heard about the [Extract / Transform / Load](https://en.wikipedia.org/wiki/Extract,_transform,_load) pattern, 8 | and you're looking for a PHP library to do the stuff. Alright, let's go! 9 | 10 | `bentools/etl` is a versatile PHP library for implementing the Extract, Transform, Load (ETL) pattern, designed to streamline data processing tasks. 
11 | 12 | Table of Contents 13 | ----------------- 14 | 15 | - [Concepts](#concepts) 16 | - [Installation](#installation) 17 | - [Getting started](#usage) 18 | - [The EtlState object](doc/getting-started.md#the-etlstate-object) 19 | - [Skipping items](doc/getting-started.md#skipping-items) 20 | - [Stopping the workflow](doc/getting-started.md#stopping-the-workflow) 21 | - [Using events](doc/getting-started.md#using-events) 22 | - [Flush frequency and early flushes](doc/getting-started.md#flush-frequency-and-early-flushes) 23 | - [Advanced Usage](doc/advanced_usage.md) 24 | - [Creating your own Extractor / Transformers / Loaders](doc/advanced_usage.md#creating-your-own-extractor--transformers--loaders) 25 | - [Difference between yield and return in transformers](doc/advanced_usage.md#difference-between-yield-and-return-in-transformers) 26 | - [Next tick](doc/advanced_usage.md#next-tick) 27 | - [Chaining extractors / transformers / loaders](doc/advanced_usage.md#chaining-extractors--transformers--loaders) 28 | - [Reading from STDIN / Writing to STDOUT](doc/advanced_usage.md#reading-from-stdin--writing-to-stdout) 29 | - [Instantiators](doc/advanced_usage.md#instantiators) 30 | - [Using ReactPHP](doc/advanced_usage.md#using-reactphp-experimental) 31 | - [Recipes](doc/recipes.md) 32 | - [Contributing](#contribute) 33 | - [License](#license) 34 | 35 | Concepts 36 | -------- 37 | 38 | Let's cover the basic concepts: 39 | - **Extract**: you have a source of data (a database, a CSV file, whatever) - an **extractor** is able to read that data and provide an iterator of items 40 | - **Transform**: apply transformation to each item. A **transformer** may generate 0, 1 or several items to **load** (for example, 1 item may generate multiple SQL queries) 41 | - **Load**: load transformed item to the destination. For example, **extracted items** have been **transformed** to SQL queries, and your **loader** will run those queries against your database. 
42 | 43 | Installation 44 | ------------ 45 | 46 | ```bash 47 | composer require bentools/etl 48 | ``` 49 | 50 | > [!WARNING] 51 | > Current version (4.0) is a complete redesign and introduces significant BC (backward compatibility) breaks. 52 | > Avoid upgrading from `^2.0` or `^3.0` unless you're fully aware of the changes. 53 | 54 | Usage 55 | ----- 56 | 57 | Now let's have a look at how simple it is: 58 | 59 | ```php 60 | use BenTools\ETL\EtlExecutor; 61 | 62 | // Given 63 | $singers = ['Bob Marley', 'Amy Winehouse']; 64 | 65 | // Transform each singer's name to uppercase and process the array 66 | $etl = (new EtlExecutor()) 67 | ->transformWith(fn (string $name) => strtoupper($name)); 68 | 69 | // When 70 | $report = $etl->process($singers); 71 | 72 | // Then 73 | var_dump($report->output); // ["BOB MARLEY", "AMY WINEHOUSE"] 74 | ``` 75 | 76 | OK, that wasn't really hard, here we basically don't have to _extract_ anything (we can already iterate on `$singers`), 77 | and we're not _loading_ anywhere, except into PHP's memory. 78 | 79 | You may ask, "why don't you just `array_map('strtoupper', $singers)` ?" and you're totally right. 80 | 81 | But sometimes, extracting, transforming and / or loading get a little more complex. 82 | You may want to extract from a file, a crawled content on the web, perform one to many transformations, maybe skip some items, 83 | or reuse some extraction, transformation or loading logic. 
84 | 85 | Here's another example of what you can do: 86 | 87 | ```php 88 | use BenTools\ETL\EventDispatcher\Event\TransformEvent; 89 | use BenTools\ETL\Loader\JSONLoader; 90 | 91 | use function BenTools\ETL\extractFrom; 92 | 93 | $executor = extractFrom(function () { 94 | yield ['firstName' => 'Barack', 'lastName' => 'Obama']; 95 | yield ['firstName' => 'Donald', 'lastName' => 'Trump']; 96 | yield ['firstName' => 'Joe', 'lastName' => 'Biden']; 97 | }) 98 | ->transformWith(fn (array $item) => implode(' ', array_values($item))) 99 | ->loadInto(new JSONLoader()) 100 | ->onTransform(function (TransformEvent $event) { 101 | if ('Donald Trump' === $event->transformResult->value) { 102 | $event->state->skip(); 103 | } 104 | }); 105 | 106 | $report = $executor->process(); 107 | 108 | dump($report->output); // string '["Barack Obama", "Joe Biden"]' 109 | ``` 110 | 111 | Or: 112 | 113 | ```php 114 | $report = $executor->process(destination: 'file:///tmp/presidents.json'); 115 | var_dump($report->output); // string 'file:///tmp/presidents.json' - content has been written here 116 | ``` 117 | 118 | You get the point. Now it's up to you to write your own workflows! 119 | 120 | Continue reading the [Getting Started Guide](doc/getting-started.md). 121 | 122 | Contribute 123 | ---------- 124 | 125 | Contributions are welcome! Don't hesitate to suggest recipes. 126 | 127 | This library is 100% covered with [Pest](https://pestphp.com) tests. 128 | 129 | Please make sure to run the tests using the command below and maintain code coverage before submitting PRs. 130 | 131 | ```bash 132 | composer ci:check 133 | ``` 134 | 135 | License 136 | ------- 137 | 138 | MIT. 
139 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bentools/etl", 3 | "description": "PHP ETL (Extract / Transform / Load) implementation, with very few dependencies.", 4 | "type": "library", 5 | "require": { 6 | "php": ">=8.2", 7 | "psr/event-dispatcher": "^1.0", 8 | "psr/log": "^3.0", 9 | "symfony/options-resolver": "@stable", 10 | "thecodingmachine/safe": "^2.5" 11 | }, 12 | "require-dev": { 13 | "bentools/iterable-functions": "^2.1", 14 | "doctrine/orm": "^2.16", 15 | "friendsofphp/php-cs-fixer": "^3.35", 16 | "mockery/mockery": "^1.6", 17 | "monolog/monolog": "^3.5", 18 | "pestphp/pest": "^2.24", 19 | "phpstan/phpstan": "^1.10", 20 | "phpstan/phpstan-mockery": "^1.1", 21 | "react/stream": "^1.3", 22 | "symfony/var-dumper": "*" 23 | }, 24 | "license": "MIT", 25 | "autoload": { 26 | "psr-4": { 27 | "BenTools\\ETL\\": "src/" 28 | }, 29 | "files": [ 30 | "src/functions.php" 31 | ] 32 | }, 33 | "autoload-dev": { 34 | "psr-4": { 35 | "BenTools\\ETL\\Tests\\": "tests/" 36 | } 37 | }, 38 | "scripts": { 39 | "ci:check": [ 40 | "vendor/bin/php-cs-fixer fix", 41 | "vendor/bin/phpstan analyse", 42 | "vendor/bin/pest --coverage" 43 | ] 44 | }, 45 | "minimum-stability": "stable", 46 | "config": { 47 | "sort-packages": true, 48 | "allow-plugins": { 49 | "pestphp/pest-plugin": true 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /doc/getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | Consider you have a `/tmp/cities.csv` file containing this, and you want to convert it to a JSON file. 
4 | 5 | 6 | ```csv 7 | city_english_name,city_local_name,country_iso_code,continent,population 8 | "New York","New York",US,"North America",8537673 9 | "Los Angeles","Los Angeles",US,"North America",39776830 10 | Tokyo,東京,JP,Asia,13929286 11 | ``` 12 | 13 | ```php 14 | use BenTools\ETL\EtlExecutor; 15 | 16 | $etl = (new EtlExecutor()) 17 | ->extractFrom(new CSVExtractor(options: ['columns' => 'auto'])) 18 | ->loadInto(new JSONLoader()); 19 | 20 | $report = $etl->process('file:///tmp/cities.csv', 'file:///tmp/cities.json'); 21 | dump($report->output); // file:///tmp/cities.json 22 | ``` 23 | 24 | Then, let's have a look at `/tmp/cities.json`: 25 | ```json 26 | [ 27 | { 28 | "city_english_name": "New York", 29 | "city_local_name": "New York", 30 | "country_iso_code": "US", 31 | "continent": "North America", 32 | "population": 8537673 33 | }, 34 | { 35 | "city_english_name": "Los Angeles", 36 | "city_local_name": "Los Angeles", 37 | "country_iso_code": "US", 38 | "continent": "North America", 39 | "population": 39776830 40 | }, 41 | { 42 | "city_english_name": "Tokyo", 43 | "city_local_name": "東京", 44 | "country_iso_code": "JP", 45 | "continent": "Asia", 46 | "population": 13929286 47 | } 48 | ] 49 | ``` 50 | 51 | > [!NOTE] 52 | > We didn't _transform_ anything here, we just denormalized the CSV file to an array, then serialized that array to a JSON file. 53 | 54 | The `CSVExtractor` has some options to _read_ the data, such as considering that the 1st row is the column keys. 55 | 56 | This library ships with a few built-in extractors and loaders (plain text, CSV and JSON, to name a few), 57 | but you can of course create your own. See [Advanced Usage](advanced_usage.md). 58 | 59 | The `EtlState` object 60 | --------------------- 61 | 62 | The `EtlState` object is the state of the ETL which is currently processed by the `EtlExecutor`. 63 | This object gives you various information such as the duration, the total number of items, 64 | the current extracted key, and so on. 
65 | It also contains a `context` array which is here to hold some data related to the current process. 66 | 67 | The `EtlState` object is injected in extractors' `extract()` method, 68 | in transformers' `transform()` method 69 | and in loaders' `load()` and `flush()` methods. 70 | If you use callables, it will be injected as well. 71 | 72 | The `EtlState` object is also injected into all events. 73 | Most of its properties are read-only, except `context`. 74 | 75 | > [!TIP] 76 | > Your executor can provide a default context (see example below). 77 | 78 | ```php 79 | it('accepts a default context', function () { 80 | // Given 81 | $executor = (new EtlExecutor())->withContext(['foo' => 'bar']); 82 | 83 | // When 84 | $report = $executor->process([], context: ['bar' => 'baz']); 85 | 86 | // Then 87 | expect($report->context)->toBe(['foo' => 'bar', 'bar' => 'baz']); 88 | }); 89 | ``` 90 | 91 | Skipping items 92 | -------------- 93 | 94 | You can skip items at any time. 95 | 96 | > [!TIP] 97 | > Use the `skip()` method from the `EtlState` object as soon as your business logic requires it. 98 | 99 | Stopping the workflow 100 | --------------------- 101 | 102 | You can stop the workflow at any time. 103 | 104 | > [!TIP] 105 | > Use the `stop()` method from the `EtlState` object as soon as your business logic requires it. 106 | 107 | Using Events 108 | ------------ 109 | 110 | The `EtlExecutor` emits a variety of events during the ETL workflow, providing insights and control over the process. 
111 | 112 | - `InitEvent` when `process()` was just called 113 | - `StartEvent` when extraction just started (we might know the total number of items to extract at this time, if the extractor provides this) 114 | - `ExtractEvent` upon each extracted item 115 | - `ExtractExceptionEvent` when something went wrong during extraction (this is generally not recoverable) 116 | - `TransformEvent` upon each transformed item (exposes a `TransformResult` object, containing 0, one or more items to load) 117 | - `TransformExceptionEvent` when something went wrong during transformation (the exception can be dismissed) 118 | - `BeforeLoadEvent` upon each item to be loaded 119 | - `LoadEvent` upon each loaded item 120 | - `LoadExceptionEvent` when something went wrong during loading (the exception can be dismissed) 121 | - `FlushEvent` at each flush 122 | - `FlushExceptionEvent` when something went wrong during flush (the exception can be dismissed) 123 | - `EndEvent` whenever the workflow is complete. 124 | 125 | > [!IMPORTANT] 126 | > All events give you access to the `EtlState` object, the state of the running ETL process. 127 | 128 | Accessing `$event->state` allows you to: 129 | - Read what's going on (total number of items, number of loaded items, current extracted item index) 130 | - Write any arbitrary data into the `$state->context` array 131 | - [Skip items](#skipping-items) 132 | - [Stop the workflow](#stopping-the-workflow) 133 | - [Trigger an early flush](#flush-frequency-and-early-flushes). 
134 | 135 | You can hook into those events during `EtlExecutor` instantiation, e.g.: 136 | 137 | ```php 138 | $etl = (new EtlExecutor()) 139 | ->onExtract( 140 | fn (ExtractEvent $event) => $logger->info('Extracting item #{key}', ['key' => $event->state->currentItemKey]), 141 | ); 142 | ``` 143 | 144 | Flush frequency and early flushes 145 | --------------------------------- 146 | 147 | By default, the `flush()` method of your loader will be invoked at the end of the ETL, 148 | meaning it will likely keep all loaded items in memory before dumping them to their final destination. 149 | 150 | > [!TIP] 151 | > - Feel free to adjust the flush frequency (`flushEvery`) to fit your needs in terms of memory usage and data processing efficiency 152 | > - Optionally, trigger an early flush at any time during the ETL process. 153 | 154 | ```php 155 | $etl = (new EtlExecutor(options: new EtlConfiguration(flushEvery: 10))) 156 | ->onLoad( 157 | function (LoadEvent $event) { 158 | if (/* whatever reason */) { 159 | $event->state->flush(); 160 | } 161 | }, 162 | ); 163 | ``` 164 | 165 | Advanced usage 166 | -------------- 167 | 168 | See [Advanced Usage](advanced_usage.md). 169 | -------------------------------------------------------------------------------- /doc/recipes.md: -------------------------------------------------------------------------------- 1 | # Recipes 2 | 3 | Recipes are pre-configured setups for `EtlExecutor`, facilitating reusable ETL configurations. 4 | 5 | LoggerRecipe 6 | ------------ 7 | 8 | The `LoggerRecipe` enables logging for all ETL events. 9 | 10 | ```php 11 | use BenTools\ETL\EtlExecutor; 12 | use BenTools\ETL\Recipe\LoggerRecipe; 13 | use Monolog\Logger; 14 | 15 | $logger = new Logger(); 16 | $etl = (new EtlExecutor()) 17 | ->withRecipe(new LoggerRecipe($logger)); 18 | ``` 19 | 20 | This will basically listen to all events and fire log entries. 21 | 22 | FilterRecipe 23 | ------------ 24 | 25 | The `FilterRecipe` gives you syntactic sugar for skipping items. 
26 | 27 | ```php 28 | use BenTools\ETL\EtlExecutor; 29 | use BenTools\ETL\Recipe\LoggerRecipe; 30 | use Monolog\Logger; 31 | 32 | use function BenTools\ETL\skipWhen; 33 | 34 | $logger = new Logger(); 35 | $etl = (new EtlExecutor())->withRecipe(skipWhen(fn ($item) => 'apple' === $item)); 36 | $report = $etl->process(['banana', 'apple', 'pineapple']); 37 | 38 | var_dump($report->output); // ['banana', 'pineapple'] 39 | ``` 40 | 41 | Creating your own recipes 42 | ------------------------- 43 | 44 | You can create your own recipes by implementing `BenTools\ETL\Recipe\Recipe` 45 | or using a callable with the same signature. 46 | 47 | ### Example 1. Stop the workflow when a max number of items has been reached 48 | 49 | ```php 50 | use BenTools\ETL\EtlExecutor; 51 | use BenTools\ETL\EtlState; 52 | use BenTools\ETL\EventDispatcher\Event\ExtractEvent; 53 | 54 | use const PHP_INT_MAX; 55 | 56 | final class MaxItemsRecipe extends Recipe 57 | { 58 | public function __construct( 59 | private readonly int $maxItems = PHP_INT_MAX, 60 | ) { 61 | } 62 | 63 | public function decorate(EtlExecutor $executor): EtlExecutor 64 | { 65 | return $executor 66 | ->withContext(['maxItems' => $this->maxItems]) 67 | ->onExtract($this); 68 | } 69 | 70 | public function __invoke(ExtractEvent $event): void 71 | { 72 | if ($event->state->nbExtractedItems >= $event->state->context['maxItems']) { 73 | $event->state->nextTick(fn (EtlState $state) => $state->skip()); 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | Usage: 80 | 81 | ```php 82 | use function BenTools\ETL\withRecipe; 83 | 84 | $etl = withRecipe(new MaxItemsRecipe(10)); // Set to 10 items max by default 85 | $report = $etl->process(['foo', 'bar', 'baz'], context: ['maxItems' => 2]); // Optionally overwrite here 86 | var_dump($report->output); // ['foo', 'bar'] 87 | ``` 88 | 89 | ### Example 2. 
Display a progress bar when using the Symfony framework: 90 | 91 | ```php 92 | use BenTools\ETL\EtlExecutor; 93 | use BenTools\ETL\EventDispatcher\Event\Event; 94 | use BenTools\ETL\Recipe\Recipe; 95 | use Symfony\Component\Console\Helper\ProgressBar; 96 | 97 | final class ProgressBarRecipe extends Recipe 98 | { 99 | public function __construct( 100 | public readonly ProgressBar $progressBar, 101 | ) { 102 | } 103 | 104 | public function decorate(EtlExecutor $executor): EtlExecutor 105 | { 106 | return $executor 107 | ->onStart(function (Event $event) { 108 | if (!$event->state->nbTotalItems) { 109 | return; 110 | } 111 | $this->progressBar->setMaxSteps($event->state->nbTotalItems); 112 | }) 113 | ->onExtract(fn () => $this->progressBar->advance()) 114 | ->onEnd(fn () => $this->progressBar->finish()); 115 | } 116 | } 117 | ``` 118 | 119 | Usage: 120 | 121 | ```php 122 | use BenTools\ETL\EtlExecutor; 123 | use Symfony\Component\Console\Style\SymfonyStyle; 124 | 125 | $output = new SymfonyStyle($input, $output); 126 | $progressBar = $output->createProgressBar(); 127 | $executor = (new EtlExecutor())->withRecipe(new ProgressBarRecipe($progressBar)); 128 | ``` 129 | -------------------------------------------------------------------------------- /phpstan.dist.neon: -------------------------------------------------------------------------------- 1 | parameters: 2 | level: 6 3 | paths: 4 | - src/ 5 | - tests/ 6 | ignoreErrors: 7 | - 8 | message: "#Access to an undefined property#" 9 | path: "tests/Unit/Recipe/LoggerRecipeTest.php" 10 | includes: 11 | - vendor/phpstan/phpstan-mockery/extension.neon 12 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | tests 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | src 30 | 31 | 32 | 33 | 
-------------------------------------------------------------------------------- /src/EtlConfiguration.php: -------------------------------------------------------------------------------- 1 | 0, got %d', $flushEvery)); 26 | } 27 | $this->flushFrequency = $flushEvery; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/EtlState.php: -------------------------------------------------------------------------------- 1 | 23 | */ 24 | public SplObjectStorage $nextTickCallbacks; 25 | 26 | private bool $earlyFlush = false; 27 | 28 | /** 29 | * @param array $context 30 | */ 31 | public function __construct( 32 | public readonly EtlConfiguration $options = new EtlConfiguration(), 33 | public readonly mixed $source = null, 34 | public readonly mixed $destination = null, 35 | public array $context = [], 36 | public readonly mixed $currentItemKey = null, 37 | public readonly int $currentItemIndex = -1, 38 | public readonly int $nbExtractedItems = 0, 39 | public readonly int $nbLoadedItems = 0, 40 | public readonly int $nbLoadedItemsSinceLastFlush = 0, 41 | public readonly ?int $nbTotalItems = null, 42 | public readonly DateTimeImmutable $startedAt = new DateTimeImmutable(), 43 | public readonly ?DateTimeImmutable $endedAt = null, 44 | public readonly mixed $output = null, 45 | public readonly StateHolder $stateHolder = new StateHolder(), 46 | ) { 47 | $this->nextTickCallbacks ??= new SplObjectStorage(); 48 | $this->stateHolder->state ??= $this; 49 | } 50 | 51 | /** 52 | * @internal 53 | */ 54 | public function getLastVersion(): self 55 | { 56 | return $this->stateHolder->state; 57 | } 58 | 59 | /** 60 | * @internal 61 | */ 62 | public function update(self $state): self 63 | { 64 | $this->stateHolder->state = $state; 65 | 66 | return $state; 67 | } 68 | 69 | public function nextTick(callable $callback): void 70 | { 71 | $this->nextTickCallbacks->attach(static fn (EtlState $state) => $callback($state)); 72 | } 73 | 74 | /** 75 
| * Flush after current item. 76 | */ 77 | public function flush(): void 78 | { 79 | $this->earlyFlush = true; 80 | } 81 | 82 | /** 83 | * Skip current item. 84 | */ 85 | public function skip(): never 86 | { 87 | throw new SkipRequest(); 88 | } 89 | 90 | /** 91 | * Stop after current item. 92 | */ 93 | public function stop(): never 94 | { 95 | throw new StopRequest(); 96 | } 97 | 98 | public function getDuration(): float 99 | { 100 | $endedAt = $this->endedAt ?? new DateTimeImmutable(); 101 | 102 | return (float) ($endedAt->format('U.u') - $this->startedAt->format('U.u')); 103 | } 104 | 105 | /** 106 | * @internal 107 | */ 108 | public function shouldFlush(): bool 109 | { 110 | return match (true) { 111 | $this->earlyFlush => true, 112 | INF === $this->options->flushFrequency => false, 113 | 0 === $this->nbLoadedItemsSinceLastFlush => false, 114 | 0 === ($this->nbLoadedItemsSinceLastFlush % $this->options->flushFrequency) => true, 115 | default => false, 116 | }; 117 | } 118 | 119 | /** 120 | * @internal 121 | */ 122 | public function withUpdatedItemKey(mixed $key): self 123 | { 124 | return $this->cloneWith([ 125 | 'currentItemKey' => $key, 126 | 'currentItemIndex' => $this->currentItemIndex + 1, 127 | 'nbExtractedItems' => $this->nbExtractedItems + 1, 128 | ]); 129 | } 130 | 131 | /** 132 | * @internal 133 | */ 134 | public function withIncrementedNbLoadedItems(): self 135 | { 136 | return $this->cloneWith([ 137 | 'nbLoadedItems' => $this->nbLoadedItems + 1, 138 | 'nbLoadedItemsSinceLastFlush' => $this->nbLoadedItemsSinceLastFlush + 1, 139 | ]); 140 | } 141 | 142 | /** 143 | * @internal 144 | */ 145 | public function withNbTotalItems(?int $nbTotalItems): self 146 | { 147 | return $this->cloneWith(['nbTotalItems' => $nbTotalItems]); 148 | } 149 | 150 | /** 151 | * @internal 152 | */ 153 | public function withOutput(mixed $output): self 154 | { 155 | return $this->cloneWith(['output' => $output]); 156 | } 157 | 158 | /** 159 | * @internal 160 | */ 161 | public 
/**
 * Registers a listener for an event class at the given priority.
 *
 * Listeners are bucketed by priority; buckets are kept sorted from highest to lowest
 * priority, and a flattened list is rebuilt after every registration so lookups are
 * a plain array read.
 */
public function listenTo(string $eventClass, callable $callback, int $priority = 0): void
{
    $buckets = $this->prioritizedListeners[$eventClass] ?? [];
    $buckets[$priority][] = $callback;
    krsort($buckets);

    $this->prioritizedListeners[$eventClass] = $buckets;
    $this->flattenedListeners[$eventClass] = array_merge(...$buckets);
}
/**
 * Returns a new chain made of the current extractors followed by the given ones.
 *
 * The current instance is left untouched.
 */
public function with(
    ExtractorInterface|callable $extractor,
    ExtractorInterface|callable ...$extractors,
): self {
    $chain = array_merge($this->extractors, [$extractor], $extractors);

    return new self(...$chain);
}
/**
 * Turns the given value into an SplFileObject.
 *
 * An existing SplFileObject is returned as is; a string is used as the filename of a new one.
 *
 * @throws ExtractException when the value is neither an SplFileObject nor a string
 */
private function resolveFile(mixed $file): SplFileObject
{
    if ($file instanceof SplFileObject) {
        return $file;
    }

    if (!is_string($file)) {
        throw new ExtractException('Invalid file.');
    }

    return new SplFileObject($file);
}
/**
 * Resolves the source to a file object when it designates a file.
 *
 * Returns the source itself when it already is an SplFileObject, opens the path that
 * follows the `file://` prefix when the source is such a string, and returns null otherwise.
 */
private function resolveFile(mixed $source): ?SplFileObject
{
    if ($source instanceof SplFileObject) {
        return $source;
    }

    if (is_string($source) && str_starts_with($source, 'file://')) {
        return new SplFileObject(substr($source, 7));
    }

    return null;
}
/**
 * Extractor that reads `php://stdin` line by line, with trailing newlines dropped.
 *
 * The object is its own iterator. The underlying stream is opened lazily, so the Iterator
 * methods are safe to call even when rewind() has not been invoked first (for instance
 * when the extractor is wrapped in a NoRewindIterator).
 *
 * @implements Iterator<int, string>
 */
final class STDINExtractor implements Iterator, IterableExtractorInterface
{
    private ?SplFileObject $stdIn = null;

    public function current(): string|false
    {
        return $this->stdIn()->current();
    }

    public function next(): void
    {
        $this->stdIn()->next();
    }

    public function key(): int
    {
        return $this->stdIn()->key();
    }

    public function valid(): bool
    {
        return $this->stdIn()->valid();
    }

    /**
     * Opens a fresh handle on the standard input.
     */
    public function rewind(): void
    {
        $this->stdIn = $this->open();
    }

    public function extract(EtlState $state): iterable
    {
        // Delegating to $this makes the generator call rewind() before reading.
        yield from $this;
    }

    /**
     * Returns the current handle, opening it on first use.
     */
    private function stdIn(): SplFileObject
    {
        return $this->stdIn ??= $this->open();
    }

    /**
     * Creates the stdin file object configured to strip line endings.
     */
    private function open(): SplFileObject
    {
        $stdIn = new SplFileObject('php://stdin');
        $stdIn->setFlags(SplFileObject::DROP_NEW_LINE);

        return $stdIn;
    }
}
/**
 * Creates a new instance of the current class from this object's state, with some values replaced.
 *
 * The copy is built by calling the constructor (not with `clone`), then the writable,
 * non-promoted properties are assigned on the new instance.
 *
 * @param array<string, mixed> $cloneArgs replacement values, keyed by constructor parameter / property name
 */
public function cloneWith(array $cloneArgs = []): static
{
    // Reflection metadata is kept in static locals so it is computed on the first call only.
    static $refl, $notPromotedWritablePropNames, $constructorParamNames;
    $refl ??= new ReflectionClass($this);
    $constructorParamNames ??= array_column($refl->getConstructor()->getParameters(), 'name');
    // Properties that the constructor call cannot set: neither readonly nor promoted.
    $notPromotedWritablePropNames ??= array_column(
        array_filter(
            $refl->getProperties(),
            fn (ReflectionProperty $property) => !$property->isReadOnly() && !$property->isPromoted(),
        ),
        'name'
    );

    // NOTE(review): array_fill_from() is defined elsewhere; presumably it picks, for each listed
    // name, the value from $cloneArgs when present and from the current object otherwise — confirm.
    $clone = new static(...array_fill_from($constructorParamNames, get_object_vars($this), $cloneArgs));
    $notPromotedProps = array_fill_from($notPromotedWritablePropNames, get_object_vars($this), $cloneArgs);
    foreach ($notPromotedProps as $prop => $value) {
        $clone->{$prop} = $value;
    }

    return $clone;
}
/**
 * Returns a copy of the executor that reads from the given extractor(s).
 *
 * Plain callables are wrapped in a CallableExtractor. When several extractors are given
 * they are combined into a ChainExtractor; a single one is used directly.
 */
public function extractFrom(
    ExtractorInterface|callable $extractor,
    ExtractorInterface|callable ...$extractors
): self {
    $chain = [];
    foreach ([$extractor, ...$extractors] as $position => $candidate) {
        $chain[$position] = $candidate instanceof ExtractorInterface
            ? $candidate
            : new CallableExtractor($candidate(...));
    }

    $resolved = count($chain) > 1 ? new ChainExtractor(...$chain) : $chain[0];

    return $this->cloneWith(['extractor' => $resolved]);
}
/**
 * Returns a copy of the executor with an updated context.
 *
 * The new context starts from the current one (or from nothing when $clear is true),
 * then receives the given entries. When $overwrite is false, keys that already exist in
 * the current context keep their current value.
 *
 * @param array<string, mixed> $context
 */
public function withContext(array $context, bool $clear = false, bool $overwrite = true): self
{
    $base = $clear ? [] : $this->context;
    // Values to restore on top of the incoming ones; taken from the current context
    // regardless of $clear.
    $preserved = $overwrite ? [] : array_intersect_key($this->context, $context);

    return $this->cloneWith(['context' => array_merge($base, $context, $preserved)]);
}
/**
 * Wraps the outcome of a transformation so it can always be iterated.
 *
 * A generator outcome is unpacked into an array and flagged as iterable (each entry is
 * yielded separately); any other value is kept as is and yielded as a single item.
 *
 * @internal
 *
 * @implements IteratorAggregate<mixed, mixed>
 */
final class TransformResult implements IteratorAggregate
{
    public mixed $value;
    public bool $iterable;

    private function __construct()
    {
    }

    public function getIterator(): Traversable
    {
        if (!$this->iterable) {
            yield $this->value;

            return;
        }

        yield from $this->value;
    }

    /**
     * Builds a result from a raw value; an existing result is returned unchanged.
     */
    public static function create(mixed $value): self
    {
        if ($value instanceof self) {
            return $value;
        }

        $result = new self();
        $fromGenerator = $value instanceof Generator;
        $result->value = $fromGenerator ? [...$value] : $value;
        $result->iterable = $fromGenerator;

        return $result;
    }
}
/**
 * Reads CSV rows straight from a file object and yields them one at a time.
 *
 * The READ_CSV flag is added to the flags already set on the file. Blank lines are
 * ignored. When the first row must be skipped it is consumed here and, if no column
 * names were configured, it becomes the column list for the following rows.
 *
 * @return Traversable<mixed>
 */
private function iterateFromFile(SplFileObject $file): Traversable
{
    $file->setFlags($file->getFlags() | SplFileObject::READ_CSV);

    $columns = $this->options['columns'];
    if ('auto' === $columns) {
        $columns = null;
    }

    while (!$file->eof()) {
        $fields = $file->fgetcsv(
            $this->options['delimiter'],
            $this->options['enclosure'],
            $this->options['escapeString'],
        );
        if (!is_array($fields)) {
            // fgetcsv() signals a failed read with a non-array value; nothing more can be
            // parsed, and passing it on would raise a TypeError in extract().
            break;
        }
        if ([null] === $fields) {
            // Blank line.
            continue;
        }
        if (0 === $file->key() && $this->shouldSkipFirstRow()) {
            $columns ??= $fields;
            continue;
        }

        yield $this->extract($fields, $columns);
    }
}
/**
 * Maps column names onto row values, tolerating a length mismatch.
 *
 * Extra values are dropped; missing values are filled with null.
 *
 * @param string[] $keys
 * @param string[] $values
 *
 * @return string[]
 */
private static function combine(array $keys, array $values): array
{
    $width = count($keys);
    $aligned = array_pad(array_slice(array_values($values), 0, $width), $width, null);

    return array_combine($keys, $aligned);
}
/**
 * Iterates over the lines of a file, without their line terminators.
 *
 * @implements IteratorAggregate<int, string>
 */
final readonly class FileIterator implements IteratorAggregate
{
    /**
     * @var array{skipEmptyLines: bool}
     */
    private array $options;

    /**
     * @param array{skipEmptyLines?: bool} $options `skipEmptyLines` (default true) drops lines that are empty once trimmed
     */
    public function __construct(
        private SplFileObject $file,
        array $options = [],
    ) {
        $resolver = (new OptionsResolver())->setIgnoreUndefined();
        $resolver->setDefaults(['skipEmptyLines' => true]);
        $resolver->setAllowedTypes('skipEmptyLines', 'bool');
        $this->options = $resolver->resolve($options);
    }

    public function getIterator(): Traversable
    {
        foreach ($this->file as $row) {
            // Strip both CR and LF explicitly: PHP_EOL depends on the platform running the
            // code, not on the file, so CRLF files read on Unix would keep a trailing "\r".
            $line = rtrim($row, "\r\n");
            if ($this->options['skipEmptyLines'] && '' === $line) {
                continue;
            }
            yield $line;
        }
    }
}
/**
 * Lazily iterates over the non-empty lines of a string.
 *
 * The content is split on any run of "\r" and/or "\n" characters; empty
 * tokens (blank lines, leading or trailing line breaks) are never yielded,
 * which is the tokenisation strtok() performs with a "\r\n" mask.
 *
 * The scan is done with strspn()/strcspn() rather than strtok(): strtok()
 * keeps a single, process-wide cursor, so a generator built on it breaks as
 * soon as two iterators are consumed alternately, or the consumer calls
 * strtok() itself between two iterations.
 *
 * @implements IteratorAggregate<int, string>
 */
final readonly class StrTokIterator implements IteratorAggregate
{
    public function __construct(
        public string $content,
    ) {
    }

    /**
     * @return Traversable<int, string>
     */
    public function getIterator(): Traversable
    {
        $length = strlen($this->content);

        // Skip any leading line breaks so the first token is never empty.
        $offset = strspn($this->content, "\r\n");

        while ($offset < $length) {
            // Length of the current line, i.e. up to the next CR or LF.
            $size = strcspn($this->content, "\r\n", $offset);

            yield substr($this->content, $offset, $size);

            // Move past the line, then past the whole run of line breaks.
            $offset += $size;
            $offset += strspn($this->content, "\r\n", $offset);
        }
    }
}
setIgnoreUndefined(); 35 | $resolver->setDefaults([ 36 | 'delimiter' => ',', 37 | 'enclosure' => '"', 38 | 'escapeString' => '\\', 39 | 'columns' => null, 40 | 'eol' => PHP_EOL, 41 | ]); 42 | $resolver->setAllowedTypes('delimiter', 'string'); 43 | $resolver->setAllowedTypes('enclosure', 'string'); 44 | $resolver->setAllowedTypes('escapeString', 'string'); 45 | $resolver->setAllowedTypes('columns', ['string[]', 'null', 'string']); 46 | $resolver->setAllowedValues('columns', function (array|string|null $value) { 47 | return 'auto' === $value || null === $value || is_array($value); 48 | }); 49 | $resolver->setAllowedTypes('eol', 'string'); 50 | $this->options = $resolver->resolve($options); 51 | } 52 | 53 | public function load(mixed $item, EtlState $state): void 54 | { 55 | $context = &$state->context[__CLASS__]; 56 | $context['columsWritten'] ??= false; 57 | 58 | if (!$context['columsWritten']) { 59 | if (is_array($this->options['columns'])) { 60 | $context['pending'][] = $this->options['columns']; 61 | $context['columsWritten'] = true; 62 | } elseif ('auto' === $this->options['columns']) { 63 | $context['pending'][] = array_keys($item); 64 | $context['columsWritten'] = true; 65 | } 66 | } 67 | 68 | $context['pending'][] = $item; 69 | } 70 | 71 | public function flush(bool $isPartial, EtlState $state): string 72 | { 73 | $context = &$state->context[__CLASS__]; 74 | $context['pending'] ??= []; 75 | $file = $context['file'] ??= $this->resolveDestination($state->destination ?? 
$this->destination); 76 | foreach ($context['pending'] as $item) { 77 | $this->write($file, $item); 78 | } 79 | 80 | $context['pending'] = []; 81 | 82 | if (!$isPartial && $file instanceof SplTempFileObject) { 83 | $file->rewind(); 84 | 85 | return implode('', [...$file]); // @phpstan-ignore-line 86 | } 87 | 88 | return 'file://'.$file->getPathname(); 89 | } 90 | 91 | /** 92 | * @param array $item 93 | */ 94 | private function write(SplFileObject $file, array $item): void 95 | { 96 | $options = $this->options; 97 | $file->fputcsv($item, $options['delimiter'], $options['enclosure'], $options['escapeString'], $options['eol']); 98 | } 99 | 100 | private function resolveDestination(mixed $destination): SplFileObject 101 | { 102 | $isFileName = is_string($destination) && str_starts_with($destination, 'file://'); 103 | 104 | return match (true) { 105 | $destination instanceof SplFileObject => $destination, 106 | $isFileName => new SplFileObject(substr($destination, 7), 'w'), 107 | null === $destination => new SplTempFileObject(), 108 | default => throw new LoadException('Invalid destination.'), 109 | }; 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/Loader/CallableLoader.php: -------------------------------------------------------------------------------- 1 | destination ?? $this->closure; 23 | if (!is_callable($callback)) { 24 | throw new LoadException('Invalid destination.'); 25 | } 26 | $state->context[__CLASS__]['loaded'][] = $callback($item, $state); 27 | } 28 | 29 | /** 30 | * @codeCoverageIgnore 31 | */ 32 | public function flush(bool $isPartial, EtlState $state): mixed 33 | { 34 | foreach ($state->context[__CLASS__]['loaded'] ?? [] as $i => $item) { 35 | $state->context[__CLASS__]['output'][] = $item; 36 | unset($state->context[__CLASS__]['loaded'][$i]); 37 | } 38 | 39 | return $state->context[__CLASS__]['output'] ?? 
[]; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/Loader/ChainLoader.php: -------------------------------------------------------------------------------- 1 | $_loader) { 25 | if (!$_loader instanceof LoaderInterface) { 26 | $loaders[$l] = new CallableLoader($_loader(...)); 27 | } 28 | } 29 | $this->loaders = $loaders; 30 | } 31 | 32 | public function with( 33 | LoaderInterface|callable $loader, 34 | LoaderInterface|callable ...$loaders, 35 | ): self { 36 | return new self(...[...$this->loaders, $loader, ...$loaders]); 37 | } 38 | 39 | public function load(mixed $item, EtlState $state): void 40 | { 41 | foreach ($this->loaders as $loader) { 42 | if (self::shouldLoad($loader, $item, $state)) { 43 | $loader->load($item, $state); 44 | } 45 | } 46 | } 47 | 48 | public function flush(bool $isPartial, EtlState $state): mixed 49 | { 50 | foreach ($this->loaders as $loader) { 51 | $output = $loader->flush($isPartial, $state); 52 | } 53 | 54 | return $output ?? null; 55 | } 56 | 57 | public static function from(LoaderInterface $loader): self 58 | { 59 | return match ($loader instanceof self) { 60 | true => $loader, 61 | false => new self($loader), 62 | }; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/Loader/ConditionalLoaderInterface.php: -------------------------------------------------------------------------------- 1 | managerRegistry->getManagerForClass($item::class) 30 | ?? 
throw new LoadException(sprintf('Could not find manager for class %s.', $item::class)); 31 | 32 | $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage(); 33 | $managers->attach($manager); 34 | $manager->persist($item); 35 | } 36 | 37 | public function flush(bool $isPartial, EtlState $state): null 38 | { 39 | $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage(); 40 | foreach ($managers as $manager) { 41 | $manager->flush(); 42 | $managers->detach($manager); 43 | } 44 | 45 | return null; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/Loader/InMemoryLoader.php: -------------------------------------------------------------------------------- 1 | context['pending'][] = $item; 16 | } 17 | 18 | /** 19 | * @return list> 20 | */ 21 | public function flush(bool $isPartial, EtlState $state): array 22 | { 23 | $state->context['batchNumber'] ??= 0; 24 | foreach ($state->context['pending'] as $key => $value) { 25 | $state->context['batches'][$state->context['batchNumber']][] = $value; 26 | } 27 | $state->context['pending'] = []; 28 | ++$state->context['batchNumber']; 29 | 30 | return array_merge(...$state->context['batches'] ?? []); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/Loader/JSONLoader.php: -------------------------------------------------------------------------------- 1 | context[__CLASS__]['pending'][] = $item; 31 | } 32 | 33 | public function flush(bool $isPartial, EtlState $state): string 34 | { 35 | $context = &$state->context[__CLASS__]; 36 | $context['hasStarted'] ??= false; 37 | $context['pending'] ??= []; 38 | 39 | $file = $context['file'] ??= $this->resolveDestination($state->destination ?? 
$this->destination); 40 | // $this->writeOpeningBracketIfNotDoneYet($state, $file); 41 | match ($isPartial) { 42 | true => $this->earlyFlush($state, $file), 43 | false => $this->finalFlush($state, $file), 44 | }; 45 | $context['pending'] = []; 46 | 47 | if (!$isPartial && $file instanceof SplTempFileObject) { 48 | $file->rewind(); 49 | 50 | return implode('', [...$file]); // @phpstan-ignore-line 51 | } 52 | 53 | return 'file://'.$file->getPathname(); 54 | } 55 | 56 | private function earlyFlush(EtlState $state, SplFileObject $file): void 57 | { 58 | $context = &$state->context[__CLASS__]; 59 | $serialized = json_encode($context['pending'], JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); 60 | $serialized = ltrim($serialized, '['); 61 | $serialized = rtrim($serialized, ']'); 62 | $serialized = trim($serialized); 63 | 64 | if (!($context['openingBracket'] ?? false)) { 65 | $file->fwrite('['); 66 | $context['openingBracket'] = true; 67 | $file->fwrite(PHP_EOL.' '.$serialized); 68 | } elseif ([] !== $context['pending']) { 69 | $file->fwrite(','); 70 | $file->fwrite(PHP_EOL.' 
'.$serialized); 71 | } 72 | } 73 | 74 | private function finalFlush(EtlState $state, SplFileObject $file): void 75 | { 76 | $this->earlyFlush($state, $file); 77 | if ($state->nbLoadedItems > 0) { 78 | $file->fwrite(PHP_EOL); 79 | } 80 | $file->fwrite(']'.PHP_EOL); 81 | } 82 | 83 | private function resolveDestination(mixed $destination): SplFileObject 84 | { 85 | $isFileName = is_string($destination) && str_starts_with($destination, 'file://'); 86 | 87 | return match (true) { 88 | $destination instanceof SplFileObject => $destination, 89 | $isFileName => new SplFileObject(substr($destination, 7), 'w'), 90 | null === $destination => new SplTempFileObject(), 91 | default => throw new LoadException('Invalid destination.'), 92 | }; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/Loader/LoaderInterface.php: -------------------------------------------------------------------------------- 1 | context[__CLASS__]['pending'][] = $item; 33 | } 34 | 35 | public function flush(bool $isPartial, EtlState $state): int 36 | { 37 | $pendingItems = $state->context[__CLASS__]['pending'] ?? 
[]; 38 | $state->context[__CLASS__]['resource'] ??= fopen('php://stdout', 'wb+'); 39 | $state->context[__CLASS__]['nbWrittenBytes'] ??= 0; 40 | foreach ($pendingItems as $item) { 41 | $state->context[__CLASS__]['nbWrittenBytes'] += fwrite( 42 | $state->context[__CLASS__]['resource'], 43 | $item.$this->eol, 44 | ); 45 | } 46 | 47 | $nbWrittenBytes = $state->context[__CLASS__]['nbWrittenBytes']; 48 | if (!$isPartial) { 49 | // fclose($state->context[__CLASS__]['resource']); 50 | unset($state->context[__CLASS__]); 51 | } 52 | 53 | return $nbWrittenBytes; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/Normalizer/EmptyStringToNullNormalizer.php: -------------------------------------------------------------------------------- 1 | $items 28 | */ 29 | public function process(EtlExecutor $executor, EtlState $state, mixed $items): EtlState 30 | { 31 | foreach ($this->extract($executor, $state, $items) as $key => $item) { 32 | try { 33 | $executor->processItem($item, $key, $state); 34 | } catch (SkipRequest) { 35 | } 36 | } 37 | 38 | return $state; 39 | } 40 | 41 | /** 42 | * @param iterable $items 43 | */ 44 | public function extract(EtlExecutor $executor, EtlState $state, iterable $items): Generator 45 | { 46 | try { 47 | yield from $items; 48 | } catch (Throwable $exception) { 49 | ExtractException::emit($executor, $exception, $state); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/Processor/ProcessorInterface.php: -------------------------------------------------------------------------------- 1 | on('data', function (mixed $item) use ($executor, &$key, $state, $stream) { 38 | if (is_string($item)) { 39 | $item = trim($item); 40 | } 41 | try { 42 | $executor->processItem($item, ++$key, $state); 43 | } catch (SkipRequest) { 44 | } catch (StopRequest) { 45 | $stream->close(); 46 | } catch (Throwable $e) { 47 | $stream->close(); 48 | 
ExtractException::emit($executor, $e, $state); 49 | } 50 | }); 51 | 52 | Loop::run(); 53 | 54 | return $state->getLastVersion(); 55 | } 56 | 57 | public function decorate(EtlExecutor $executor): EtlExecutor 58 | { 59 | return $executor->extractFrom(new ReactStreamExtractor())->withProcessor($this); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/Recipe/FilterRecipe.php: -------------------------------------------------------------------------------- 1 | eventClass, self::EVENTS_CLASSES)) { 27 | throw new InvalidArgumentException(sprintf('Can only filter on ExtractEvent / LoadEvent, not %s', $this->eventClass)); 28 | } 29 | } 30 | 31 | public function decorate(EtlExecutor $executor): EtlExecutor 32 | { 33 | return match ($this->eventClass) { 34 | ExtractEvent::class => $executor->onExtract($this(...), $this->priority), 35 | BeforeLoadEvent::class => $executor->onBeforeLoad($this(...), $this->priority), 36 | default => $executor, 37 | }; 38 | } 39 | 40 | public function __invoke(ExtractEvent|BeforeLoadEvent $event): void 41 | { 42 | $matchFilter = !($this->filter)($event->item, $event->state); 43 | if (FilterRecipeMode::EXCLUDE === $this->mode) { 44 | $matchFilter = !$matchFilter; 45 | } 46 | 47 | if ($matchFilter) { 48 | $event->state->skip(); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/Recipe/FilterRecipeMode.php: -------------------------------------------------------------------------------- 1 | $logLevels 29 | * @param array $priorities 30 | */ 31 | public function __construct( 32 | private readonly LoggerInterface $logger = new NullLogger(), 33 | private readonly array $logLevels = [ 34 | StartEvent::class => LogLevel::INFO, 35 | FlushEvent::class => LogLevel::INFO, 36 | EndEvent::class => LogLevel::INFO, 37 | ExtractExceptionEvent::class => LogLevel::ERROR, 38 | TransformExceptionEvent::class => LogLevel::ERROR, 39 | 
LoadExceptionEvent::class => LogLevel::ERROR, 40 | FlushExceptionEvent::class => LogLevel::ERROR, 41 | ], 42 | private readonly string $defaultLogLevel = LogLevel::DEBUG, 43 | private readonly array $priorities = [], 44 | private readonly int $defaultPriority = -1, 45 | ) { 46 | } 47 | 48 | public function decorate(EtlExecutor $executor): EtlExecutor 49 | { 50 | return $executor 51 | ->onInit(fn (InitEvent $event) => $this->log($event, 'Initializing ETL...', ['state' => $event->state]), 52 | $this->priorities[InitEvent::class] ?? $this->defaultPriority) 53 | ->onStart(fn (StartEvent $event) => $this->log($event, 'Starting ETL...', ['state' => $event->state]), 54 | $this->priorities[StartEvent::class] ?? $this->defaultPriority) 55 | ->onExtract( 56 | fn (ExtractEvent $event) => $this->log( 57 | $event, 58 | 'Extracting item #{key}', 59 | [ 60 | 'key' => $event->state->currentItemKey, 61 | 'state' => $event->state, 62 | 'item' => $event->item, 63 | ], 64 | ), 65 | $this->priorities[ExtractEvent::class] ?? $this->defaultPriority, 66 | ) 67 | ->onExtractException( 68 | fn (ExtractExceptionEvent $event) => $this->log( 69 | $event, 70 | 'Extract exception on key #{key}: {msg}', 71 | [ 72 | 'msg' => $event->exception->getMessage(), 73 | 'key' => $event->state->currentItemKey, 74 | 'state' => $event->state, 75 | ], 76 | ), 77 | $this->priorities[ExtractExceptionEvent::class] ?? $this->defaultPriority, 78 | ) 79 | ->onTransform( 80 | fn (TransformEvent $event) => $this->log( 81 | $event, 82 | 'Transformed item #{key}', 83 | [ 84 | 'key' => $event->state->currentItemKey, 85 | 'state' => $event->state, 86 | 'items' => $event->transformResult, 87 | ], 88 | ), 89 | $this->priorities[TransformEvent::class] ?? 
$this->defaultPriority, 90 | ) 91 | ->onTransformException( 92 | fn (TransformExceptionEvent $event) => $this->log( 93 | $event, 94 | 'Transform exception on key #{key}: {msg}', 95 | [ 96 | 'msg' => $event->exception->getMessage(), 97 | 'key' => $event->state->currentItemKey, 98 | 'state' => $event->state, 99 | ], 100 | ), 101 | $this->priorities[TransformExceptionEvent::class] ?? $this->defaultPriority, 102 | ) 103 | ->onLoad( 104 | fn (LoadEvent $event) => $this->log( 105 | $event, 106 | 'Loaded item #{key}', 107 | [ 108 | 'key' => $event->state->currentItemKey, 109 | 'state' => $event->state, 110 | 'item' => $event->item, 111 | ], 112 | ), 113 | $this->priorities[LoadEvent::class] ?? $this->defaultPriority, 114 | ) 115 | ->onLoadException( 116 | fn (LoadExceptionEvent $event) => $this->log( 117 | $event, 118 | 'Load exception on key #{key}: {msg}', 119 | [ 120 | 'msg' => $event->exception->getMessage(), 121 | 'key' => $event->state->currentItemKey, 122 | 'state' => $event->state, 123 | ], 124 | ), 125 | $this->priorities[LoadExceptionEvent::class] ?? $this->defaultPriority, 126 | ) 127 | ->onFlush( 128 | fn (FlushEvent $event) => $this->log( 129 | $event, 130 | $event->early ? 'Flushing {nb} items (early)...' : 'Flushing {nb} items...', 131 | [ 132 | 'nb' => $event->state->nbLoadedItemsSinceLastFlush, 133 | 'state' => $event->state, 134 | ], 135 | ), 136 | $this->priorities[FlushEvent::class] ?? $this->defaultPriority, 137 | ) 138 | ->onFlushException( 139 | fn (FlushExceptionEvent $event) => $this->log( 140 | $event, 141 | 'Flush exception: {msg}', 142 | [ 143 | 'msg' => $event->exception->getMessage(), 144 | 'state' => $event->state, 145 | ], 146 | ), 147 | $this->priorities[FlushExceptionEvent::class] ?? $this->defaultPriority, 148 | ) 149 | ->onEnd( 150 | fn (EndEvent $event) => $this->log( 151 | $event, 152 | 'ETL complete. 
{nb} items were loaded in {duration}s.', 153 | [ 154 | 'nb' => $event->state->nbLoadedItems, 155 | 'duration' => $event->state->getDuration(), 156 | 'state' => $event->state, 157 | ], 158 | ), 159 | $this->priorities[EndEvent::class] ?? $this->defaultPriority, 160 | ); 161 | } 162 | 163 | /** 164 | * @param array $context 165 | */ 166 | private function log(Event $event, string|Stringable $message, array $context = []): void 167 | { 168 | $level = $this->logLevels[$event::class] ?? $this->defaultLogLevel; 169 | 170 | $this->logger->log($level, $message, $context); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/Recipe/Recipe.php: -------------------------------------------------------------------------------- 1 | recipe)($executor); 25 | } 26 | }; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/Transformer/CallableTransformer.php: -------------------------------------------------------------------------------- 1 | closure)($item, $state); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/Transformer/ChainTransformer.php: -------------------------------------------------------------------------------- 1 | $_transformer) { 22 | if (!$_transformer instanceof TransformerInterface) { 23 | $transformers[$t] = new CallableTransformer($_transformer(...)); 24 | } 25 | } 26 | $this->transformers = $transformers; 27 | } 28 | 29 | public function with( 30 | TransformerInterface|callable $transformer, 31 | TransformerInterface|callable ...$transformers, 32 | ): self { 33 | return new self(...[...$this->transformers, $transformer, ...$transformers]); 34 | } 35 | 36 | public function transform(mixed $item, EtlState $state): mixed 37 | { 38 | foreach ($this->transformers as $transformer) { 39 | $item = $transformer->transform($item, $state); 40 | } 41 | 42 | return $item; 43 | } 44 | 45 | public static function 
/**
 * Merges `$values` with each of `$extraValues` (later arrays win, as with
 * array_replace()) and keeps only the entries whose key is listed in `$keys`.
 *
 * Keys listed in `$keys` but absent from every array are not added to the
 * result; entries keep the order they have in the merged array.
 *
 * @param array<int|string>        $keys
 * @param array<int|string, mixed> $values
 * @param array<int|string, mixed> ...$extraValues
 *
 * @return array<int|string, mixed>
 *
 * @internal
 */
function array_fill_from(array $keys, array $values, array ...$extraValues): array
{
    return array_intersect_key(
        array_replace($values, ...$extraValues),
        array_fill_keys($keys, null),
    );
}
/**
 * Shortcut that creates a new EtlExecutor and configures its extractor(s).
 *
 * The arguments are forwarded as declared rather than through
 * func_get_args(), so what reaches EtlExecutor::extractFrom() is exactly
 * what this function's typed signature accepted.
 */
function extractFrom(ExtractorInterface|callable $extractor, ExtractorInterface|callable ...$extractors): EtlExecutor
{
    return (new EtlExecutor())->extractFrom($extractor, ...$extractors);
}
ExtractEvent::class, 109 | $priority, 110 | FilterRecipeMode::EXCLUDE 111 | ); 112 | } 113 | -------------------------------------------------------------------------------- /tests/Behavior/Events/BeforeLoadEventTest.php: -------------------------------------------------------------------------------- 1 | transformWith(function (mixed $value) { 16 | yield $value; 17 | yield strtoupper($value); 18 | }) 19 | ->onBeforeLoad(function (BeforeLoadEvent $e) { 20 | match ($e->item) { 21 | 'bar' => $e->state->skip(), 22 | 'baz' => $e->state->stop(), 23 | default => null, 24 | }; 25 | }); 26 | 27 | // When 28 | $report = $executor->process(['foo', 'bar', 'baz']); 29 | 30 | // Then 31 | expect($report->output)->toHaveCount(3) 32 | ->and($report->output)->toBe(['foo', 'FOO', 'BAR']); 33 | }); 34 | -------------------------------------------------------------------------------- /tests/Behavior/Events/EndEventTest.php: -------------------------------------------------------------------------------- 1 | onEnd(function (EndEvent $e) use (&$event) { 18 | $event = $e; 19 | }); 20 | 21 | // When 22 | $report = $executor->process(['foo', 'bar']); 23 | 24 | // Then 25 | expect($event)->toBeInstanceOf(EndEvent::class) 26 | ->and($report->nbTotalItems)->toBe(2) 27 | ->and($report->nbLoadedItems)->toBe(2) 28 | ; 29 | }); 30 | -------------------------------------------------------------------------------- /tests/Behavior/Events/ExtractEventTest.php: -------------------------------------------------------------------------------- 1 | 'foo', 3 => 'bar']; 14 | $extractedItems = []; 15 | 16 | // Given 17 | $executor = (new EtlExecutor()) 18 | ->onExtract(function (ExtractEvent $event) use (&$extractedItems) { 19 | $extractedItems[$event->state->currentItemKey] = $event->item; 20 | }); 21 | 22 | // When 23 | $executor->process($items); 24 | 25 | // Then 26 | expect($extractedItems)->toBe($items); 27 | }); 28 | -------------------------------------------------------------------------------- 
/tests/Behavior/Events/ExtractExceptionEventTest.php: -------------------------------------------------------------------------------- 1 | onExtractException(function (ExtractExceptionEvent $event) { 19 | $event->exception = new ExtractException('It miserably failed.'); 20 | }); 21 | $executor->process($items()); 22 | })->throws(ExtractException::class, 'It miserably failed.'); 23 | -------------------------------------------------------------------------------- /tests/Behavior/Events/FlushEventTest.php: -------------------------------------------------------------------------------- 1 | withOptions(new EtlConfiguration(flushEvery: 2)) 19 | ->onFlush(function (FlushEvent $e) use (&$flushEventsCounter) { 20 | ++$flushEventsCounter; 21 | }); 22 | 23 | // When 24 | $executor->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 25 | 26 | // Then 27 | expect($flushEventsCounter)->toBe(3); 28 | }); 29 | -------------------------------------------------------------------------------- /tests/Behavior/Events/FlushExceptionEventTest.php: -------------------------------------------------------------------------------- 1 | loadInto(new FlushFailsLoader()) 21 | ->onFlushException(function (FlushExceptionEvent $event) { 22 | $event->removeException(); 23 | }) 24 | ; 25 | $report = $executor->process($items); 26 | expect($report->output)->toBe([ 27 | ['strawberry', 'raspberry'], 28 | ['peach'], 29 | ]); 30 | }); 31 | 32 | class FlushFailsLoader implements LoaderInterface 33 | { 34 | public function load(mixed $item, EtlState $state): void 35 | { 36 | $state->context['pending'][] = $item; 37 | } 38 | 39 | /** 40 | * @return list> 41 | */ 42 | public function flush(bool $isPartial, EtlState $state): array 43 | { 44 | $state->context['batchNumber'] ??= 0; 45 | $state->context['hasFailed'] ??= false; 46 | 47 | // Trigger failure on 1st flush 48 | if (!$state->context['hasFailed']) { 49 | $state->context['hasFailed'] = true; 50 | $state->context['pending'] = []; 51 | 
throw new RuntimeException('Flush failed.'); 52 | } 53 | foreach ($state->context['pending'] as $key => $value) { 54 | $state->context['batches'][$state->context['batchNumber']][] = $value; 55 | } 56 | $state->context['pending'] = []; 57 | ++$state->context['batchNumber']; 58 | 59 | return $state->context['batches']; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /tests/Behavior/Events/InitEventTest.php: -------------------------------------------------------------------------------- 1 | onInit(function (InitEvent $e) use (&$event) { 18 | $event = $e; 19 | $e->state->stop(); 20 | }); 21 | 22 | // When 23 | $executor->process('sourceArgs', 'destArgs'); 24 | 25 | // Then 26 | expect($event)->toBeInstanceOf(InitEvent::class) 27 | ->and($event->state->source)->toBe('sourceArgs') 28 | ->and($event->state->destination)->toBe('destArgs'); 29 | }); 30 | -------------------------------------------------------------------------------- /tests/Behavior/Events/LoadEventTest.php: -------------------------------------------------------------------------------- 1 | transformWith(function (mixed $value) { 18 | yield $value; 19 | yield strtoupper($value); 20 | }) 21 | ->onLoad(function (LoadEvent $e) use (&$loadedItems) { 22 | $loadedItems[] = $e->item; 23 | }); 24 | 25 | // When 26 | $executor->process([2 => 'foo', 3 => 'bar']); 27 | 28 | // Then 29 | expect($loadedItems)->toHaveCount(4) 30 | ->and($loadedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']); 31 | }); 32 | -------------------------------------------------------------------------------- /tests/Behavior/Events/LoadExceptionEventTest.php: -------------------------------------------------------------------------------- 1 | loadInto(function (mixed $value) use (&$loadedItems) { 19 | if ('bar' === $value) { 20 | throw new LoadException('Cannot load `bar`.'); 21 | } 22 | $loadedItems[] = $value; 23 | }) 24 | ->onLoadException(function (LoadExceptionEvent $event) { 25 | 
$event->removeException(); 26 | }) 27 | ; 28 | $executor->process($items); 29 | 30 | expect($loadedItems)->toBe(['foo', 'baz']); 31 | }); 32 | -------------------------------------------------------------------------------- /tests/Behavior/Events/StartEventTest.php: -------------------------------------------------------------------------------- 1 | onStart(function (StartEvent $e) use (&$event) { 18 | $event = $e; 19 | $e->state->stop(); 20 | }); 21 | 22 | // When 23 | $executor->process(['foo', 'bar']); 24 | 25 | // Then 26 | expect($event)->toBeInstanceOf(StartEvent::class) 27 | ->and($event->state->nbTotalItems)->toBe(2) 28 | ->and($event->state->nbLoadedItems)->toBe(0) 29 | ; 30 | }); 31 | -------------------------------------------------------------------------------- /tests/Behavior/Events/TransformEventTest.php: -------------------------------------------------------------------------------- 1 | transformWith(function (mixed $value) { 18 | yield $value; 19 | yield strtoupper($value); 20 | }) 21 | ->onTransform(function (TransformEvent $e) use (&$transformedItems) { 22 | $transformedItems = [...$transformedItems, ...$e->transformResult]; 23 | }); 24 | 25 | // When 26 | $executor->process([2 => 'foo', 3 => 'bar']); 27 | 28 | // Then 29 | expect($transformedItems)->toHaveCount(4) 30 | ->and($transformedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']); 31 | }); 32 | -------------------------------------------------------------------------------- /tests/Behavior/Events/TransformExceptionEventTest.php: -------------------------------------------------------------------------------- 1 | transformWith(function (mixed $value) { 19 | if ('bar' === $value) { 20 | throw new TransformException('Cannot transform `bar`.'); 21 | } 22 | yield $value; 23 | }) 24 | ->loadInto(function (mixed $value) use (&$loadedItems) { 25 | $loadedItems[] = $value; 26 | }) 27 | ->onTransformException(function (TransformExceptionEvent $event) { 28 | $event->removeException(); 29 | }) 30 | ; 31 | 
$executor->process($items); 32 | 33 | expect($loadedItems)->toBe(['foo', 'baz']); 34 | }); 35 | -------------------------------------------------------------------------------- /tests/Behavior/ExtractExceptionTest.php: -------------------------------------------------------------------------------- 1 | process($items()); 19 | })->throws(ExtractException::class, 'Something bad happened.'); 20 | 21 | it('throws an extract exception when some other exception is thrown', function () { 22 | $items = function () { 23 | yield 'foo'; 24 | throw new RuntimeException('Something bad happened.'); 25 | }; 26 | 27 | $executor = new EtlExecutor(); 28 | $executor->process($items()); 29 | })->throws(ExtractException::class, 'Error during extraction.'); 30 | -------------------------------------------------------------------------------- /tests/Behavior/FlushExceptionTest.php: -------------------------------------------------------------------------------- 1 | process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 24 | })->throws(FlushException::class, 'Flush failed.'); 25 | 26 | it('throws a load exception when some other exception is thrown', function () { 27 | // Given 28 | $loader = new FlushFailsLoader(new RuntimeException('Flush failed.')); 29 | $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(flushEvery: 2))); 30 | 31 | // When 32 | $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 33 | })->throws(FlushException::class, 'Error during flush.'); 34 | 35 | class FlushFailsLoader implements LoaderInterface 36 | { 37 | public function __construct( 38 | private Exception $failure, 39 | ) { 40 | } 41 | 42 | public function load(mixed $item, EtlState $state): void 43 | { 44 | } 45 | 46 | public function flush(bool $isPartial, EtlState $state): never 47 | { 48 | throw $this->failure; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tests/Behavior/FlushTest.php: 
-------------------------------------------------------------------------------- 1 | process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 22 | 23 | // Then 24 | expect($report->output)->toBeArray() 25 | ->and($report->output)->toHaveCount(3) 26 | ->and($report->output[0])->toBe(['banana', 'apple']) 27 | ->and($report->output[1])->toBe(['strawberry', 'raspberry']) 28 | ->and($report->output[2])->toBe(['peach']); 29 | }); 30 | 31 | it('forces flushes', function () { 32 | $loader = new InMemoryLoader(); 33 | 34 | // Given 35 | $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(flushEvery: 2))) 36 | ->onExtract(function (ExtractEvent $event) { 37 | if (0 === $event->state->currentItemIndex) { 38 | $event->state->flush(); 39 | } 40 | }); 41 | 42 | // When 43 | $report = $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 44 | 45 | // Then 46 | expect($report->output)->toBeArray() 47 | ->and($report->output)->toHaveCount(3) 48 | ->and($report->output[0])->toBe(['banana']) 49 | ->and($report->output[1])->toBe(['apple', 'strawberry']) 50 | ->and($report->output[2])->toBe(['raspberry', 'peach']); 51 | }); 52 | -------------------------------------------------------------------------------- /tests/Behavior/LoadExceptionTest.php: -------------------------------------------------------------------------------- 1 | process($items); 22 | })->throws(LoadException::class, 'Cannot load `bar`.'); 23 | 24 | it('throws a load exception when some other exception is thrown', function () { 25 | $items = ['foo', 'bar', 'baz']; 26 | $executor = loadInto(function (mixed $value) { 27 | if ('bar' === $value) { 28 | throw new RuntimeException('Cannot load `bar`.'); 29 | } 30 | }); 31 | $executor->process($items); 32 | })->throws(LoadException::class, 'Error during loading.'); 33 | 34 | it('has stopped processing items, but has loaded the previous ones', function () { 35 | $items = ['foo', 'bar', 'baz']; 36 | $loadedItems = []; 37 | $executor 
= loadInto(function (mixed $value) use (&$loadedItems) { 38 | if ('bar' === $value) { 39 | throw new LoadException('Cannot load `bar`.'); 40 | } 41 | $loadedItems[] = $value; 42 | }) 43 | ; 44 | try { 45 | $executor->process($items); 46 | } catch (LoadException) { 47 | } 48 | 49 | expect($loadedItems)->toBe(['foo']); 50 | }); 51 | -------------------------------------------------------------------------------- /tests/Behavior/NextTickTest.php: -------------------------------------------------------------------------------- 1 | 3]; 23 | $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(...$options))) 24 | ->onExtract(function (ExtractEvent $event) { 25 | // Let's trigger an early flush after the NEXT item (apple) 26 | if ('banana' === $event->item) { 27 | $event->state->nextTick(fn (EtlState $state) => $state->flush()); 28 | } 29 | }); 30 | 31 | // When 32 | $report = $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 33 | 34 | // Then 35 | expect($report->output)->toBeArray() 36 | ->and($report->output)->toHaveCount(2) 37 | ->and($report->output[0])->toBe(['banana', 'apple']) 38 | ->and($report->output[1])->toBe(['strawberry', 'raspberry', 'peach']); 39 | }); 40 | 41 | it('can trigger several callbacks, which are called only once', function () { 42 | // Given 43 | $bucket = new ArrayObject(); 44 | $etl = (new EtlExecutor()) 45 | ->onLoad(function (LoadEvent $event) use ($bucket) { 46 | if ('apple' === $event->item) { 47 | $event->state->nextTick(fn (EtlState $state) => $bucket->append('apple')); 48 | $event->state->nextTick(fn (EtlState $state) => $bucket->append('APPLE')); 49 | } 50 | }); 51 | 52 | // When 53 | $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 54 | 55 | // Then 56 | expect([...$bucket])->toBe(['apple', 'APPLE']); 57 | }); 58 | 59 | it("won't complain if a stop request is issued during terminate()", function () { 60 | // Given 61 | $input = ['banana', 'apple', 'strawberry', 
'raspberry', 'peach']; 62 | $itWasCalled = false; 63 | $etl = (new EtlExecutor(extractor: new IterableExtractor($input))) 64 | ->onExtract(function (ExtractEvent $event) use (&$itWasCalled) { 65 | if ('peach' === $event->item) { 66 | $event->state->nextTick(function (EtlState $state) use (&$itWasCalled) { 67 | $itWasCalled = true; 68 | $state->stop(); 69 | }); 70 | } 71 | }); 72 | 73 | // When 74 | $report = $etl->process(); 75 | 76 | expect($report->output)->toBe($input) 77 | ->and($itWasCalled)->toBeTrue(); 78 | }); 79 | -------------------------------------------------------------------------------- /tests/Behavior/ReactStreamProcessorTest.php: -------------------------------------------------------------------------------- 1 | $stream->emit('data', ['hello'])); 21 | Loop::futureTick(fn () => $stream->emit('data', ['world'])); 22 | $executor = useReact(); 23 | 24 | // When 25 | $state = $executor->process($stream); 26 | 27 | // Then 28 | expect($state->output)->toBe(['hello', 'world']); 29 | }); 30 | 31 | it('can skip items and stop the workflow', function () { 32 | // Given 33 | $stream = new ReadableResourceStream(fopen('php://temp', 'rb')); 34 | $fruits = ['banana', 'apple', 'strawberry', 'raspberry', 'peach']; 35 | foreach ($fruits as $fruit) { 36 | Loop::futureTick(fn () => $stream->emit('data', [$fruit])); 37 | } 38 | $executor = useReact() 39 | ->onExtract(function (ExtractEvent $event) { 40 | match ($event->item) { 41 | 'apple' => $event->state->skip(), 42 | 'peach' => $event->state->stop(), 43 | default => null, 44 | }; 45 | }) 46 | ; 47 | 48 | // When 49 | $state = $executor->process($stream); 50 | 51 | // Then 52 | expect($state->output)->toBe(['banana', 'strawberry', 'raspberry']); 53 | }); 54 | 55 | it('allows iterables, which will be converted to readable streams', function () { 56 | $fruits = ['banana', 'apple', 'strawberry', 'raspberry', 'peach']; 57 | $executor = useReact() 58 | ->onExtract(function (ExtractEvent $event) { 59 | match 
($event->item) { 60 | 'apple' => $event->state->skip(), 61 | 'peach' => $event->state->stop(), 62 | default => null, 63 | }; 64 | }) 65 | ; 66 | 67 | // When 68 | $state = $executor->process($fruits); 69 | 70 | // Then 71 | expect($state->output)->toBe(['banana', 'strawberry', 'raspberry']); 72 | }); 73 | 74 | it('throws ExtractExceptions', function () { 75 | // Given 76 | $stream = new ReadableResourceStream(fopen('php://temp', 'rb')); 77 | Loop::futureTick(fn () => $stream->emit('data', ['hello'])); 78 | $executor = useReact()->onExtract(fn () => throw new RuntimeException()); 79 | 80 | // When 81 | $executor->process($stream); 82 | })->throws(ExtractException::class); 83 | -------------------------------------------------------------------------------- /tests/Behavior/SkipTest.php: -------------------------------------------------------------------------------- 1 | 'auto', 18 | ]); 19 | $cities = []; 20 | 21 | // Given 22 | $executor = (new EtlExecutor(extractor: $extractor)) 23 | ->transformWith(function (mixed $value) { 24 | yield $value['city_english_name']; 25 | }) 26 | ->loadInto(function (string $city) use (&$cities) { 27 | $cities[] = $city; 28 | }) 29 | ->onExtract(function (ExtractEvent $event) { 30 | if ('US' === $event->item['country_iso_code']) { 31 | $event->state->skip(); 32 | } 33 | }); 34 | 35 | // When 36 | $executor->process(); 37 | 38 | // Then 39 | expect($cities)->toBe([ 40 | 'Tokyo', 41 | 'Shanghai', 42 | 'Mumbai', 43 | 'Istanbul', 44 | 'Moscow', 45 | 'Cairo', 46 | 'Lima', 47 | 'London', 48 | ]); 49 | }); 50 | 51 | it('skips items during transformation', function () { 52 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [ 53 | 'columns' => 'auto', 54 | ]); 55 | $cities = []; 56 | 57 | // Given 58 | $executor = (new EtlExecutor(extractor: $extractor)) 59 | ->transformWith(function (mixed $value) { 60 | yield $value['city_english_name']; 61 | }) 62 | ->loadInto(function (string $city) use (&$cities) { 
63 | $cities[] = $city; 64 | }) 65 | ->onTransform(function (TransformEvent $event) { 66 | if ('Tokyo' === [...$event->transformResult][0]) { 67 | $event->state->skip(); 68 | } 69 | }); 70 | 71 | // When 72 | $executor->process(); 73 | 74 | // Then 75 | expect($cities)->toBe([ 76 | 'New York', 77 | 'Los Angeles', 78 | 'Shanghai', 79 | 'Mumbai', 80 | 'Istanbul', 81 | 'Moscow', 82 | 'Cairo', 83 | 'Lima', 84 | 'London', 85 | ]); 86 | }); 87 | -------------------------------------------------------------------------------- /tests/Behavior/StopTest.php: -------------------------------------------------------------------------------- 1 | 'auto', 19 | ]); 20 | $cities = []; 21 | 22 | // Given 23 | $executor = (new EtlExecutor(extractor: $extractor)) 24 | ->transformWith(function (mixed $value) { 25 | yield $value['city_english_name']; 26 | }) 27 | ->loadInto(function (string $city) use (&$cities) { 28 | $cities[] = $city; 29 | }) 30 | ->onExtract(function (ExtractEvent $event) { 31 | if ('JP' === $event->item['country_iso_code']) { 32 | $event->state->stop(); 33 | } 34 | }); 35 | 36 | // When 37 | $executor->process(); 38 | 39 | // Then 40 | expect($cities)->toBe([ 41 | 'New York', 42 | 'Los Angeles', 43 | ]); 44 | }); 45 | 46 | it('stops the process during transformation', function () { 47 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [ 48 | 'columns' => 'auto', 49 | ]); 50 | $cities = []; 51 | 52 | // Given 53 | $executor = (new EtlExecutor(extractor: $extractor)) 54 | ->transformWith(function (mixed $value) { 55 | yield $value['city_english_name']; 56 | }) 57 | ->loadInto(function (string $city) use (&$cities) { 58 | $cities[] = $city; 59 | }) 60 | ->onTransform(function (TransformEvent $event) { 61 | if ('Shanghai' === [...$event->transformResult][0]) { 62 | $event->state->stop(); 63 | } 64 | }); 65 | 66 | // When 67 | $executor->process(); 68 | 69 | // Then 70 | expect($cities)->toBe([ 71 | 'New York', 72 | 'Los Angeles', 
73 | 'Tokyo', 74 | ]); 75 | }); 76 | 77 | it('stops the process during loading', function () { 78 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [ 79 | 'columns' => 'auto', 80 | ]); 81 | $cities = []; 82 | 83 | // Given 84 | $executor = (new EtlExecutor(extractor: $extractor)) 85 | ->transformWith(function (mixed $value) { 86 | yield $value['city_english_name']; 87 | }) 88 | ->loadInto(function (string $city) use (&$cities) { 89 | $cities[] = $city; 90 | }) 91 | ->onLoad(function (LoadEvent $event) { 92 | if ('Shanghai' === $event->item) { 93 | $event->state->stop(); 94 | } 95 | }); 96 | 97 | // When 98 | $executor->process(); 99 | 100 | // Then 101 | expect($cities)->toBe([ 102 | 'New York', 103 | 'Los Angeles', 104 | 'Tokyo', 105 | 'Shanghai', 106 | ]); 107 | }); 108 | -------------------------------------------------------------------------------- /tests/Behavior/TransformExceptionTest.php: -------------------------------------------------------------------------------- 1 | process($items); 23 | })->throws(TransformException::class, 'Cannot transform `bar`.'); 24 | 25 | it('throws a transform exception when some other exception is thrown', function () { 26 | $items = ['foo', 'bar', 'baz']; 27 | $executor = transformWith(function (mixed $value) { 28 | if ('bar' === $value) { 29 | throw new RuntimeException('Cannot transform `bar`.'); 30 | } 31 | yield $value; 32 | }); 33 | $executor->process($items); 34 | })->throws(TransformException::class, 'Error during transformation.'); 35 | 36 | it('has stopped processing items, but has loaded the previous ones', function () { 37 | $items = ['foo', 'bar', 'baz']; 38 | $loadedItems = []; 39 | $executor = transformWith(function (mixed $value) { 40 | if ('bar' === $value) { 41 | throw new TransformException('Cannot transform `bar`.'); 42 | } 43 | yield $value; 44 | }) 45 | ->loadInto(function (mixed $value) use (&$loadedItems) { 46 | $loadedItems[] = $value; 47 | }) 48 | ; 49 | try { 
50 | $executor->process($items); 51 | } catch (TransformException) { 52 | } 53 | 54 | expect($loadedItems)->toBe(['foo']); 55 | }); 56 | -------------------------------------------------------------------------------- /tests/Data/10-biggest-cities.csv: -------------------------------------------------------------------------------- 1 | city_english_name,city_local_name,country_iso_code,continent,population 2 | "New York","New York",US,"North America",8537673 3 | "Los Angeles","Los Angeles",US,"North America",39776830 4 | Tokyo,東京,JP,Asia,13929286 5 | Shanghai,上海,CN,Asia,26317104 6 | Mumbai,मुंबई,IN,Asia,12442373 7 | Istanbul,İstanbul,TR,Europe,15469524 8 | Moscow,Москва,RU,Europe,12615279 9 | Cairo,القاهرة,EG,Africa,9121514 10 | Lima,Lima,PE,"South America",10141329 11 | London,London,GB,Europe,8908081 12 | -------------------------------------------------------------------------------- /tests/Data/10-biggest-cities.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "city_english_name": "New York", 4 | "city_local_name": "New York", 5 | "country_iso_code": "US", 6 | "continent": "North America", 7 | "population": 8537673 8 | }, 9 | { 10 | "city_english_name": "Los Angeles", 11 | "city_local_name": "Los Angeles", 12 | "country_iso_code": "US", 13 | "continent": "North America", 14 | "population": 39776830 15 | }, 16 | { 17 | "city_english_name": "Tokyo", 18 | "city_local_name": "東京", 19 | "country_iso_code": "JP", 20 | "continent": "Asia", 21 | "population": 13929286 22 | }, 23 | { 24 | "city_english_name": "Shanghai", 25 | "city_local_name": "上海", 26 | "country_iso_code": "CN", 27 | "continent": "Asia", 28 | "population": 26317104 29 | }, 30 | { 31 | "city_english_name": "Mumbai", 32 | "city_local_name": "मुंबई", 33 | "country_iso_code": "IN", 34 | "continent": "Asia", 35 | "population": 12442373 36 | }, 37 | { 38 | "city_english_name": "Istanbul", 39 | "city_local_name": "İstanbul", 40 | "country_iso_code": "TR", 
41 | "continent": "Europe", 42 | "population": 15469524 43 | }, 44 | { 45 | "city_english_name": "Moscow", 46 | "city_local_name": "Москва", 47 | "country_iso_code": "RU", 48 | "continent": "Europe", 49 | "population": 12615279 50 | }, 51 | { 52 | "city_english_name": "Cairo", 53 | "city_local_name": "القاهرة", 54 | "country_iso_code": "EG", 55 | "continent": "Africa", 56 | "population": 9121514 57 | }, 58 | { 59 | "city_english_name": "Lima", 60 | "city_local_name": "Lima", 61 | "country_iso_code": "PE", 62 | "continent": "South America", 63 | "population": 10141329 64 | }, 65 | { 66 | "city_english_name": "London", 67 | "city_local_name": "London", 68 | "country_iso_code": "GB", 69 | "continent": "Europe", 70 | "population": 8908081 71 | } 72 | ] 73 | -------------------------------------------------------------------------------- /tests/Data/10-biggest-cities.php: -------------------------------------------------------------------------------- 1 | 'New York', 6 | 'city_local_name' => 'New York', 7 | 'country_iso_code' => 'US', 8 | 'continent' => 'North America', 9 | 'population' => 8537673, 10 | ], 11 | [ 12 | 'city_english_name' => 'Los Angeles', 13 | 'city_local_name' => 'Los Angeles', 14 | 'country_iso_code' => 'US', 15 | 'continent' => 'North America', 16 | 'population' => 39776830, 17 | ], 18 | [ 19 | 'city_english_name' => 'Tokyo', 20 | 'city_local_name' => '東京', 21 | 'country_iso_code' => 'JP', 22 | 'continent' => 'Asia', 23 | 'population' => 13929286, 24 | ], 25 | [ 26 | 'city_english_name' => 'Shanghai', 27 | 'city_local_name' => '上海', 28 | 'country_iso_code' => 'CN', 29 | 'continent' => 'Asia', 30 | 'population' => 26317104, 31 | ], 32 | [ 33 | 'city_english_name' => 'Mumbai', 34 | 'city_local_name' => 'मुंबई', 35 | 'country_iso_code' => 'IN', 36 | 'continent' => 'Asia', 37 | 'population' => 12442373, 38 | ], 39 | [ 40 | 'city_english_name' => 'Istanbul', 41 | 'city_local_name' => 'İstanbul', 42 | 'country_iso_code' => 'TR', 43 | 'continent' => 
'Europe', 44 | 'population' => 15469524, 45 | ], 46 | [ 47 | 'city_english_name' => 'Moscow', 48 | 'city_local_name' => 'Москва', 49 | 'country_iso_code' => 'RU', 50 | 'continent' => 'Europe', 51 | 'population' => 12615279, 52 | ], 53 | [ 54 | 'city_english_name' => 'Cairo', 55 | 'city_local_name' => 'القاهرة', 56 | 'country_iso_code' => 'EG', 57 | 'continent' => 'Africa', 58 | 'population' => 9121514, 59 | ], 60 | [ 61 | 'city_english_name' => 'Lima', 62 | 'city_local_name' => 'Lima', 63 | 'country_iso_code' => 'PE', 64 | 'continent' => 'South America', 65 | 'population' => 10141329, 66 | ], 67 | [ 68 | 'city_english_name' => 'London', 69 | 'city_local_name' => 'London', 70 | 'country_iso_code' => 'GB', 71 | 'continent' => 'Europe', 72 | 'population' => 8908081, 73 | ], 74 | ]; 75 | -------------------------------------------------------------------------------- /tests/Stubs/InMemoryLoader.php: -------------------------------------------------------------------------------- 1 | context['pending'][] = $item; 15 | } 16 | 17 | /** 18 | * @return list> 19 | */ 20 | public function flush(bool $isPartial, EtlState $state): array 21 | { 22 | $state->context['batchNumber'] ??= 0; 23 | foreach ($state->context['pending'] as $key => $value) { 24 | $state->context['batches'][$state->context['batchNumber']][] = $value; 25 | } 26 | $state->context['pending'] = []; 27 | ++$state->context['batchNumber']; 28 | 29 | return $state->context['batches']; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /tests/Stubs/STDINStub.php: -------------------------------------------------------------------------------- 1 | bufferFilename = sys_get_temp_dir().DIRECTORY_SEPARATOR.'php_input.txt'; 32 | $this->index = 0; 33 | if (file_exists($this->bufferFilename)) { 34 | $this->data = file_get_contents($this->bufferFilename); 35 | } 36 | $this->length = strlen($this->data); 37 | } 38 | 39 | public function stream_open(): true 40 | { 41 | return 
true; 42 | } 43 | 44 | public function url_stat(): false 45 | { 46 | return false; 47 | } 48 | 49 | public function stream_close(): void 50 | { 51 | } 52 | 53 | public function stream_stat(): false 54 | { 55 | return false; 56 | } 57 | 58 | public function stream_flush(): true 59 | { 60 | return true; 61 | } 62 | 63 | public function stream_read(int $count): string 64 | { 65 | $length = min($count, $this->length - $this->index); 66 | $data = substr($this->data, $this->index, $length); // hand back only the bytes being consumed, never more than $count 67 | $this->index += $length; 68 | 69 | return $data; 70 | } 71 | 72 | public function stream_eof(): bool 73 | { 74 | return $this->index >= $this->length; 75 | } 76 | 77 | public function stream_write(string $data): false|int 78 | { 79 | return file_put_contents($this->bufferFilename, $data); 80 | } 81 | 82 | public static function emulate(string $stdInContent, callable $beforeRestore): mixed 83 | { 84 | stream_wrapper_unregister('php'); 85 | stream_wrapper_register('php', __CLASS__); 86 | try { 87 | file_put_contents('php://stdin', $stdInContent); 88 | 89 | return $beforeRestore(); 90 | } finally { 91 | // Put the built-in php:// wrapper back even when the callback throws. 92 | stream_wrapper_restore('php'); 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /tests/Stubs/STDOUTStub.php: -------------------------------------------------------------------------------- 1 | data; 24 | $consumed += $bucket->datalen; 25 | stream_bucket_append($out, $bucket); 26 | } 27 | 28 | return PSFS_PASS_ON; 29 | } 30 | 31 | public static function read(): string 32 | { 33 | return self::$storage; 34 | } 35 | 36 | public static function emulate(callable $beforeRestore, string $filename = 'php://stdout'): string 37 | { 38 | stream_filter_register('intercept', __CLASS__); 39 | $stdout = fopen($filename, 'wb+'); 40 | $filter = stream_filter_append($stdout, 'intercept'); 41 | $beforeRestore($stdout); 42 | $result = self::$storage; 43 | 44 | self::$storage = ''; 45 | 46 | return $result; 47 | } 48 | } 49 | 
-------------------------------------------------------------------------------- /tests/Stubs/WritableStreamStub.php: -------------------------------------------------------------------------------- 1 | 16 | */ 17 | public array $data = []; 18 | 19 | public function isWritable(): bool 20 | { 21 | return true; 22 | } 23 | 24 | public function write($data): bool 25 | { 26 | $this->data[] = $data; 27 | 28 | return true; 29 | } 30 | 31 | public function end($data = null): void 32 | { 33 | } 34 | 35 | public function close(): void 36 | { 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /tests/Unit/ContextTest.php: -------------------------------------------------------------------------------- 1 | 'green', 'shape' => 'square', 'lights' => 'on'])); 15 | 16 | // When 17 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']); 18 | 19 | // Then 20 | expect($report->context)->toBe(['color' => 'green', 'shape' => 'round', 'lights' => 'on', 'size' => 'small']); 21 | }); 22 | 23 | it('adds some more context', function () { 24 | // Given 25 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on'])) 26 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla']); 27 | 28 | // When 29 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']); 30 | 31 | // Then 32 | expect($report->context)->toBe(['color' => 'blue', 'shape' => 'round', 'lights' => 'on', 'flavor' => 'vanilla', 'size' => 'small']); 33 | }); 34 | 35 | it('replaces the whole context', function () { 36 | // Given 37 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on'])) 38 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla'], clear: true); 39 | 40 | // When 41 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']); 42 | 43 | // Then 44 | expect($report->context)->toBe(['color' => 'blue', 
'flavor' => 'vanilla', 'shape' => 'round', 'size' => 'small']); 45 | }); 46 | 47 | it('does not override existing values', function () { 48 | // Given 49 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on'])) 50 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla'], overwrite: false); 51 | 52 | // When 53 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']); 54 | 55 | // Then 56 | expect($report->context)->toBe(['color' => 'green', 'shape' => 'round', 'lights' => 'on', 'flavor' => 'vanilla', 'size' => 'small']); 57 | }); 58 | -------------------------------------------------------------------------------- /tests/Unit/EtlConfigurationTest.php: -------------------------------------------------------------------------------- 1 | throws(InvalidArgumentException::class); 13 | 14 | it('denies negative values', function () { 15 | new EtlConfiguration(flushEvery: -10); 16 | })->throws(InvalidArgumentException::class); 17 | -------------------------------------------------------------------------------- /tests/Unit/EtlExecutorTest.php: -------------------------------------------------------------------------------- 1 | extractFrom(fn () => yield from ['foo', 'bar']) 26 | ->transformWith($transformer) 27 | ->loadInto(function (string $item) use (&$items) { 28 | $items[] = $item; 29 | }) 30 | ->withOptions(new EtlConfiguration(flushEvery: 1)); 31 | 32 | // When 33 | $report = $etl->process(); 34 | 35 | // Then 36 | expect($items)->toBe(['FOO', 'BAR']) 37 | ->and($report->nbTotalItems)->toBe(2) 38 | ->and($report->nbLoadedItems)->toBe(2) 39 | ->and($report->getDuration())->toBeBetween(0, 1); 40 | })->with(function () { 41 | yield 'Return value' => fn (mixed $value) => strtoupper($value); 42 | yield 'Generator' => fn (mixed $value) => yield strtoupper($value); 43 | }); 44 | 45 | it('passes the context throughout all the ETL steps', function () { 46 | $items = []; 47 | 48 | // Given 49 | $etl = 
(new EtlExecutor()) 50 | ->loadInto(function (string $item) use (&$items) { 51 | $items[] = $item; 52 | }) 53 | ->onFlush(fn (FlushEvent $event) => $event->state->context['bar'] = 'baz'); // @phpstan-ignore-line 54 | 55 | // When 56 | $report = $etl->process(['banana', 'apple'], context: ['foo' => 'bar']); 57 | 58 | // Then 59 | expect($items)->toBe(['banana', 'apple']) 60 | ->and($report->context['foo'])->toBe('bar') 61 | ->and($report->context['bar'])->toBe('baz'); 62 | }); 63 | 64 | it('loads conditionally', function () { 65 | // Background 66 | $loader = new class() implements ConditionalLoaderInterface { 67 | public function supports(mixed $item, EtlState $state): bool 68 | { 69 | return 'foo' !== $item; 70 | } 71 | 72 | public function load(mixed $item, EtlState $state): void 73 | { 74 | $state->context[__CLASS__][] = $item; 75 | } 76 | 77 | public function flush(bool $isPartial, EtlState $state): mixed 78 | { 79 | foreach ($state->context[__CLASS__] as $item) { 80 | $state->context['storage'][] = $item; 81 | } 82 | 83 | return $state->context['storage']; 84 | } 85 | }; 86 | 87 | // Given 88 | $input = ['foo', 'bar', 'baz']; 89 | $executor = new EtlExecutor(loader: $loader); 90 | 91 | // When 92 | $report = $executor->process($input, context: ['storage' => []]); 93 | 94 | // Then 95 | expect($report->output)->toBe(['bar', 'baz']); 96 | }); 97 | 98 | it('yells if it cannot process extracted data', function () { 99 | // Given 100 | $executor = (new EtlExecutor())->withProcessor( 101 | new class() implements ProcessorInterface { 102 | public function supports(mixed $extracted): bool 103 | { 104 | return false; 105 | } 106 | 107 | public function process(EtlExecutor $executor, EtlState $state, mixed $extracted): EtlState 108 | { 109 | throw new ShouldNotHappen(new LogicException()); 110 | } 111 | }, 112 | ); 113 | 114 | // When 115 | $executor->process([]); 116 | })->throws(ExtractException::class); 117 | 
-------------------------------------------------------------------------------- /tests/Unit/EventDispatcher/EventDispatcherTest.php: -------------------------------------------------------------------------------- 1 | visitors[] = $visitor; 23 | if (2 === count($this->visitors)) { 24 | $this->stopPropagation(); 25 | } 26 | } 27 | } 28 | 29 | it('dispatches events, to the appropriate listeners, in the correct order', function () { 30 | $listenerProvider = new PrioritizedListenerProvider(); 31 | $bus = new EventDispatcher($listenerProvider); 32 | $ignored = new class() { 33 | use EventVisitor; 34 | }; 35 | $event = new class() { 36 | use EventVisitor; 37 | }; 38 | 39 | // Given 40 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('A')); 41 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1); 42 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1); 43 | 44 | // When 45 | $dispatched = $bus->dispatch($event); 46 | 47 | // Then 48 | expect($dispatched) 49 | ->toBe($event) 50 | ->and($event->visitors)->toBe(['C', 'A', 'B']) 51 | ->and($ignored->visitors)->toBe([]) 52 | ; 53 | }); 54 | 55 | it('stops propagation of events', function () { 56 | $listenerProvider = new PrioritizedListenerProvider(); 57 | $bus = new EventDispatcher($listenerProvider); 58 | $event = new class() implements StoppableEventInterface { 59 | use EventVisitor; 60 | }; 61 | 62 | // Given 63 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('A')); 64 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1); 65 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1); 66 | 67 | // When 68 | $dispatched = $bus->dispatch($event); 69 | 70 | // Then 71 | expect($dispatched) 72 | ->toBe($event) 73 | ->and($event->visitors)->toBe(['C', 'A']) 74 | ; 75 | }); 76 | 
-------------------------------------------------------------------------------- /tests/Unit/Extractor/CSVExtractorTest.php: -------------------------------------------------------------------------------- 1 | extract($state); 20 | })->throws(ExtractException::class); 21 | 22 | it('iterates over a string containing CSV data', function () { 23 | $state = new EtlState(); 24 | $content = file_get_contents(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'); 25 | $expected = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php'; 26 | $extractor = new CSVExtractor($content, ['columns' => 'auto']); 27 | 28 | // When 29 | $extractedItems = [...$extractor->extract($state)]; 30 | 31 | // Then 32 | expect($extractedItems)->toBe($expected); 33 | }); 34 | 35 | it('iterates over a file containing CSV data', function () { 36 | $extractor = new CSVExtractor(options: ['columns' => 'auto']); 37 | 38 | // When 39 | $state = new EtlState(source: 'file://'.dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'); 40 | $extractedItems = [...$extractor->extract($state)]; 41 | 42 | // Then 43 | expect($extractedItems)->toHaveCount(10) 44 | ->and($extractedItems[0]['city_english_name'] ?? null)->toBe('New York') 45 | ->and($extractedItems[9]['city_english_name'] ?? 
null)->toBe('London'); 46 | }); 47 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/CallableExtractorTest.php: -------------------------------------------------------------------------------- 1 | ['foo', 'bar']; 17 | 18 | // When 19 | $value = (new CallableExtractor($callable))->extract($state); 20 | 21 | // Then 22 | expect($value)->toBe(['foo', 'bar']); 23 | }); 24 | 25 | it('returns an empty iterable when extracted content is null', function () { 26 | // Given 27 | $state = new EtlState(); 28 | $callable = fn () => null; 29 | 30 | // When 31 | $value = (new CallableExtractor($callable))->extract($state); 32 | 33 | // Then 34 | expect($value)->toBeInstanceOf(EmptyIterator::class); 35 | }); 36 | it('returns an iterable of values when extracted content is not iterable', function () { 37 | // Given 38 | $state = new EtlState(); 39 | $callable = fn () => 'foo'; 40 | 41 | // When 42 | $value = (new CallableExtractor($callable))->extract($state); 43 | 44 | // Then 45 | expect($value)->toBe(['foo']); 46 | }); 47 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/ChainExtractorTest.php: -------------------------------------------------------------------------------- 1 | 'banana')); 17 | $executor = $executor->extractFrom(chain($executor->extractor) 18 | ->with(fn () => yield from ['apple', 'strawberry']) 19 | ->with(fn () => ['raspberry', 'peach'])) 20 | ; 21 | 22 | // When 23 | $report = $executor->process(); 24 | 25 | // Then 26 | expect($report->output)->toBe(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); 27 | }); 28 | 29 | it('silently chains extractors', function () { 30 | // Given 31 | $executor = extractFrom( 32 | fn () => 'banana', 33 | fn () => yield from ['apple', 'strawberry'], 34 | fn () => ['raspberry', 'peach'] 35 | ); 36 | 37 | // When 38 | $report = $executor->process(); 39 | 40 | // Then 41 | expect($report->output)->toBe(['banana', 
'apple', 'strawberry', 'raspberry', 'peach']); 42 | }); 43 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/IterableExtractorTest.php: -------------------------------------------------------------------------------- 1 | extract($state)) 18 | ->toBe(['foo', 'bar']); 19 | 20 | $state = new EtlState(source: ['bar', 'baz']); 21 | expect($extractor->extract($state)) 22 | ->toBe(['bar', 'baz']); 23 | }); 24 | 25 | it('yells whenever source is not iterable', function () { 26 | (new IterableExtractor())->extract(new EtlState(source: 'foo')); 27 | }) 28 | ->throws(ExtractException::class); 29 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/JSONExtractorTest.php: -------------------------------------------------------------------------------- 1 | extract($state); 23 | 24 | // Then 25 | expect([...$items])->toBe(null === $source ? [] : $expected); 26 | })->with(function () { 27 | $source = dirname(__DIR__, 2).'/Data/10-biggest-cities.json'; 28 | $content = file_get_contents($source); 29 | yield ['source' => 'file://'.$source]; 30 | yield ['source' => $content]; 31 | yield ['source' => null]; 32 | })->with(function () { 33 | yield ['useConstructor' => true]; 34 | yield ['useConstructor' => false]; 35 | }); 36 | 37 | it('complains if content cannot be extracted', function () { 38 | [...(new JSONExtractor())->extract(new EtlState(source: new stdClass()))]; 39 | })->throws(ExtractException::class); 40 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/ReactStreamExtractorTest.php: -------------------------------------------------------------------------------- 1 | extract(new EtlState(source: $b)))->toBe($b) 20 | ->and($extractor->extract(new EtlState()))->toBe($a); 21 | }); 22 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/STDINExtractorTest.php: 
-------------------------------------------------------------------------------- 1 | process(...)); 22 | 23 | expect($report->output)->toBe([ 24 | 'Hello', 25 | '', 26 | 'Everybody!', 27 | ]); 28 | }); 29 | -------------------------------------------------------------------------------- /tests/Unit/Extractor/TextLinesExtractorTest.php: -------------------------------------------------------------------------------- 1 | extract($state); 25 | 26 | // Then 27 | expect([...$items])->toBe($expected); 28 | })->with(function () { 29 | yield [ 30 | 'options' => ['skipEmptyLines' => true], 31 | 'expected' => ['foo', 'bar'], 32 | ]; 33 | yield [ 34 | 'options' => [], 35 | 'expected' => ['foo', 'bar'], 36 | ]; 37 | yield [ 38 | 'options' => ['skipEmptyLines' => false], 39 | 'expected' => ['foo', '', '', 'bar'], 40 | ]; 41 | })->with(function () { 42 | yield ['useConstructor' => true]; 43 | yield ['useConstructor' => false]; 44 | }); 45 | 46 | it('returns an empty iterator when the content is null', function () { 47 | $state = new EtlState(); 48 | $extractor = new TextLinesExtractor(); 49 | 50 | // When 51 | $items = $extractor->extract($state); 52 | 53 | expect([...$items])->toBe([]); 54 | }); 55 | -------------------------------------------------------------------------------- /tests/Unit/FunctionsTest.php: -------------------------------------------------------------------------------- 1 | 'Apple', 14 | 'b' => 'Banana', 15 | 'c' => 'Carrot', 16 | 'd' => 'Dill', 17 | ]; 18 | 19 | // When 20 | $result = array_fill_from(['a', 'b', 'e'], $food, ['b' => 'banana', 'f' => 'Fig']); 21 | 22 | // Then 23 | expect($result)->toBe([ 24 | 'a' => 'Apple', 25 | 'b' => 'banana', 26 | ]); 27 | }); 28 | -------------------------------------------------------------------------------- /tests/Unit/Iterator/CSVIteratorTest.php: -------------------------------------------------------------------------------- 1 | toHaveCount(11) 19 | ->and($rows[0])->toBe([ 20 | 0 => 'city_english_name', 21 | 1 => 
'city_local_name', 22 | 2 => 'country_iso_code', 23 | 3 => 'continent', 24 | 4 => 'population', 25 | ]) 26 | ->and($rows[3])->toBe([ 27 | 0 => 'Tokyo', 28 | 1 => '東京', 29 | 2 => 'JP', 30 | 3 => 'Asia', 31 | 4 => 13929286, 32 | ]); 33 | })->with(function () { 34 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 35 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename))); 36 | yield 'file' => new CSVIterator(new SplFileObject($filename)); 37 | }); 38 | 39 | it('can make columns automatically', function (CSVIterator $iterator) { 40 | $rows = [...$iterator]; 41 | 42 | expect($rows)->toHaveCount(10) 43 | ->and($rows[0])->toBe([ 44 | 'city_english_name' => 'New York', 45 | 'city_local_name' => 'New York', 46 | 'country_iso_code' => 'US', 47 | 'continent' => 'North America', 48 | 'population' => 8537673, 49 | ]) 50 | ->and($rows[2])->toBe([ 51 | 'city_english_name' => 'Tokyo', 52 | 'city_local_name' => '東京', 53 | 'country_iso_code' => 'JP', 54 | 'continent' => 'Asia', 55 | 'population' => 13929286, 56 | ]); 57 | })->with(function () { 58 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 59 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => 'auto']); 60 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => 'auto']); 61 | }); 62 | 63 | it('can map user-defined columns', function (CSVIterator $iterator) { 64 | $rows = [...$iterator]; 65 | 66 | expect($rows[1])->toBe([ 67 | 'cityEnglishName' => 'New York', 68 | 'cityLocalName' => 'New York', 69 | 'countryIsoCode' => 'US', 70 | 'continent' => 'North America', 71 | 'population' => 8537673, 72 | ]) 73 | ->and($rows[3])->toBe([ 74 | 'cityEnglishName' => 'Tokyo', 75 | 'cityLocalName' => '東京', 76 | 'countryIsoCode' => 'JP', 77 | 'continent' => 'Asia', 78 | 'population' => 13929286, 79 | ]); 80 | })->with(function () { 81 | $columns = [ 82 | 'cityEnglishName', 83 | 'cityLocalName', 84 | 
'countryIsoCode', 85 | 'continent', 86 | 'population', 87 | ]; 88 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 89 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]); 90 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]); 91 | }); 92 | 93 | it('skips the 1st row when asked to', function (CSVIterator $iterator) { 94 | $rows = [...$iterator]; 95 | 96 | expect($rows[0])->toBe([ 97 | 'cityEnglishName' => 'New York', 98 | 'cityLocalName' => 'New York', 99 | 'countryIsoCode' => 'US', 100 | 'continent' => 'North America', 101 | 'population' => 8537673, 102 | ]) 103 | ->and($rows[2])->toBe([ 104 | 'cityEnglishName' => 'Tokyo', 105 | 'cityLocalName' => '東京', 106 | 'countryIsoCode' => 'JP', 107 | 'continent' => 'Asia', 108 | 'population' => 13929286, 109 | ]); 110 | })->with(function () { 111 | $columns = [ 112 | 'cityEnglishName', 113 | 'cityLocalName', 114 | 'countryIsoCode', 115 | 'continent', 116 | 'population', 117 | ]; 118 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 119 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns, 'skipFirstRow' => true]); 120 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns, 'skipFirstRow' => true]); 121 | }); 122 | 123 | it('adds fields when the row has not enough columns', function (CSVIterator $iterator) { 124 | $rows = [...$iterator]; 125 | 126 | expect($rows[1])->toBe([ 127 | 'cityEnglishName' => 'New York', 128 | 'cityLocalName' => 'New York', 129 | 'countryIsoCode' => 'US', 130 | 'continent' => 'North America', 131 | 'population' => 8537673, 132 | 'misc' => null, 133 | ]) 134 | ->and($rows[3])->toBe([ 135 | 'cityEnglishName' => 'Tokyo', 136 | 'cityLocalName' => '東京', 137 | 'countryIsoCode' => 'JP', 138 | 'continent' => 'Asia', 139 | 'population' => 13929286, 140 | 'misc' => null, 141 | ]); 
142 | })->with(function () { 143 | $columns = [ 144 | 'cityEnglishName', 145 | 'cityLocalName', 146 | 'countryIsoCode', 147 | 'continent', 148 | 'population', 149 | 'misc', 150 | ]; 151 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 152 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]); 153 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]); 154 | }); 155 | 156 | it('removes extra data whenever there are more fields than columns', function (CSVIterator $iterator) { 157 | $rows = [...$iterator]; 158 | 159 | expect($rows[1])->toBe([ 160 | 'cityEnglishName' => 'New York', 161 | 'cityLocalName' => 'New York', 162 | 'countryIsoCode' => 'US', 163 | 'continent' => 'North America', 164 | ]) 165 | ->and($rows[3])->toBe([ 166 | 'cityEnglishName' => 'Tokyo', 167 | 'cityLocalName' => '東京', 168 | 'countryIsoCode' => 'JP', 169 | 'continent' => 'Asia', 170 | ]); 171 | })->with(function () { 172 | $columns = [ 173 | 'cityEnglishName', 174 | 'cityLocalName', 175 | 'countryIsoCode', 176 | 'continent', 177 | ]; 178 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv'; 179 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]); 180 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]); 181 | }); 182 | -------------------------------------------------------------------------------- /tests/Unit/Iterator/IteratorStreamTest.php: -------------------------------------------------------------------------------- 1 | Loop::set(Factory::create())); 16 | 17 | it('is readable during iteration', function () { 18 | $items = ['foo', 'bar']; 19 | $stream = new IteratorStream($items); 20 | 21 | for ($i = 0; $i < 2; ++$i) { 22 | expect($stream->isReadable())->toBeTrue(); 23 | $stream->iterator->consume(); 24 | } 25 | 26 | expect($stream->isReadable())->toBeFalse(); 27 | 
Loop::stop(); 28 | }); 29 | 30 | it('can be paused and resumed', function () { 31 | $stream = new IteratorStream([]); 32 | expect($stream->paused)->toBeFalse(); 33 | 34 | // When 35 | $stream->pause(); 36 | 37 | // Then 38 | expect($stream->paused)->toBeTrue(); 39 | 40 | // When 41 | $stream->resume(); 42 | 43 | // Then 44 | expect($stream->paused)->toBeFalse(); 45 | }); 46 | 47 | it('can pipe data', function () { 48 | $items = ['foo', 'bar', 'baz']; 49 | $stream = new IteratorStream($items); 50 | $dest = new WritableStreamStub(); 51 | $stream->pipe($dest); 52 | 53 | // When 54 | Loop::run(); 55 | 56 | // Then 57 | expect($dest->data)->toBe($items); 58 | }); 59 | -------------------------------------------------------------------------------- /tests/Unit/Iterator/PregSplitIteratorTest.php: -------------------------------------------------------------------------------- 1 | toBe([ 23 | 'foo', 24 | '', 25 | '', 26 | 'bar', 27 | ]); 28 | }); 29 | -------------------------------------------------------------------------------- /tests/Unit/Iterator/StrTokIteratorTest.php: -------------------------------------------------------------------------------- 1 | toBe([ 23 | 'foo', 24 | 'bar', 25 | ]); 26 | }); 27 | -------------------------------------------------------------------------------- /tests/Unit/Loader/CSVLoaderTest.php: -------------------------------------------------------------------------------- 1 | 'auto'])); 23 | $output = $executor->process($cities)->output; 24 | expect($output)->toBe($destination); 25 | 26 | // @phpstan-ignore-next-line 27 | $writtenContent = implode('', [...new SplFileObject($output, 'r')]); 28 | // @phpstan-ignore-next-line 29 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]); 30 | 31 | expect($writtenContent)->toBe($expectedContent); 32 | }); 33 | 34 | it('loads items to a CSV string', function () { 35 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php'; 36 
| $executor = new EtlExecutor(loader: new CSVLoader(options: ['columns' => 'auto'])); 37 | $output = $executor->process($cities)->output; 38 | 39 | // @phpstan-ignore-next-line 40 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]); 41 | 42 | expect($output)->toBe($expectedContent); 43 | }); 44 | 45 | it('can write specific columns', function () { 46 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php'; 47 | $initialColumns = [ 48 | 'city_english_name', 49 | 'city_local_name', 50 | 'country_iso_code', 51 | 'continent', 52 | 'population', 53 | ]; 54 | $prettyColumns = [ 55 | 'CityEnglishName', 56 | 'CityLocalName', 57 | 'CountryIsoCode', 58 | 'Continent', 59 | 'Population', 60 | ]; 61 | $executor = new EtlExecutor(loader: new CSVLoader(options: ['columns' => $prettyColumns])); 62 | $output = $executor->process($cities)->output; 63 | 64 | $expectedContent = strtr( 65 | implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]), // @phpstan-ignore-line 66 | array_combine($initialColumns, $prettyColumns), 67 | ); 68 | 69 | expect($output)->toBe($expectedContent); 70 | }); 71 | 72 | it('can ignore columns', function () { 73 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php'; 74 | $executor = new EtlExecutor(loader: new CSVLoader()); 75 | $output = $executor->process($cities)->output; 76 | 77 | $lines = [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]; 78 | unset($lines[0]); 79 | $expectedContent = implode('', $lines); // @phpstan-ignore-line 80 | 81 | expect($output)->toBe($expectedContent); 82 | }); 83 | -------------------------------------------------------------------------------- /tests/Unit/Loader/CallableLoaderTest.php: -------------------------------------------------------------------------------- 1 | load('foo', $state); 24 | $output = $loader->flush(false, $state); 25 | 26 | // Then 27 | 
expect($output)->toBe(['foo']); 28 | }); 29 | 30 | it('complains if inner loader is not callable', function () { 31 | // Given 32 | $state = new EtlState(); 33 | $loader = new CallableLoader(); 34 | $loader->load('foo', $state); 35 | })->throws(LoadException::class, 'Invalid destination.'); 36 | -------------------------------------------------------------------------------- /tests/Unit/Loader/ChainLoaderTest.php: -------------------------------------------------------------------------------- 1 | $a[] = $item, // @phpstan-ignore-line 24 | )); 25 | $executor = $executor->loadInto( 26 | chain($executor->loader) 27 | ->with(fn (string $item) => $b[] = $item) // @phpstan-ignore-line 28 | ->with( 29 | new class() implements ConditionalLoaderInterface { 30 | public function supports(mixed $item, EtlState $state): bool 31 | { 32 | return 'foo' !== $item; 33 | } 34 | 35 | public function load(mixed $item, EtlState $state): void 36 | { 37 | $state->context[__CLASS__][] = $item; 38 | } 39 | 40 | public function flush(bool $isPartial, EtlState $state): mixed 41 | { 42 | foreach ($state->context[__CLASS__] as $item) { 43 | $state->context['storage'][] = $item; 44 | } 45 | 46 | return $state->context['storage']; 47 | } 48 | }, 49 | ) 50 | ); 51 | 52 | // Given 53 | $input = ['foo', 'bar']; 54 | 55 | // When 56 | $executor->process($input, context: ['storage' => $c]); 57 | 58 | // Then 59 | expect([...$a])->toBe(['foo', 'bar']) 60 | ->and([...$b])->toBe(['foo', 'bar']) 61 | ->and([...$c])->toBe(['bar']); 62 | }); 63 | 64 | it('silently chains loaders', function () { 65 | // Background 66 | $a = new ArrayObject(); 67 | $b = new ArrayObject(); 68 | 69 | // Given 70 | $input = ['foo', 'bar']; 71 | $executor = (new EtlExecutor())->loadInto( 72 | fn (string $item) => $a[] = $item, // @phpstan-ignore-line 73 | fn (string $item) => $b[] = $item, // @phpstan-ignore-line 74 | ); 75 | 76 | // When 77 | $executor->process($input); 78 | 79 | // Then 80 | expect([...$a])->toBe(['foo', 
'bar']) 81 | ->and([...$b])->toBe(['foo', 'bar']); 82 | }); 83 | -------------------------------------------------------------------------------- /tests/Unit/Loader/Doctrine/Book.php: -------------------------------------------------------------------------------- 1 | shouldReceive('getManagerForClass')->andReturn($manager); 22 | $manager->shouldReceive('persist')->twice(); 23 | $manager->shouldReceive('flush')->once(); 24 | 25 | loadInto(new DoctrineORMLoader($registry))->process([ 26 | new Book(id: 1, name: 'Holy Bible'), 27 | new Book(id: 2, name: 'Fifty Shades of Grey'), 28 | ]); 29 | }); 30 | 31 | it('complains if loaded item is not an object', function () { 32 | $loader = new DoctrineORMLoader(Mockery::mock(ManagerRegistry::class)); 33 | $loader->load([], new EtlState()); 34 | })->throws(LoadException::class, 'Expecting object, got array.'); 35 | 36 | it('complains if loaded item is not a mapped Doctrine class', function () { 37 | $registry = Mockery::mock(ManagerRegistry::class); 38 | $registry->shouldReceive('getManagerForClass')->andReturn(null); 39 | $loader = new DoctrineORMLoader($registry); 40 | $loader->load(new stdClass(), new EtlState()); 41 | })->throws(LoadException::class, 'Could not find manager for class stdClass.'); 42 | -------------------------------------------------------------------------------- /tests/Unit/Loader/JSONLoaderTest.php: -------------------------------------------------------------------------------- 1 | process($cities)->output; 31 | expect($output)->toBe($destination); 32 | 33 | // @phpstan-ignore-next-line 34 | $writtenContent = implode('', [...new SplFileObject($output, 'r')]); 35 | // @phpstan-ignore-next-line 36 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.json', 'r')]); 37 | 38 | expect($writtenContent)->toBe($expectedContent); 39 | })->with('config'); 40 | 41 | it('loads items to a JSON string', function (EtlConfiguration $options) { 42 | $cities = require 
dirname(__DIR__, 2).'/Data/10-biggest-cities.php'; 43 | $executor = new EtlExecutor(loader: new JSONLoader(), options: $options); 44 | $output = $executor->process($cities)->output; 45 | 46 | // @phpstan-ignore-next-line 47 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.json', 'r')]); 48 | 49 | expect($output)->toBe($expectedContent); 50 | })->with('config'); 51 | -------------------------------------------------------------------------------- /tests/Unit/Loader/STDOUTLoaderTest.php: -------------------------------------------------------------------------------- 1 | $executor->process($input, context: [ 33 | STDOUTLoader::class => [ 34 | 'resource' => $resource, // fake php://stdout 35 | ], 36 | ])); 37 | 38 | // Then 39 | expect($output)->toBe($expected); 40 | }); 41 | 42 | it('cannot load something which is not a string', fn () => loadInto(stdOut())->process([[]])) 43 | ->throws(LoadException::class, 'Expected string, got array.'); 44 | -------------------------------------------------------------------------------- /tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php: -------------------------------------------------------------------------------- 1 | $value = $normalizer->normalize($value)); 23 | 24 | // Then 25 | expect($strings)->toBe(['foo', null]); 26 | }); 27 | -------------------------------------------------------------------------------- /tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php: -------------------------------------------------------------------------------- 1 | $value = $normalizer->normalize($value)); 25 | 26 | // Then 27 | expect($strings)->toBe(['foo', 12345, 12345.67, '']); 28 | }); 29 | -------------------------------------------------------------------------------- /tests/Unit/Recipe/FilterRecipeTest.php: -------------------------------------------------------------------------------- 1 | !in_array($item, $skipItems, true), 26 | $eventClass, 27 | ), 28 | ) 29 | 
->transformWith(fn ($item) => strtoupper($item)); 30 | 31 | // When 32 | $report = $executor->process(['banana', 'apple', 'strawberry', 'BANANA', 'APPLE', 'STRAWBERRY']); 33 | 34 | // Then 35 | expect($report->output)->toBe($expectedResult); 36 | })->with(function () { 37 | yield [null, ['APPLE', 'BANANA']]; 38 | yield [ExtractEvent::class, ['APPLE', 'BANANA']]; 39 | yield [BeforeLoadEvent::class, ['BANANA', 'BANANA']]; 40 | }); 41 | 42 | it('filters items (on an allow-list basis)', function (?string $eventClass, array $expectedResult) { 43 | // Given 44 | $executor = withRecipe( 45 | new FilterRecipe( 46 | fn (string $item) => str_contains($item, 'b') || str_contains($item, 'B'), 47 | ), 48 | ) 49 | ->transformWith(fn ($item) => strtoupper($item)); 50 | 51 | // When 52 | $report = $executor->process(['banana', 'apple', 'strawberry', 'BANANA', 'APPLE', 'STRAWBERRY']); 53 | 54 | // Then 55 | expect($report->output)->toBe($expectedResult); 56 | })->with(function () { 57 | yield [null, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']]; 58 | yield [ExtractEvent::class, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']]; 59 | yield [BeforeLoadEvent::class, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']]; 60 | }); 61 | 62 | it('does not accept other types of events', function () { 63 | new FilterRecipe(fn () => '', LoadEvent::class); 64 | })->throws( 65 | InvalidArgumentException::class, 66 | sprintf('Can only filter on ExtractEvent / LoadEvent, not %s', LoadEvent::class), 67 | ); 68 | -------------------------------------------------------------------------------- /tests/Unit/Recipe/LoggerRecipeTest.php: -------------------------------------------------------------------------------- 1 | withRecipe($loggerRecipe); 21 | 22 | // When 23 | $executor->process(['foo', 'bar']); 24 | 25 | // Then 26 | $records = $handler->getRecords(); 27 | expect($records)->toHaveCount(12)->and($records)->sequence( 28 | fn ($record) => $record->message->toEqual('Initializing 
ETL...')->and($record->level->toBe(Level::Debug)), 29 | fn ($record) => $record->message->toEqual('Starting ETL...')->and($record->level->toBe(Level::Info)), 30 | fn ($record) => $record->message->toContain('Extracting item')->and($record->level->toBe(Level::Debug)), 31 | fn ($record) => $record->message->toContain('Transformed item')->and($record->level->toBe(Level::Debug)), 32 | fn ($record) => $record->message->toContain('Loaded item')->and($record->level->toBe(Level::Debug)), 33 | fn ($record) => $record->message->toContain('Flushing {nb} items (early)...')->and($record->level->toBe(Level::Info)), 34 | fn ($record) => $record->message->toContain('Extracting item')->and($record->level->toBe(Level::Debug)), 35 | fn ($record) => $record->message->toContain('Transformed item')->and($record->level->toBe(Level::Debug)), 36 | fn ($record) => $record->message->toContain('Loaded item')->and($record->level->toBe(Level::Debug)), 37 | fn ($record) => $record->message->toContain('Flushing {nb} items (early)...')->and($record->level->toBe(Level::Info)), 38 | fn ($record) => $record->message->toContain('Flushing {nb} items...')->and($record->level->toBe(Level::Info)), 39 | fn ($record) => $record->message->toContain('ETL complete.')->and($record->level->toBe(Level::Info)), 40 | ); 41 | }); 42 | -------------------------------------------------------------------------------- /tests/Unit/Recipe/RecipeTest.php: -------------------------------------------------------------------------------- 1 | onInit(function () use (&$hasReceivedInitEvent) { 20 | $hasReceivedInitEvent = true; 21 | }) 22 | ->onEnd(function () use (&$hasReceivedEndEvent) { 23 | $hasReceivedEndEvent = true; 24 | }); 25 | }, 26 | fn (EtlExecutor $executor) => $executor->withContext(['foo' => 'bar']) 27 | ); 28 | 29 | // When 30 | $report = $executor->process([]); 31 | 32 | // Then 33 | expect($hasReceivedInitEvent)->toBeTrue() 34 | ->and($hasReceivedEndEvent)->toBeTrue() 35 | ->and($report->context)->toBe(['foo' 
=> 'bar']) 36 | ; 37 | }); 38 | -------------------------------------------------------------------------------- /tests/Unit/Transformer/CallableTransformerTest.php: -------------------------------------------------------------------------------- 1 | yield strtoupper($value)); 16 | 17 | // When 18 | $transformed = $transformer->transform('foo', $state); 19 | 20 | // Then 21 | expect([...$transformed])->toBe(['FOO']); 22 | }); 23 | -------------------------------------------------------------------------------- /tests/Unit/Transformer/ChainTransformerTest.php: -------------------------------------------------------------------------------- 1 | strrev($item) 22 | )); 23 | $executor = $executor->transformWith( 24 | chain($executor->transformer) 25 | ->with(function (string $item): Generator { 26 | yield $item; 27 | yield strtoupper($item); 28 | }) 29 | ->with(fn (Generator $items): array => [...$items]) 30 | ->with(function (array $items): array { 31 | $items[] = 'hey'; 32 | 33 | return $items; 34 | }) 35 | ->with(fn (array $items): string => implode('-', $items)), 36 | ); 37 | 38 | // When 39 | $report = $executor->process($input); 40 | 41 | // Then 42 | expect($report->output)->toBe([ 43 | 'oof-OOF-hey', 44 | 'rab-RAB-hey', 45 | ]); 46 | }); 47 | 48 | it('silently chains transformers', function () { 49 | // Given 50 | $input = ['foo', 'bar']; 51 | 52 | $etl = (new EtlExecutor()) 53 | ->transformWith( 54 | fn (string $item): string => strrev($item), 55 | function (string $item): Generator { 56 | yield $item; 57 | yield strtoupper($item); 58 | }, 59 | fn (Generator $items): array => [...$items], 60 | function (array $items): array { 61 | $items[] = 'hey'; 62 | 63 | return $items; 64 | }, 65 | fn (array $items) => yield implode('-', $items) 66 | ); 67 | 68 | // When 69 | $report = $etl->process($input); 70 | 71 | // Then 72 | expect($report->output)->toBe([ 73 | 'oof-OOF-hey', 74 | 'rab-RAB-hey', 75 | ]); 76 | }); 77 | 
-------------------------------------------------------------------------------- /tests/Unit/Transformer/NullTransformerTest.php: -------------------------------------------------------------------------------- 1 | transform('foo', $state); 19 | 20 | // Then 21 | expect($transformedItems)->toBe('foo'); 22 | }); 23 | --------------------------------------------------------------------------------