├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── .php-cs-fixer.dist.php
├── LICENSE
├── README.md
├── composer.json
├── doc
├── advanced_usage.md
├── getting-started.md
└── recipes.md
├── phpstan.dist.neon
├── phpunit.xml.dist
├── src
├── EtlConfiguration.php
├── EtlExecutor.php
├── EtlState.php
├── EventDispatcher
│ ├── Event
│ │ ├── BeforeLoadEvent.php
│ │ ├── EndEvent.php
│ │ ├── Event.php
│ │ ├── ExtractEvent.php
│ │ ├── ExtractExceptionEvent.php
│ │ ├── FlushEvent.php
│ │ ├── FlushExceptionEvent.php
│ │ ├── InitEvent.php
│ │ ├── LoadEvent.php
│ │ ├── LoadExceptionEvent.php
│ │ ├── StartEvent.php
│ │ ├── TransformEvent.php
│ │ └── TransformExceptionEvent.php
│ ├── EventDispatcher.php
│ ├── PrioritizedListenerProvider.php
│ └── StoppableEventTrait.php
├── Exception
│ ├── EtlException.php
│ ├── ExtractException.php
│ ├── FlushException.php
│ ├── LoadException.php
│ ├── SkipRequest.php
│ ├── StopRequest.php
│ └── TransformException.php
├── Extractor
│ ├── CSVExtractor.php
│ ├── CallableExtractor.php
│ ├── ChainExtractor.php
│ ├── ExtractorInterface.php
│ ├── FileExtractor.php
│ ├── IterableExtractor.php
│ ├── IterableExtractorInterface.php
│ ├── JSONExtractor.php
│ ├── ReactStreamExtractor.php
│ ├── STDINExtractor.php
│ └── TextLinesExtractor.php
├── Internal
│ ├── ClonableTrait.php
│ ├── ConditionalLoaderTrait.php
│ ├── DispatchEventsTrait.php
│ ├── EtlBuilderTrait.php
│ ├── EtlEventListenersTrait.php
│ ├── StateHolder.php
│ └── TransformResult.php
├── Iterator
│ ├── CSVIterator.php
│ ├── ConsumableIterator.php
│ ├── FileIterator.php
│ ├── IteratorStream.php
│ ├── PregSplitIterator.php
│ └── StrTokIterator.php
├── Loader
│ ├── CSVLoader.php
│ ├── CallableLoader.php
│ ├── ChainLoader.php
│ ├── ConditionalLoaderInterface.php
│ ├── DoctrineORMLoader.php
│ ├── InMemoryLoader.php
│ ├── JSONLoader.php
│ ├── LoaderInterface.php
│ └── STDOUTLoader.php
├── Normalizer
│ ├── EmptyStringToNullNormalizer.php
│ ├── NumericStringToNumberNormalizer.php
│ └── ValueNormalizerInterface.php
├── Processor
│ ├── IterableProcessor.php
│ ├── ProcessorInterface.php
│ └── ReactStreamProcessor.php
├── Recipe
│ ├── FilterRecipe.php
│ ├── FilterRecipeMode.php
│ ├── LoggerRecipe.php
│ └── Recipe.php
├── Transformer
│ ├── CallableTransformer.php
│ ├── ChainTransformer.php
│ ├── NullTransformer.php
│ └── TransformerInterface.php
└── functions.php
└── tests
├── Behavior
├── Events
│ ├── BeforeLoadEventTest.php
│ ├── EndEventTest.php
│ ├── ExtractEventTest.php
│ ├── ExtractExceptionEventTest.php
│ ├── FlushEventTest.php
│ ├── FlushExceptionEventTest.php
│ ├── InitEventTest.php
│ ├── LoadEventTest.php
│ ├── LoadExceptionEventTest.php
│ ├── StartEventTest.php
│ ├── TransformEventTest.php
│ └── TransformExceptionEventTest.php
├── ExtractExceptionTest.php
├── FlushExceptionTest.php
├── FlushTest.php
├── LoadExceptionTest.php
├── NextTickTest.php
├── ReactStreamProcessorTest.php
├── SkipTest.php
├── StopTest.php
└── TransformExceptionTest.php
├── Data
├── 10-biggest-cities.csv
├── 10-biggest-cities.json
└── 10-biggest-cities.php
├── Stubs
├── InMemoryLoader.php
├── STDINStub.php
├── STDOUTStub.php
└── WritableStreamStub.php
└── Unit
├── ContextTest.php
├── EtlConfigurationTest.php
├── EtlExecutorTest.php
├── EventDispatcher
└── EventDispatcherTest.php
├── Extractor
├── CSVExtractorTest.php
├── CallableExtractorTest.php
├── ChainExtractorTest.php
├── IterableExtractorTest.php
├── JSONExtractorTest.php
├── ReactStreamExtractorTest.php
├── STDINExtractorTest.php
└── TextLinesExtractorTest.php
├── FunctionsTest.php
├── Iterator
├── CSVIteratorTest.php
├── IteratorStreamTest.php
├── PregSplitIteratorTest.php
└── StrTokIteratorTest.php
├── Loader
├── CSVLoaderTest.php
├── CallableLoaderTest.php
├── ChainLoaderTest.php
├── Doctrine
│ └── Book.php
├── DoctrineORMLoaderTest.php
├── JSONLoaderTest.php
└── STDOUTLoaderTest.php
├── Normalizer
├── EmptyStringToNullNormalizerTest.php
└── NumericStringToNumberNormalizerTest.php
├── Recipe
├── FilterRecipeTest.php
├── LoggerRecipeTest.php
└── RecipeTest.php
└── Transformer
├── CallableTransformerTest.php
├── ChainTransformerTest.php
└── NullTransformerTest.php
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI Workflow
2 | on:
3 | push:
4 | branches: [ master, '4.0' ]
5 | pull_request:
6 |
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-22.04
10 | strategy:
11 | matrix:
12 | php:
13 | - 8.2
14 | - 8.3
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | - name: Setup PHP
19 | uses: shivammathur/setup-php@v2
20 | with:
21 | php-version: ${{ matrix.php }}
22 |
23 | - name: Install dependencies
24 | run: composer install --prefer-dist --no-progress
25 |
26 | - name: Check types
27 | run: vendor/bin/phpstan analyse
28 |
29 | - name: Run test suite
30 | run: vendor/bin/pest --coverage --coverage-clover=coverage.xml
31 |
32 | - name: Upload coverage to Codecov
33 | uses: codecov/codecov-action@v1
34 | with:
35 | token: ${{ secrets.CODECOV_TOKEN }}
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | vendor/
2 | composer.lock
3 | .php-cs-fixer.cache
4 |
--------------------------------------------------------------------------------
/.php-cs-fixer.dist.php:
--------------------------------------------------------------------------------
1 | in(__DIR__)
5 | ;
6 |
7 | return (new PhpCsFixer\Config())
8 | ->setRules([
9 | '@Symfony' => true,
10 | 'global_namespace_import' => [
11 | 'import_functions' => true,
12 | 'import_constants' => true,
13 | ],
14 | ])
15 | ->setFinder($finder)
16 | ;
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016-2023 Beno!t POLASZEK
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://packagist.org/packages/bentools/etl)
2 | [](https://packagist.org/packages/bentools/etl)
3 | [](https://github.com/bpolaszek/bentools-etl/actions/workflows/ci.yml)
4 | [](https://codecov.io/gh/bpolaszek/bentools-etl)
5 | [](https://packagist.org/packages/bentools/etl)
6 |
7 | Okay, so you heard about the [Extract / Transform / Load](https://en.wikipedia.org/wiki/Extract,_transform,_load) pattern,
8 | and you're looking for a PHP library to do the stuff. Alright, let's go!
9 |
10 | `bentools/etl` is a versatile PHP library for implementing the Extract, Transform, Load (ETL) pattern, designed to streamline data processing tasks.
11 |
12 | Table of Contents
13 | -----------------
14 |
15 | - [Concepts](#concepts)
16 | - [Installation](#installation)
17 | - [Getting started](#usage)
18 | - [The EtlState object](doc/getting-started.md#the-etlstate-object)
19 | - [Skipping items](doc/getting-started.md#skipping-items)
20 | - [Stopping the workflow](doc/getting-started.md#stopping-the-workflow)
21 | - [Using events](doc/getting-started.md#using-events)
22 | - [Flush frequency and early flushes](doc/getting-started.md#flush-frequency-and-early-flushes)
23 | - [Advanced Usage](doc/advanced_usage.md)
24 | - [Creating your own Extractor / Transformers / Loaders](doc/advanced_usage.md#creating-your-own-extractor--transformers--loaders)
25 | - [Difference between yield and return in transformers](doc/advanced_usage.md#difference-between-yield-and-return-in-transformers)
26 | - [Next tick](doc/advanced_usage.md#next-tick)
27 | - [Chaining extractors / transformers / loaders](doc/advanced_usage.md#chaining-extractors--transformers--loaders)
28 | - [Reading from STDIN / Writing to STDOUT](doc/advanced_usage.md#reading-from-stdin--writing-to-stdout)
29 | - [Instantiators](doc/advanced_usage.md#instantiators)
30 | - [Using ReactPHP](doc/advanced_usage.md#using-reactphp-experimental)
31 | - [Recipes](doc/recipes.md)
32 | - [Contributing](#contribute)
33 | - [License](#license)
34 |
35 | Concepts
36 | --------
37 |
38 | Let's cover the basic concepts:
39 | - **Extract**: you have a source of data (a database, a CSV file, whatever) - an **extractor** is able to read that data and provide an iterator of items
40 | - **Transform**: apply transformation to each item. A **transformer** may generate 0, 1 or several items to **load** (for example, 1 item may generate multiple SQL queries)
41 | - **Load**: load transformed item to the destination. For example, **extracted items** have been **transformed** to SQL queries, and your **loader** will run those queries against your database.
42 |
43 | Installation
44 | ------------
45 |
46 | ```bash
47 | composer require bentools/etl
48 | ```
49 |
50 | > [!WARNING]
51 | > Current version (4.0) is a complete redesign and introduces significant BC (backward compatibility) breaks.
52 | > Avoid upgrading from `^2.0` or `^3.0` unless you're fully aware of the changes.
53 |
54 | Usage
55 | -----
56 |
57 | Now let's have a look at how simple it is:
58 |
59 | ```php
60 | use BenTools\ETL\EtlExecutor;
61 |
62 | // Given
63 | $singers = ['Bob Marley', 'Amy Winehouse'];
64 |
65 | // Transform each singer's name to uppercase and process the array
66 | $etl = (new EtlExecutor())
67 | ->transformWith(fn (string $name) => strtoupper($name));
68 |
69 | // When
70 | $report = $etl->process($singers);
71 |
72 | // Then
73 | var_dump($report->output); // ["BOB MARLEY", "AMY WINEHOUSE"]
74 | ```
75 |
76 | OK, that wasn't really hard, here we basically don't have to _extract_ anything (we can already iterate on `$singers`),
77 | and we're not _loading_ anywhere, except into PHP's memory.
78 |
79 | You may ask, "why don't you just `array_map('strtoupper', $singers)` ?" and you're totally right.
80 |
81 | But sometimes, extracting, transforming and / or loading get a little more complex.
82 | You may want to extract from a file or from content crawled on the web, perform one to many transformations, maybe skip some items,
83 | or reuse some extraction, transformation or loading logic.
84 |
85 | Here's another example of what you can do:
86 |
87 | ```php
88 | use BenTools\ETL\EventDispatcher\Event\TransformEvent;
89 | use BenTools\ETL\Loader\JSONLoader;
90 |
91 | use function BenTools\ETL\extractFrom;
92 |
93 | $executor = extractFrom(function () {
94 | yield ['firstName' => 'Barack', 'lastName' => 'Obama'];
95 | yield ['firstName' => 'Donald', 'lastName' => 'Trump'];
96 | yield ['firstName' => 'Joe', 'lastName' => 'Biden'];
97 | })
98 | ->transformWith(fn (array $item) => implode(' ', array_values($item)))
99 | ->loadInto(new JSONLoader())
100 | ->onTransform(function (TransformEvent $event) {
101 | if ('Donald Trump' === $event->transformResult->value) {
102 | $event->state->skip();
103 | }
104 | });
105 |
106 | $report = $executor->process();
107 |
108 | dump($report->output); // string '["Barack Obama", "Joe Biden"]'
109 | ```
110 |
111 | Or:
112 |
113 | ```php
114 | $report = $executor->process(destination: 'file:///tmp/presidents.json');
115 | var_dump($report->output); // string 'file:///tmp/presidents.json' - content has been written here
116 | ```
117 |
118 | You get the point. Now it's up to you to write your own workflows!
119 |
120 | Continue reading the [Getting Started Guide](doc/getting-started.md).
121 |
122 | Contribute
123 | ----------
124 |
125 | Contributions are welcome! Don't hesitate to suggest recipes.
126 |
127 | This library is 100% covered with [Pest](https://pestphp.com) tests.
128 |
129 | Please make sure to run the tests using the command below and to maintain code coverage before submitting PRs.
130 |
131 | ```bash
132 | composer ci:check
133 | ```
134 |
135 | License
136 | -------
137 |
138 | MIT.
139 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "bentools/etl",
3 | "description": "PHP ETL (Extract / Transform / Load) implementation, with very few dependencies.",
4 | "type": "library",
5 | "require": {
6 | "php": ">=8.2",
7 | "psr/event-dispatcher": "^1.0",
8 | "psr/log": "^3.0",
9 | "symfony/options-resolver": "@stable",
10 | "thecodingmachine/safe": "^2.5"
11 | },
12 | "require-dev": {
13 | "bentools/iterable-functions": "^2.1",
14 | "doctrine/orm": "^2.16",
15 | "friendsofphp/php-cs-fixer": "^3.35",
16 | "mockery/mockery": "^1.6",
17 | "monolog/monolog": "^3.5",
18 | "pestphp/pest": "^2.24",
19 | "phpstan/phpstan": "^1.10",
20 | "phpstan/phpstan-mockery": "^1.1",
21 | "react/stream": "^1.3",
22 | "symfony/var-dumper": "*"
23 | },
24 | "license": "MIT",
25 | "autoload": {
26 | "psr-4": {
27 | "BenTools\\ETL\\": "src/"
28 | },
29 | "files": [
30 | "src/functions.php"
31 | ]
32 | },
33 | "autoload-dev": {
34 | "psr-4": {
35 | "BenTools\\ETL\\Tests\\": "tests/"
36 | }
37 | },
38 | "scripts": {
39 | "ci:check": [
40 | "vendor/bin/php-cs-fixer fix",
41 | "vendor/bin/phpstan analyse",
42 | "vendor/bin/pest --coverage"
43 | ]
44 | },
45 | "minimum-stability": "stable",
46 | "config": {
47 | "sort-packages": true,
48 | "allow-plugins": {
49 | "pestphp/pest-plugin": true
50 | }
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/doc/getting-started.md:
--------------------------------------------------------------------------------
1 | # Getting started
2 |
3 | Consider you have a `/tmp/cities.csv` file containing this, and you want to convert it to a JSON file.
4 |
5 |
6 | ```csv
7 | city_english_name,city_local_name,country_iso_code,continent,population
8 | "New York","New York",US,"North America",8537673
9 | "Los Angeles","Los Angeles",US,"North America",39776830
10 | Tokyo,東京,JP,Asia,13929286
11 | ```
12 |
13 | ```php
14 | use BenTools\ETL\EtlExecutor;
15 |
16 | $etl = (new EtlExecutor())
17 | ->extractFrom(new CSVExtractor(options: ['columns' => 'auto']))
18 | ->loadInto(new JSONLoader());
19 |
20 | $report = $etl->process('file:///tmp/cities.csv', 'file:///tmp/cities.json');
21 | dump($report->output); // file:///tmp/cities.json
22 | ```
23 |
24 | Then, let's have a look at `/tmp/cities.json`:
25 | ```json
26 | [
27 | {
28 | "city_english_name": "New York",
29 | "city_local_name": "New York",
30 | "country_iso_code": "US",
31 | "continent": "North America",
32 | "population": 8537673
33 | },
34 | {
35 | "city_english_name": "Los Angeles",
36 | "city_local_name": "Los Angeles",
37 | "country_iso_code": "US",
38 | "continent": "North America",
39 | "population": 39776830
40 | },
41 | {
42 | "city_english_name": "Tokyo",
43 | "city_local_name": "東京",
44 | "country_iso_code": "JP",
45 | "continent": "Asia",
46 | "population": 13929286
47 | }
48 | ]
49 | ```
50 |
51 | > [!NOTE]
52 | > We didn't _transform_ anything here, we just denormalized the CSV file to an array, then serialized that array to a JSON file.
53 |
54 | The `CSVExtractor` has some options to _read_ the data, such as considering that the 1st row is the column keys.
55 |
56 | This library ships with a few built-in extractors and loaders (plain text, CSV, JSON, to name a few),
57 | but you can of course create your own. See [Advanced Usage](advanced_usage.md).
58 |
59 | The `EtlState` object
60 | ---------------------
61 |
62 | The `EtlState` object represents the state of the ETL process currently being run by the `EtlExecutor`.
63 | This object gives you various information such as the duration, the total number of items,
64 | the current extracted key, and so on.
65 | It also contains a `context` array which is here to hold some data related to the current process.
66 |
67 | The `EtlState` object is injected in extractors' `extract()` method,
68 | in transformers' `transform()` method
69 | and in loaders' `load()` and `flush()` methods.
70 | If you use callables, it will be injected as well.
71 |
72 | The `EtlState` object is also injected into all events.
73 | Most of its properties are read-only, except `context`.
74 |
75 | > [!TIP]
76 | > Your executor can provide a default context (see example below).
77 |
78 | ```php
79 | it('accepts a default context', function () {
80 | // Given
81 | $executor = (new EtlExecutor())->withContext(['foo' => 'bar']);
82 |
83 | // When
84 | $report = $executor->process([], context: ['bar' => 'baz']);
85 |
86 | // Then
87 | expect($report->context)->toBe(['foo' => 'bar', 'bar' => 'baz']);
88 | });
89 | ```
90 |
91 | Skipping items
92 | --------------
93 |
94 | You can skip items at any time.
95 |
96 | > [!TIP]
97 | > Use the `skip()` method from the `EtlState` object as soon as your business logic requires it.
98 |
99 | Stopping the workflow
100 | ---------------------
101 |
102 | You can stop the workflow at any time.
103 |
104 | > [!TIP]
105 | > Use the `stop()` method from the `EtlState` object as soon as your business logic requires it.
106 |
107 | Using Events
108 | ------------
109 |
110 | The `EtlExecutor` emits a variety of events during the ETL workflow, providing insights and control over the process.
111 |
112 | - `InitEvent` when `process()` was just called
113 | - `StartEvent` when extraction just started (we might know the total number of items to extract at this time, if the extractor provides this)
114 | - `ExtractEvent` upon each extracted item
115 | - `ExtractExceptionEvent` when something went wrong during extraction (this is generally not recoverable)
116 | - `TransformEvent` upon each transformed item (exposes a `TransformResult` object, containing 0, one or more items to load)
117 | - `TransformExceptionEvent` when something went wrong during transformation (the exception can be dismissed)
118 | - `BeforeLoadEvent` upon each item to be loaded
119 | - `LoadEvent` upon each loaded item
120 | - `LoadExceptionEvent` when something went wrong during loading (the exception can be dismissed)
121 | - `FlushEvent` at each flush
122 | - `FlushExceptionEvent` when something went wrong during flush (the exception can be dismissed)
123 | - `EndEvent` whenever the workflow is complete.
124 |
125 | > [!IMPORTANT]
126 | > All events give you access to the `EtlState` object, the state of the running ETL process.
127 |
128 | Accessing `$event->state` allows you to:
129 | - Read what's going on (total number of items, number of loaded items, current extracted item index)
130 | - Write any arbitrary data into the `$state->context` array
131 | - [Skip items](#skipping-items)
132 | - [Stop the workflow](#stopping-the-workflow)
133 | - [Trigger an early flush](#flush-frequency-and-early-flushes).
134 |
135 | You can hook to those events during `EtlExecutor` instantiation, i.e.:
136 |
137 | ```php
138 | $etl = (new EtlExecutor())
139 | ->onExtract(
140 | fn (ExtractEvent $event) => $logger->info('Extracting item #{key}', ['key' => $event->state->currentItemKey]),
141 | );
142 | ```
143 |
144 | Flush frequency and early flushes
145 | ---------------------------------
146 |
147 | By default, the `flush()` method of your loader will be invoked at the end of the ETL,
148 | meaning it will likely keep all loaded items in memory before dumping them to their final destination.
149 |
150 | > [!TIP]
151 | > - Feel free to adjust a `flushFrequency` that fits your needs to manage memory usage and data processing efficiency
152 | > - Optionally, trigger an early flush at any time during the ETL process.
153 |
154 | ```php
155 | $etl = (new EtlExecutor(options: new EtlConfiguration(flushEvery: 10)))
156 | ->onLoad(
157 | function (LoadEvent $event) {
158 | if (/* whatever reason */) {
159 | $event->state->flush();
160 | }
161 | },
162 | );
163 | ```
164 |
165 | Advanced usage
166 | --------------
167 |
168 | See [Advanced Usage](advanced_usage.md).
169 |
--------------------------------------------------------------------------------
/doc/recipes.md:
--------------------------------------------------------------------------------
1 | # Recipes
2 |
3 | Recipes are pre-configured setups for `EtlExecutor`, facilitating reusable ETL configurations.
4 |
5 | LoggerRecipe
6 | ------------
7 |
8 | The `LoggerRecipe` enables logging for all ETL events.
9 |
10 | ```php
11 | use BenTools\ETL\EtlExecutor;
12 | use BenTools\ETL\Recipe\LoggerRecipe;
13 | use Monolog\Logger;
14 |
15 | $logger = new Logger();
16 | $etl = (new EtlExecutor())
17 | ->withRecipe(new LoggerRecipe($logger));
18 | ```
19 |
20 | This will basically listen to all events and fire log entries.
21 |
22 | FilterRecipe
23 | ------------
24 |
25 | The `FilterRecipe` gives you syntactic sugar for skipping items.
26 |
27 | ```php
28 | use BenTools\ETL\EtlExecutor;
29 | use BenTools\ETL\Recipe\LoggerRecipe;
30 | use Monolog\Logger;
31 |
32 | use function BenTools\ETL\skipWhen;
33 |
34 | $logger = new Logger();
35 | $etl = (new EtlExecutor())->withRecipe(skipWhen(fn ($item) => 'apple' === $item));
36 | $report = $etl->process(['banana', 'apple', 'pineapple']);
37 |
38 | var_dump($report->output); // ['banana', 'pineapple']
39 | ```
40 |
41 | Creating your own recipes
42 | -------------------------
43 |
44 | You can create your own recipes by implementing `BenTools\ETL\Recipe\Recipe`
45 | or using a callable with the same signature.
46 |
47 | ### Example 1. Stop the workflow when a max number of items has been reached
48 |
49 | ```php
50 | use BenTools\ETL\EtlExecutor;
51 | use BenTools\ETL\EtlState;
52 | use BenTools\ETL\EventDispatcher\Event\ExtractEvent;
53 |
54 | use const PHP_INT_MAX;
55 |
56 | final class MaxItemsRecipe extends Recipe
57 | {
58 | public function __construct(
59 | private readonly int $maxItems = PHP_INT_MAX,
60 | ) {
61 | }
62 |
63 | public function decorate(EtlExecutor $executor): EtlExecutor
64 | {
65 | return $executor
66 | ->withContext(['maxItems' => $this->maxItems])
67 | ->onExtract($this);
68 | }
69 |
70 | public function __invoke(ExtractEvent $event): void
71 | {
72 | if ($event->state->nbExtractedItems >= $event->state->context['maxItems']) {
73 | $event->state->nextTick(fn (EtlState $state) => $state->skip());
74 | }
75 | }
76 | }
77 | ```
78 |
79 | Usage:
80 |
81 | ```php
82 | use function BenTools\ETL\withRecipe;
83 |
84 | $etl = withRecipe(new MaxItemsRecipe(10)); // Set to 10 items max by default
85 | $report = $etl->process(['foo', 'bar', 'baz'], context: ['maxItems' => 2]); // Optionally overwrite here
86 | var_dump($report->output); // ['foo', 'bar']
87 | ```
88 |
89 | ### Example 2. Display a progress bar when using the Symfony framework:
90 |
91 | ```php
92 | use BenTools\ETL\EtlExecutor;
93 | use BenTools\ETL\EventDispatcher\Event\Event;
94 | use BenTools\ETL\Recipe\Recipe;
95 | use Symfony\Component\Console\Helper\ProgressBar;
96 |
97 | final class ProgressBarRecipe extends Recipe
98 | {
99 | public function __construct(
100 | public readonly ProgressBar $progressBar,
101 | ) {
102 | }
103 |
104 | public function decorate(EtlExecutor $executor): EtlExecutor
105 | {
106 | return $executor
107 | ->onStart(function (Event $event) {
108 | if (!$event->state->nbTotalItems) {
109 | return;
110 | }
111 | $this->progressBar->setMaxSteps($event->state->nbTotalItems);
112 | })
113 | ->onExtract(fn () => $this->progressBar->advance())
114 | ->onEnd(fn () => $this->progressBar->finish());
115 | }
116 | }
117 | ```
118 |
119 | Usage:
120 |
121 | ```php
122 | use BenTools\ETL\EtlExecutor;
123 | use Symfony\Component\Console\Style\SymfonyStyle;
124 |
125 | $output = new SymfonyStyle($input, $output);
126 | $progressBar = $output->createProgressBar();
127 | $executor = (new EtlExecutor())->withRecipe(new ProgressBarRecipe($progressBar));
128 | ```
129 |
--------------------------------------------------------------------------------
/phpstan.dist.neon:
--------------------------------------------------------------------------------
1 | parameters:
2 | level: 6
3 | paths:
4 | - src/
5 | - tests/
6 | ignoreErrors:
7 | -
8 | message: "#Access to an undefined property#"
9 | path: "tests/Unit/Recipe/LoggerRecipeTest.php"
10 | includes:
11 | - vendor/phpstan/phpstan-mockery/extension.neon
12 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | tests
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | src
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/src/EtlConfiguration.php:
--------------------------------------------------------------------------------
1 | 0, got %d', $flushEvery));
26 | }
27 | $this->flushFrequency = $flushEvery;
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/EtlState.php:
--------------------------------------------------------------------------------
1 |
23 | */
24 | public SplObjectStorage $nextTickCallbacks;
25 |
26 | private bool $earlyFlush = false;
27 |
28 | /**
29 | * @param array $context
30 | */
31 | public function __construct(
32 | public readonly EtlConfiguration $options = new EtlConfiguration(),
33 | public readonly mixed $source = null,
34 | public readonly mixed $destination = null,
35 | public array $context = [],
36 | public readonly mixed $currentItemKey = null,
37 | public readonly int $currentItemIndex = -1,
38 | public readonly int $nbExtractedItems = 0,
39 | public readonly int $nbLoadedItems = 0,
40 | public readonly int $nbLoadedItemsSinceLastFlush = 0,
41 | public readonly ?int $nbTotalItems = null,
42 | public readonly DateTimeImmutable $startedAt = new DateTimeImmutable(),
43 | public readonly ?DateTimeImmutable $endedAt = null,
44 | public readonly mixed $output = null,
45 | public readonly StateHolder $stateHolder = new StateHolder(),
46 | ) {
47 | $this->nextTickCallbacks ??= new SplObjectStorage();
48 | $this->stateHolder->state ??= $this;
49 | }
50 |
51 | /**
52 | * @internal
53 | */
54 | public function getLastVersion(): self
55 | {
56 | return $this->stateHolder->state;
57 | }
58 |
59 | /**
60 | * @internal
61 | */
62 | public function update(self $state): self
63 | {
64 | $this->stateHolder->state = $state;
65 |
66 | return $state;
67 | }
68 |
69 | public function nextTick(callable $callback): void
70 | {
71 | $this->nextTickCallbacks->attach(static fn (EtlState $state) => $callback($state));
72 | }
73 |
74 | /**
75 | * Flush after current item.
76 | */
77 | public function flush(): void
78 | {
79 | $this->earlyFlush = true;
80 | }
81 |
82 | /**
83 | * Skip current item.
84 | */
85 | public function skip(): never
86 | {
87 | throw new SkipRequest();
88 | }
89 |
90 | /**
91 | * Stop after current item.
92 | */
93 | public function stop(): never
94 | {
95 | throw new StopRequest();
96 | }
97 |
98 | public function getDuration(): float
99 | {
100 | $endedAt = $this->endedAt ?? new DateTimeImmutable();
101 |
102 | return (float) ($endedAt->format('U.u') - $this->startedAt->format('U.u'));
103 | }
104 |
105 | /**
106 | * @internal
107 | */
108 | public function shouldFlush(): bool
109 | {
110 | return match (true) {
111 | $this->earlyFlush => true,
112 | INF === $this->options->flushFrequency => false,
113 | 0 === $this->nbLoadedItemsSinceLastFlush => false,
114 | 0 === ($this->nbLoadedItemsSinceLastFlush % $this->options->flushFrequency) => true,
115 | default => false,
116 | };
117 | }
118 |
119 | /**
120 | * @internal
121 | */
122 | public function withUpdatedItemKey(mixed $key): self
123 | {
124 | return $this->cloneWith([
125 | 'currentItemKey' => $key,
126 | 'currentItemIndex' => $this->currentItemIndex + 1,
127 | 'nbExtractedItems' => $this->nbExtractedItems + 1,
128 | ]);
129 | }
130 |
131 | /**
132 | * @internal
133 | */
134 | public function withIncrementedNbLoadedItems(): self
135 | {
136 | return $this->cloneWith([
137 | 'nbLoadedItems' => $this->nbLoadedItems + 1,
138 | 'nbLoadedItemsSinceLastFlush' => $this->nbLoadedItemsSinceLastFlush + 1,
139 | ]);
140 | }
141 |
142 | /**
143 | * @internal
144 | */
145 | public function withNbTotalItems(?int $nbTotalItems): self
146 | {
147 | return $this->cloneWith(['nbTotalItems' => $nbTotalItems]);
148 | }
149 |
150 | /**
151 | * @internal
152 | */
153 | public function withOutput(mixed $output): self
154 | {
155 | return $this->cloneWith(['output' => $output]);
156 | }
157 |
158 | /**
159 | * @internal
160 | */
161 | public function withClearedFlush(): self
162 | {
163 | return $this->cloneWith([
164 | 'earlyFlush' => false,
165 | 'nbLoadedItemsSinceLastFlush' => 0,
166 | ]);
167 | }
168 | }
169 |
--------------------------------------------------------------------------------
/src/EventDispatcher/Event/BeforeLoadEvent.php:
--------------------------------------------------------------------------------
1 | exception = null;
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/EventDispatcher/Event/InitEvent.php:
--------------------------------------------------------------------------------
1 | exception = null;
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/EventDispatcher/Event/StartEvent.php:
--------------------------------------------------------------------------------
1 | exception = null;
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/EventDispatcher/EventDispatcher.php:
--------------------------------------------------------------------------------
1 | listenerProvider->getListenersForEvent($event);
26 | $isStoppable = $event instanceof StoppableEventInterface;
27 |
28 | foreach ($listeners as $callback) {
29 | if ($isStoppable && $event->isPropagationStopped()) {
30 | break;
31 | }
32 |
33 | $callback($event);
34 | }
35 |
36 | return $event;
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/EventDispatcher/PrioritizedListenerProvider.php:
--------------------------------------------------------------------------------
1 | >>
16 | */
17 | private array $prioritizedListeners = [];
18 |
19 | /**
20 | * @var array>
21 | */
22 | private array $flattenedListeners = [];
23 |
24 | public function listenTo(string $eventClass, callable $callback, int $priority = 0): void
25 | {
26 | $this->prioritizedListeners[$eventClass][$priority][] = $callback;
27 | krsort($this->prioritizedListeners[$eventClass]);
28 | $this->flattenedListeners[$eventClass] = array_merge(...$this->prioritizedListeners[$eventClass]);
29 | }
30 |
31 | public function hasListeners(string $eventClass): bool
32 | {
33 | return isset($this->flattenedListeners[$eventClass]);
34 | }
35 |
/**
 * Returns the listeners registered for the event's exact class, highest priority first.
 *
 * The lookup uses `$event::class` only; listeners registered on a parent class or an
 * interface are not returned.
 *
 * @return iterable<callable>
 */
public function getListenersForEvent(object $event): iterable
{
    $eventClass = $event::class;

    return $this->flattenedListeners[$eventClass] ?? [];
}
43 | }
44 |
--------------------------------------------------------------------------------
/src/EventDispatcher/StoppableEventTrait.php:
--------------------------------------------------------------------------------
1 | propagationStopped = true;
14 | }
15 |
/**
 * Reports whether propagation was stopped; only a strict `true` flag counts.
 */
final public function isPropagationStopped(): bool
{
    $stopped = $this->propagationStopped;

    return true === $stopped;
}
20 | }
21 |
--------------------------------------------------------------------------------
/src/Exception/EtlException.php:
--------------------------------------------------------------------------------
1 | dispatch(new ExtractExceptionEvent($state->getLastVersion(), $exception))->exception;
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/src/Exception/FlushException.php:
--------------------------------------------------------------------------------
1 | dispatch(new FlushExceptionEvent($state, $exception))->exception;
21 |
22 | if ($exception) {
23 | throw $exception;
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/Exception/LoadException.php:
--------------------------------------------------------------------------------
1 | dispatch(new LoadExceptionEvent($state->getLastVersion(), $exception))->exception;
21 |
22 | if ($exception) {
23 | throw $exception;
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/Exception/SkipRequest.php:
--------------------------------------------------------------------------------
1 | dispatch(new TransformExceptionEvent($state->getLastVersion(), $exception))->exception;
21 |
22 | if ($exception) {
23 | throw $exception;
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/Extractor/CSVExtractor.php:
--------------------------------------------------------------------------------
1 | source ?? $this->content;
29 |
30 | if (!is_string($content)) {
31 | throw new ExtractException('Invalid source.');
32 | }
33 |
34 | if (str_starts_with($content, 'file://')) {
35 | $iterator = (new FileExtractor(substr($content, 7), $this->options))->extract($state);
36 | } else {
37 | $iterator = (new TextLinesExtractor($content, $this->options))->extract($state);
38 | }
39 |
40 | return new CSVIterator($iterator, $this->options);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/Extractor/CallableExtractor.php:
--------------------------------------------------------------------------------
1 | closure)($state);
23 |
24 | if (null === $extracted) {
25 | return new EmptyIterator();
26 | }
27 |
28 | if (!is_iterable($extracted)) {
29 | return [$extracted];
30 | }
31 |
32 | return $extracted;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/Extractor/ChainExtractor.php:
--------------------------------------------------------------------------------
1 | $_extractor) {
22 | if (!$_extractor instanceof ExtractorInterface) {
23 | $extractors[$e] = new CallableExtractor($_extractor(...));
24 | }
25 | }
26 | $this->extractors = $extractors;
27 | }
28 |
/**
 * Returns a new chain made of the current extractors followed by the given ones.
 *
 * The current instance is left untouched.
 */
public function with(
    ExtractorInterface|callable $extractor,
    ExtractorInterface|callable ...$extractors,
): self {
    $chain = [...$this->extractors, $extractor, ...$extractors];

    return new self(...$chain);
}
35 |
/**
 * Runs every chained extractor in order and yields their items one after another.
 *
 * Items are re-yielded individually (not with `yield from`), so the generator assigns
 * its own sequential keys instead of repeating each extractor's keys.
 */
public function extract(EtlState $state): iterable
{
    foreach ($this->extractors as $chained) {
        $items = $chained->extract($state);

        foreach ($items as $item) {
            yield $item;
        }
    }
}
44 |
/**
 * Wraps an extractor into a chain, returning it as-is when it already is one.
 */
public static function from(ExtractorInterface $extractor): self
{
    if ($extractor instanceof self) {
        return $extractor;
    }

    return new self($extractor);
}
52 | }
53 |
--------------------------------------------------------------------------------
/src/Extractor/ExtractorInterface.php:
--------------------------------------------------------------------------------
1 | source ?? $this->file;
28 |
29 | return new FileIterator($this->resolveFile($file), $this->options);
30 | }
31 |
/**
 * Turns the configured source into a file object.
 *
 * An SplFileObject is used as-is and a string is opened as a path.
 *
 * @throws ExtractException when the value is neither a string nor an SplFileObject
 */
private function resolveFile(mixed $file): SplFileObject
{
    if ($file instanceof SplFileObject) {
        return $file;
    }

    if (is_string($file)) {
        return new SplFileObject($file);
    }

    throw new ExtractException('Invalid file.');
}
40 | }
41 |
--------------------------------------------------------------------------------
/src/Extractor/IterableExtractor.php:
--------------------------------------------------------------------------------
1 | $source
17 | */
18 | public function __construct(
19 | public iterable $source = new EmptyIterator(),
20 | ) {
21 | }
22 |
/**
 * Returns the iterable to extract from: the state's source when set, otherwise the
 * one given to the constructor.
 *
 * @throws ExtractException when the resolved source is not iterable
 */
public function extract(EtlState $state): iterable
{
    $candidate = $state->source ?? $this->source;

    return is_iterable($candidate)
        ? $candidate
        : throw new ExtractException('Provided source is not iterable.');
}
33 | }
34 |
--------------------------------------------------------------------------------
/src/Extractor/IterableExtractorInterface.php:
--------------------------------------------------------------------------------
1 |
13 | */
14 | public function extract(EtlState $state): iterable;
15 | }
16 |
--------------------------------------------------------------------------------
/src/Extractor/JSONExtractor.php:
--------------------------------------------------------------------------------
1 | source ?? $this->source;
26 |
27 | $source = $this->resolveFile($source);
28 | if ($source instanceof SplFileObject) {
29 | $content = $source->fread($source->getSize());
30 | }
31 |
32 | if (is_string($content)) {
33 | $content = json_decode($content, true);
34 | }
35 |
36 | if (null === $content) {
37 | return new EmptyIterator();
38 | }
39 |
40 | if (!is_iterable($content)) {
41 | throw new ExtractException('Provided JSON is not iterable.');
42 | }
43 |
44 | yield from $content;
45 | }
46 |
/**
 * Resolves a file object from the source, or null when the source is not a file.
 *
 * An SplFileObject is returned as-is; a string starting with `file://` is opened
 * with that prefix removed. Anything else yields null.
 */
private function resolveFile(mixed $source): ?SplFileObject
{
    if ($source instanceof SplFileObject) {
        return $source;
    }

    if (is_string($source) && str_starts_with($source, 'file://')) {
        // 7 === strlen('file://')
        return new SplFileObject(substr($source, 7));
    }

    return null;
}
55 | }
56 |
--------------------------------------------------------------------------------
/src/Extractor/ReactStreamExtractor.php:
--------------------------------------------------------------------------------
1 | |ReadableStreamInterface|null $stream
15 | */
16 | public function __construct(
17 | public ReadableStreamInterface|iterable|null $stream = null,
18 | ) {
19 | }
20 |
/**
 * Returns a readable stream for the state's source, falling back to the stream or
 * iterable given to the constructor.
 *
 * When both are null, ensureStream() rejects the value with a TypeError because its
 * parameter is not nullable.
 */
public function extract(EtlState $state): ReadableStreamInterface
{
    $source = $state->source ?? $this->stream;

    return $this->ensureStream($source);
}
25 |
/**
 * Returns the argument unchanged when it is already a stream, otherwise wraps the
 * iterable into an IteratorStream.
 *
 * @param iterable|ReadableStreamInterface $items
 */
private function ensureStream(iterable|ReadableStreamInterface $items): ReadableStreamInterface
{
    if ($items instanceof ReadableStreamInterface) {
        return $items;
    }

    return new IteratorStream($items);
}
33 | }
34 |
--------------------------------------------------------------------------------
/src/Extractor/STDINExtractor.php:
--------------------------------------------------------------------------------
1 |
13 | */
final class STDINExtractor implements Iterator, IterableExtractorInterface
{
    /**
     * Handle on php://stdin. It is only assigned by rewind(), so the other Iterator
     * methods must not be called before the first rewind().
     */
    private SplFileObject $stdIn;

    /**
     * Yields the lines read from standard input by iterating over this object.
     *
     * The state is not consulted.
     */
    public function extract(EtlState $state): iterable
    {
        yield from $this;
    }

    /**
     * Opens a fresh handle on php://stdin that drops the trailing newline of each line.
     */
    public function rewind(): void
    {
        $handle = new SplFileObject('php://stdin');
        $handle->setFlags(SplFileObject::DROP_NEW_LINE);

        $this->stdIn = $handle;
    }

    /**
     * Delegates to the underlying file object.
     */
    public function valid(): bool
    {
        return $this->stdIn->valid();
    }

    /**
     * Delegates to the underlying file object.
     */
    public function current(): string|false
    {
        return $this->stdIn->current();
    }

    /**
     * Delegates to the underlying file object.
     */
    public function key(): int
    {
        return $this->stdIn->key();
    }

    /**
     * Delegates to the underlying file object.
     */
    public function next(): void
    {
        $this->stdIn->next();
    }
}
49 |
--------------------------------------------------------------------------------
/src/Extractor/TextLinesExtractor.php:
--------------------------------------------------------------------------------
1 | setIgnoreUndefined();
29 | $resolver->setDefaults(['skipEmptyLines' => true]);
30 | $resolver->setAllowedTypes('skipEmptyLines', 'bool');
31 | $this->options = $resolver->resolve($options);
32 | }
33 |
/**
 * Builds the line iterator for the state's source, or for the constructor content
 * when the state has none.
 *
 * A null content gives an empty iterator. Otherwise the `skipEmptyLines` option picks
 * StrTokIterator (enabled) or PregSplitIterator (disabled).
 */
public function extract(EtlState $state): StrTokIterator|PregSplitIterator|EmptyIterator
{
    $text = $state->source ?? $this->content;

    if (null === $text) {
        return new EmptyIterator();
    }

    return $this->options['skipEmptyLines']
        ? new StrTokIterator($text)
        : new PregSplitIterator($text);
}
48 | }
49 |
--------------------------------------------------------------------------------
/src/Internal/ClonableTrait.php:
--------------------------------------------------------------------------------
1 | $cloneArgs
26 | */
27 | public function cloneWith(array $cloneArgs = []): static
28 | {
29 | static $refl, $notPromotedWritablePropNames, $constructorParamNames;
30 | $refl ??= new ReflectionClass($this);
31 | $constructorParamNames ??= array_column($refl->getConstructor()->getParameters(), 'name');
32 | $notPromotedWritablePropNames ??= array_column(
33 | array_filter(
34 | $refl->getProperties(),
35 | fn (ReflectionProperty $property) => !$property->isReadOnly() && !$property->isPromoted(),
36 | ),
37 | 'name'
38 | );
39 |
40 | $clone = new static(...array_fill_from($constructorParamNames, get_object_vars($this), $cloneArgs));
41 | $notPromotedProps = array_fill_from($notPromotedWritablePropNames, get_object_vars($this), $cloneArgs);
42 | foreach ($notPromotedProps as $prop => $value) {
43 | $clone->{$prop} = $value;
44 | }
45 |
46 | return $clone;
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/Internal/ConditionalLoaderTrait.php:
--------------------------------------------------------------------------------
1 | supports($item, $state);
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/src/Internal/DispatchEventsTrait.php:
--------------------------------------------------------------------------------
1 | eventDispatcher->dispatch($event);
32 |
33 | return $event;
34 | }
35 |
/**
 * Instantiates and dispatches an event, but only when someone listens to it.
 *
 * The event is built as `new $eventClass($state, ...$args)`. When the listener
 * provider has no listener for the class, no event object is created and null is
 * returned.
 *
 * @template E of Event
 *
 * @param class-string<E> $eventClass
 *
 * @return E|null
 */
private function emit(string $eventClass, EtlState $state, mixed ...$args): ?Event
{
    if ($this->listenerProvider->hasListeners($eventClass)) {
        return $this->dispatch(new $eventClass($state, ...$args));
    }

    return null;
}
51 |
/**
 * Emits an ExtractEvent and returns the item carried by the event afterwards.
 *
 * The original item is returned when nothing was dispatched or when the event's
 * item is null.
 */
private function emitExtractEvent(EtlState $state, mixed $item): mixed
{
    return $this->emit(ExtractEvent::class, $state, $item)?->item ?? $item;
}
58 |
/**
 * Emits a TransformEvent and returns the resulting transform result.
 *
 * The value left on the event (or the original result when nothing was dispatched or
 * the event holds null) is passed through TransformResult::create().
 */
private function emitTransformEvent(EtlState $state, TransformResult $transformResult): TransformResult
{
    $dispatched = $this->emit(TransformEvent::class, $state, $transformResult);
    $outcome = $dispatched?->transformResult ?? $transformResult;

    return TransformResult::create($outcome);
}
65 |
/**
 * Emits a BeforeLoadEvent and returns the item carried by the event afterwards.
 *
 * The original item is returned when nothing was dispatched or when the event's
 * item is null.
 */
private function emitBeforeLoadEvent(EtlState $state, mixed $item): mixed
{
    return $this->emit(BeforeLoadEvent::class, $state, $item)?->item ?? $item;
}
72 | }
73 |
--------------------------------------------------------------------------------
/src/Internal/EtlBuilderTrait.php:
--------------------------------------------------------------------------------
1 |
33 | */
34 | use EtlEventListenersTrait;
35 |
/**
 * Returns a copy of this object configured with the given extractor(s).
 *
 * Plain callables are wrapped in a CallableExtractor. A single extractor is used
 * directly; several are combined into a ChainExtractor.
 */
public function extractFrom(
    ExtractorInterface|callable $extractor,
    ExtractorInterface|callable ...$extractors
): self {
    $normalized = array_map(
        static fn ($candidate) => $candidate instanceof ExtractorInterface
            ? $candidate
            : new CallableExtractor($candidate(...)),
        [$extractor, ...$extractors],
    );

    $resolved = 1 === count($normalized)
        ? $normalized[0]
        : new ChainExtractor(...$normalized);

    return $this->cloneWith(['extractor' => $resolved]);
}
54 |
/**
 * Returns a copy of this object configured with the given transformer(s).
 *
 * Plain callables are wrapped in a CallableTransformer. A single transformer is used
 * directly; several are combined into a ChainTransformer.
 */
public function transformWith(
    TransformerInterface|callable $transformer,
    TransformerInterface|callable ...$transformers
): self {
    $normalized = array_map(
        static fn ($candidate) => $candidate instanceof TransformerInterface
            ? $candidate
            : new CallableTransformer($candidate(...)),
        [$transformer, ...$transformers],
    );

    $resolved = 1 === count($normalized)
        ? $normalized[0]
        : new ChainTransformer(...$normalized);

    return $this->cloneWith(['transformer' => $resolved]);
}
73 |
/**
 * Returns a copy of this object configured with the given loader(s).
 *
 * Plain callables are wrapped in a CallableLoader. A single loader is used directly;
 * several are combined into a ChainLoader.
 */
public function loadInto(LoaderInterface|callable $loader, LoaderInterface|callable ...$loaders): self
{
    $normalized = array_map(
        static fn ($candidate) => $candidate instanceof LoaderInterface
            ? $candidate
            : new CallableLoader($candidate(...)),
        [$loader, ...$loaders],
    );

    $resolved = 1 === count($normalized)
        ? $normalized[0]
        : new ChainLoader(...$normalized);

    return $this->cloneWith(['loader' => $resolved]);
}
90 |
/**
 * Returns a copy of this object whose `options` are replaced by the given configuration.
 */
public function withOptions(EtlConfiguration $configuration): self
{
    $overrides = ['options' => $configuration];

    return $this->cloneWith($overrides);
}
95 |
/**
 * Applies one or more recipes in the given order and returns the decorated result.
 *
 * Callables are first converted with Recipe::fromCallable(); each recipe receives the
 * output of the previous one.
 */
public function withRecipe(Recipe|callable $recipe, Recipe|callable ...$recipes): self
{
    $decorated = $this;

    foreach ([$recipe, ...$recipes] as $candidate) {
        $step = $candidate instanceof Recipe ? $candidate : Recipe::fromCallable($candidate);
        $decorated = $step->decorate($decorated);
    }

    return $decorated;
}
108 |
/**
 * Returns a copy of this object that uses the given processor.
 */
public function withProcessor(ProcessorInterface $processor): self
{
    $overrides = ['processor' => $processor];

    return $this->cloneWith($overrides);
}
113 |
/**
 * Returns a copy of this object with an updated context.
 *
 * The new context is built from three layers, later layers winning:
 *  1. the current context (skipped when $clear is true),
 *  2. the given $context,
 *  3. when $overwrite is false, the current values of the keys present in both, which
 *     puts the existing values back on top.
 *
 * @param array<array-key, mixed> $context
 */
public function withContext(array $context, bool $clear = false, bool $overwrite = true): self
{
    $base = $clear ? [] : $this->context;
    $preserved = $overwrite ? [] : array_intersect_key($this->context, $context);

    return $this->cloneWith(['context' => [...$base, ...$context, ...$preserved]]);
}
125 | }
126 |
--------------------------------------------------------------------------------
/src/Internal/EtlEventListenersTrait.php:
--------------------------------------------------------------------------------
1 | listenTo(InitEvent::class, $callback, $priority);
36 | }
37 |
/**
 * Registers a StartEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(StartEvent): void $callback
 */
public function onStart(callable $callback, int $priority = 0): self
{
    $eventClass = StartEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
45 |
/**
 * Registers an ExtractEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(ExtractEvent): void $callback
 */
public function onExtract(callable $callback, int $priority = 0): self
{
    $eventClass = ExtractEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
53 |
/**
 * Registers an ExtractExceptionEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(ExtractExceptionEvent): void $callback
 */
public function onExtractException(callable $callback, int $priority = 0): self
{
    $eventClass = ExtractExceptionEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
61 |
/**
 * Registers a TransformEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(TransformEvent): void $callback
 */
public function onTransform(callable $callback, int $priority = 0): self
{
    $eventClass = TransformEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
69 |
/**
 * Registers a TransformExceptionEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(TransformExceptionEvent): void $callback
 */
public function onTransformException(callable $callback, int $priority = 0): self
{
    $eventClass = TransformExceptionEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
77 |
/**
 * Registers a BeforeLoadEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(BeforeLoadEvent): void $callback
 */
public function onBeforeLoad(callable $callback, int $priority = 0): self
{
    $eventClass = BeforeLoadEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
85 |
/**
 * Registers a LoadEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(LoadEvent): void $callback
 */
public function onLoad(callable $callback, int $priority = 0): self
{
    $eventClass = LoadEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
93 |
/**
 * Registers a LoadExceptionEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(LoadExceptionEvent): void $callback
 */
public function onLoadException(callable $callback, int $priority = 0): self
{
    $eventClass = LoadExceptionEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
101 |
/**
 * Registers a FlushEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(FlushEvent): void $callback
 */
public function onFlush(callable $callback, int $priority = 0): self
{
    $eventClass = FlushEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
109 |
/**
 * Registers a FlushExceptionEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(FlushExceptionEvent): void $callback
 */
public function onFlushException(callable $callback, int $priority = 0): self
{
    $eventClass = FlushExceptionEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
117 |
/**
 * Registers an EndEvent listener and returns the instance produced by listenTo().
 *
 * @param callable(EndEvent): void $callback
 */
public function onEnd(callable $callback, int $priority = 0): self
{
    $eventClass = EndEvent::class;

    return $this->listenTo($eventClass, $callback, $priority);
}
125 |
/**
 * Clones this object and registers the listener on the clone's listener provider.
 *
 * NOTE(review): this only leaves the current instance untouched if cloneWith() gives
 * the clone its own listener provider rather than sharing the same object; confirm
 * against cloneWith() and the using class.
 */
private function listenTo(string $eventClass, callable $callback, int $priority = 0): self
{
    $copy = $this->cloneWith();
    $provider = $copy->listenerProvider;
    $provider->listenTo($eventClass, $callback, $priority);

    return $copy;
}
133 | }
134 |
--------------------------------------------------------------------------------
/src/Internal/StateHolder.php:
--------------------------------------------------------------------------------
1 |
15 | */
final class TransformResult implements IteratorAggregate
{
    /**
     * The transformed value, or the list of values collected from a generator.
     */
    public mixed $value;

    /**
     * True when $value holds several items that must be iterated individually.
     */
    public bool $iterable;

    /**
     * Instances are only built through create().
     */
    private function __construct()
    {
    }

    /**
     * Yields every collected item when the result is iterable, otherwise the single value.
     */
    public function getIterator(): Traversable
    {
        if (!$this->iterable) {
            yield $this->value;

            return;
        }

        yield from $this->value;
    }

    /**
     * Wraps a transformer's return value.
     *
     * An existing TransformResult is returned unchanged. A Generator is consumed
     * right away into an array and flagged as iterable. Any other value, arrays
     * included, is stored as a single non-iterable item.
     */
    public static function create(mixed $value): self
    {
        if ($value instanceof self) {
            return $value;
        }

        // New instances are cloned from one shared blank instance.
        static $prototype;
        $prototype ??= new self();

        $result = clone $prototype;
        $result->iterable = $value instanceof Generator;
        $result->value = $result->iterable ? [...$value] : $value;

        return $result;
    }
}
55 |
--------------------------------------------------------------------------------
/src/Iterator/CSVIterator.php:
--------------------------------------------------------------------------------
1 | >
27 | */
28 | final readonly class CSVIterator implements IteratorAggregate
29 | {
30 | /**
31 | * @var array{
32 | * delimiter: string,
33 | * enclosure: string,
34 | * escapeString: string,
35 | * columns: 'auto'|string[]|null,
36 | * normalizers: ValueNormalizerInterface[],
37 | * skipFirstRow: bool,
38 | * }
39 | */
40 | private array $options;
41 |
/**
 * Stores the text source and resolves the CSV options against their defaults.
 *
 * Unknown option keys are ignored. `columns` accepts null, the string 'auto' or a
 * list of column names.
 *
 * @param Traversable<string> $text
 * @param array{
 *     delimiter?: string,
 *     enclosure?: string,
 *     escapeString?: string,
 *     columns?: 'auto'|string[]|null,
 *     normalizers?: ValueNormalizerInterface[],
 *     skipFirstRow?: bool,
 * } $options
 */
public function __construct(
    private Traversable $text,
    array $options = [],
) {
    $defaults = [
        'delimiter' => ',',
        'enclosure' => '"',
        'escapeString' => '\\',
        'columns' => null,
        'normalizers' => [
            new NumericStringToNumberNormalizer(),
            new EmptyStringToNullNormalizer(),
        ],
        'skipFirstRow' => false,
    ];

    $resolver = (new OptionsResolver())
        ->setIgnoreUndefined()
        ->setDefaults($defaults)
        ->setAllowedTypes('delimiter', 'string')
        ->setAllowedTypes('enclosure', 'string')
        ->setAllowedTypes('escapeString', 'string')
        ->setAllowedTypes('normalizers', ValueNormalizerInterface::class.'[]')
        ->setAllowedTypes('columns', ['string[]', 'null', 'string'])
        ->setAllowedValues(
            'columns',
            static fn (array|string|null $value): bool => null === $value || 'auto' === $value || is_array($value),
        )
        ->setAllowedTypes('skipFirstRow', 'bool');

    $this->options = $resolver->resolve($options);
}
80 |
/**
 * Normalizes the fields of one row and, when column names are known, keys them by name.
 *
 * Every configured normalizer is applied to each field, in order.
 *
 * @param array<mixed> $data
 * @param list<string>|null $columns
 *
 * @return array<mixed>
 */
private function extract(array $data, ?array $columns): array
{
    foreach ($data as $index => $field) {
        foreach ($this->options['normalizers'] as $normalizer) {
            $field = $normalizer->normalize($field);
        }

        $data[$index] = $field;
    }

    if (null === $columns || [] === $columns) {
        return $data;
    }

    return self::combine($columns, $data);
}
101 |
/**
 * Iterates the CSV rows, reading through the file API when the source is an
 * SplFileObject and parsing line by line otherwise.
 */
public function getIterator(): Traversable
{
    $source = $this->text;

    return $source instanceof SplFileObject
        ? $this->iterateFromFile($source)
        : $this->iterateFromContent($source);
}
110 |
/**
 * The first row is not emitted when `skipFirstRow` is set or when `columns` is 'auto'.
 */
private function shouldSkipFirstRow(): bool
{
    if ('auto' === $this->options['columns']) {
        return true;
    }

    return $this->options['skipFirstRow'];
}
115 |
/**
 * Reads rows from a file object with fgetcsv() and yields them normalized.
 *
 * READ_CSV is added to the file's existing flags. With `columns: 'auto'`, the fields
 * of the first row become the column names; with explicit columns and a skipped first
 * row, that row is simply dropped.
 *
 * @return Traversable<array<mixed>>
 */
private function iterateFromFile(SplFileObject $file): Traversable
{
    $file->setFlags(SplFileObject::READ_CSV | $file->getFlags());

    $columns = 'auto' === $this->options['columns'] ? null : $this->options['columns'];

    while (!$file->eof()) {
        $fields = $file->fgetcsv(
            $this->options['delimiter'],
            $this->options['enclosure'],
            $this->options['escapeString'],
        );

        // fgetcsv() returns false when nothing more can be read; passing that to
        // extract(array ...) would raise a TypeError, so stop reading instead.
        if (false === $fields) {
            break;
        }

        // A blank line is reported as a single null field.
        if ([null] === $fields) {
            continue;
        }

        if (0 === $file->key() && $this->shouldSkipFirstRow()) {
            $columns ??= $fields;
            continue;
        }

        yield $this->extract($fields, $columns);
    }
}
144 |
/**
 * Parses each line of the source with str_getcsv() and yields the rows normalized.
 *
 * The first row is recognized by its key being integer 0. With `columns: 'auto'` its
 * fields become the column names; otherwise a skipped first row is simply dropped.
 *
 * @param Traversable<string> $content
 *
 * @return Traversable<array<mixed>>
 */
private function iterateFromContent(Traversable $content): Traversable
{
    $columns = 'auto' === $this->options['columns'] ? null : $this->options['columns'];
    $skipFirstRow = $this->shouldSkipFirstRow();

    foreach ($content as $index => $line) {
        $fields = str_getcsv(
            $line,
            $this->options['delimiter'],
            $this->options['enclosure'],
            $this->options['escapeString'],
        );

        if ($skipFirstRow && 0 === $index) {
            $columns ??= $fields;
            continue;
        }

        yield $this->extract($fields, $columns);
    }
}
170 |
/**
 * Keys the values by the given names, tolerating a length mismatch.
 *
 * Extra values are dropped; missing values are filled with null.
 *
 * @param string[] $keys
 * @param array<mixed> $values
 *
 * @return array<string, mixed>
 */
private static function combine(array $keys, array $values): array
{
    $expected = count($keys);

    // Pad first (no-op when long enough), then truncate (no-op when short enough).
    $aligned = array_slice(array_pad(array_values($values), $expected, null), 0, $expected);

    return array_combine($keys, $aligned);
}
192 | }
193 |
--------------------------------------------------------------------------------
/src/Iterator/ConsumableIterator.php:
--------------------------------------------------------------------------------
1 | $items
25 | */
26 | public function __construct(iterable $items)
27 | {
28 | $this->iterator = iterable_to_iterator($items);
29 | }
30 |
/**
 * Returns the current item and advances the underlying iterator.
 *
 * The iterator is rewound on the first call. Completion is only detected after
 * advancing, so the iterator is flagged as ended by the call that returns the last
 * item. For an iterable with no items, the first call still returns whatever
 * current() gives on an invalid iterator before the end is flagged.
 *
 * @throws OutOfRangeException when called after the iterator has ended
 */
public function consume(): mixed
{
    if ($this->ended) {
        throw new OutOfRangeException('This iterator has no more items.'); // @codeCoverageIgnore
    }

    if (false === $this->started) {
        $this->started = true;
        $this->iterator->rewind();
    }

    $item = $this->iterator->current();
    $this->iterator->next();

    // $ended is false at this point, so a plain assignment equals "set when exhausted".
    $this->ended = !$this->iterator->valid();

    return $item;
}
51 |
/**
 * Tells whether consume() has already flagged the iterator as ended.
 *
 * This reads a flag only; it never touches the underlying iterator, so it stays
 * false until consume() has been called at least once.
 */
public function isComplete(): bool
{
    return true === $this->ended;
}
56 | }
57 |
--------------------------------------------------------------------------------
/src/Iterator/FileIterator.php:
--------------------------------------------------------------------------------
1 |
18 | */
final readonly class FileIterator implements IteratorAggregate
{
    /**
     * @var array{skipEmptyLines: bool}
     */
    private array $options;

    /**
     * Stores the file and resolves the options; unknown option keys are ignored.
     *
     * @param array{skipEmptyLines?: bool} $options
     */
    public function __construct(
        private SplFileObject $file,
        array $options = [],
    ) {
        $resolver = (new OptionsResolver())->setIgnoreUndefined();
        $resolver->setDefaults(['skipEmptyLines' => true]);
        $resolver->setAllowedTypes('skipEmptyLines', 'bool');
        $this->options = $resolver->resolve($options);
    }

    /**
     * Yields the lines of the file without their line terminator.
     *
     * Lines that are empty once the terminator is removed are skipped when
     * `skipEmptyLines` is enabled (the default).
     */
    public function getIterator(): Traversable
    {
        foreach ($this->file as $row) {
            // rtrim() takes a character list. Using "\r\n" instead of PHP_EOL strips
            // LF, CRLF and CR endings whatever platform the code runs on; with PHP_EOL
            // a CRLF file read on Unix kept a trailing "\r" on every line, so blank
            // lines were not recognized as empty.
            $line = rtrim($row, "\r\n");
            if ($this->options['skipEmptyLines'] && '' === $line) {
                continue;
            }
            yield $line;
        }
    }
}
50 |
--------------------------------------------------------------------------------
/src/Iterator/IteratorStream.php:
--------------------------------------------------------------------------------
1 |
24 | */
25 | public readonly ConsumableIterator $iterator;
26 | public bool $paused = false;
27 |
/**
 * Wraps the items in a ConsumableIterator and immediately starts emitting.
 *
 * @param iterable<mixed> $items
 */
public function __construct(iterable $items)
{
    $consumable = new ConsumableIterator($items);
    $this->iterator = $consumable;

    $this->resume();
}
36 |
/**
 * The stream is readable for as long as the wrapped iterator is not complete.
 */
public function isReadable(): bool
{
    $exhausted = $this->iterator->isComplete();

    return !$exhausted;
}
41 |
/**
 * True while a futureTick callback scheduled by process() has not run yet.
 *
 * It guarantees that at most one emission chain exists at any time.
 */
private bool $tickScheduled = false;

/**
 * Stops the emission of `data` events until resume() is called.
 */
public function pause(): void
{
    $this->paused = true;
}

/**
 * Resumes the emission of `data` events.
 *
 * Calling it while the stream is already flowing is harmless: no second emission
 * chain is started.
 */
public function resume(): void
{
    $this->paused = false;
    $this->process();
}

/**
 * Emits one item per loop tick until the iterator is complete, then emits `end`
 * and closes the stream.
 *
 * Previously the tick chain kept rescheduling itself while paused (a busy loop) and
 * every resume() started an additional chain. After a single pause/resume cycle two
 * chains were consuming items, and the late one called consume() on an iterator that
 * had already ended. Now a paused tick simply stops, resume() restarts the chain,
 * and $tickScheduled prevents duplicates.
 */
private function process(): void
{
    if ($this->tickScheduled) {
        // The pending tick will pick up the current state.
        return;
    }

    if ($this->iterator->isComplete()) {
        $this->emit('end');
        $this->close();

        return;
    }

    $this->tickScheduled = true;
    Loop::futureTick(function () {
        $this->tickScheduled = false;

        if ($this->paused) {
            // resume() calls process() again.
            return;
        }

        $this->emit('data', [$this->iterator->consume()]);
        $this->process();
    });
}
67 |
/**
 * Pipes this stream into the destination through Util::pipe() and returns the
 * destination so calls can be chained.
 *
 * @param array<string, mixed> $options forwarded unchanged to Util::pipe()
 */
public function pipe(WritableStreamInterface $dest, array $options = []): WritableStreamInterface
{
    $source = $this;
    Util::pipe($source, $dest, $options);

    return $dest;
}
77 |
/**
 * Emits the `close` event.
 *
 * Nothing else happens here: listeners are kept and pending emission is not cancelled.
 */
public function close(): void
{
    $event = 'close';
    $this->emit($event);
}
82 | }
83 |
--------------------------------------------------------------------------------
/src/Iterator/PregSplitIterator.php:
--------------------------------------------------------------------------------
1 |
17 | */
final readonly class PregSplitIterator implements IteratorAggregate
{
    /**
     * @param string $content the text to split into lines
     */
    public function __construct(
        public string $content,
    ) {
    }

    /**
     * Splits the content on LF, CRLF or CR and yields every piece, empty ones included.
     *
     * @return Traversable<int, string>
     */
    public function getIterator(): Traversable
    {
        foreach (preg_split("/((\r?\n)|(\r\n?))/", $this->content) as $piece) {
            yield rtrim($piece, PHP_EOL);
        }
    }
}
33 |
--------------------------------------------------------------------------------
/src/Iterator/StrTokIterator.php:
--------------------------------------------------------------------------------
1 |
19 | */
final readonly class StrTokIterator implements IteratorAggregate
{
    /**
     * @param string $content the text to split into non-empty lines
     */
    public function __construct(
        public string $content,
    ) {
    }

    /**
     * Yields the non-empty lines of the content; any run of CR/LF characters is one
     * separator, so blank lines are skipped.
     *
     * The scan keeps its own offset instead of relying on strtok(). strtok() has a
     * single process-wide cursor and this generator suspends between two reads, so
     * any strtok() call made by the consumer (or by another instance iterated at the
     * same time) used to corrupt the remaining output.
     *
     * @return Traversable<int, string>
     */
    public function getIterator(): Traversable
    {
        $length = strlen($this->content);

        // Skip leading line breaks, as strtok() does.
        $offset = strspn($this->content, "\r\n");

        while ($offset < $length) {
            $lineLength = strcspn($this->content, "\r\n", $offset);

            yield substr($this->content, $offset, $lineLength);

            $offset += $lineLength;
            // Jump over the whole run of line-break characters that follows.
            $offset += strspn($this->content, "\r\n", $offset);
        }
    }
}
37 |
--------------------------------------------------------------------------------
/src/Loader/CSVLoader.php:
--------------------------------------------------------------------------------
1 | setIgnoreUndefined();
35 | $resolver->setDefaults([
36 | 'delimiter' => ',',
37 | 'enclosure' => '"',
38 | 'escapeString' => '\\',
39 | 'columns' => null,
40 | 'eol' => PHP_EOL,
41 | ]);
42 | $resolver->setAllowedTypes('delimiter', 'string');
43 | $resolver->setAllowedTypes('enclosure', 'string');
44 | $resolver->setAllowedTypes('escapeString', 'string');
45 | $resolver->setAllowedTypes('columns', ['string[]', 'null', 'string']);
46 | $resolver->setAllowedValues('columns', function (array|string|null $value) {
47 | return 'auto' === $value || null === $value || is_array($value);
48 | });
49 | $resolver->setAllowedTypes('eol', 'string');
50 | $this->options = $resolver->resolve($options);
51 | }
52 |
/**
 * Queues an item for the next flush, preceded once by a header row when columns are configured.
 *
 * The header is the configured column list, or the item's keys when `columns` is
 * 'auto'. Pending rows and the "header written" flag live in the state's context
 * under this class's name.
 */
public function load(mixed $item, EtlState $state): void
{
    $context = &$state->context[__CLASS__];
    $context['columsWritten'] ??= false;

    if (!$context['columsWritten']) {
        $columns = $this->options['columns'];
        $header = match (true) {
            is_array($columns) => $columns,
            'auto' === $columns => array_keys($item),
            default => null,
        };

        if (null !== $header) {
            $context['pending'][] = $header;
            $context['columsWritten'] = true;
        }
    }

    $context['pending'][] = $item;
}
70 |
/**
 * Writes the pending rows to the destination and empties the queue.
 *
 * The destination is resolved on the first flush and then reused from the context.
 * On the final flush of an in-memory destination (SplTempFileObject), the whole CSV
 * content is returned; in every other case the result is `file://` followed by the
 * file's path name.
 */
public function flush(bool $isPartial, EtlState $state): string
{
    $context = &$state->context[__CLASS__];
    $context['pending'] ??= [];
    $context['file'] ??= $this->resolveDestination($state->destination ?? $this->destination);
    $file = $context['file'];

    foreach ($context['pending'] as $row) {
        $this->write($file, $row);
    }
    $context['pending'] = [];

    if ($isPartial || !$file instanceof SplTempFileObject) {
        return 'file://'.$file->getPathname();
    }

    $file->rewind();

    return implode('', [...$file]); // @phpstan-ignore-line
}
90 |
/**
 * Writes one row to the file using the configured CSV dialect.
 *
 * @param array<mixed> $item
 */
private function write(SplFileObject $file, array $item): void
{
    [
        'delimiter' => $delimiter,
        'enclosure' => $enclosure,
        'escapeString' => $escape,
        'eol' => $eol,
    ] = $this->options;

    $file->fputcsv($item, $delimiter, $enclosure, $escape, $eol);
}
99 |
/**
 * Turns the configured destination into a writable file object.
 *
 * An SplFileObject is used as-is, a `file://` string is opened in write mode with
 * that prefix removed, and null gives an in-memory temporary file.
 *
 * @throws LoadException for any other value
 */
private function resolveDestination(mixed $destination): SplFileObject
{
    if ($destination instanceof SplFileObject) {
        return $destination;
    }

    if (is_string($destination) && str_starts_with($destination, 'file://')) {
        // 7 === strlen('file://')
        return new SplFileObject(substr($destination, 7), 'w');
    }

    if (null === $destination) {
        return new SplTempFileObject();
    }

    throw new LoadException('Invalid destination.');
}
111 | }
112 |
--------------------------------------------------------------------------------
/src/Loader/CallableLoader.php:
--------------------------------------------------------------------------------
1 | destination ?? $this->closure;
23 | if (!is_callable($callback)) {
24 | throw new LoadException('Invalid destination.');
25 | }
26 | $state->context[__CLASS__]['loaded'][] = $callback($item, $state);
27 | }
28 |
/**
 * Moves the values collected under `loaded` to `output` and returns the accumulated output.
 *
 * Both lists live in the state's context under this class's name; an empty array is
 * returned when nothing was ever collected.
 *
 * @codeCoverageIgnore
 */
public function flush(bool $isPartial, EtlState $state): mixed
{
    $loaded = $state->context[__CLASS__]['loaded'] ?? [];

    foreach (array_keys($loaded) as $index) {
        $state->context[__CLASS__]['output'][] = $loaded[$index];
        unset($state->context[__CLASS__]['loaded'][$index]);
    }

    return $state->context[__CLASS__]['output'] ?? [];
}
41 | }
42 |
--------------------------------------------------------------------------------
/src/Loader/ChainLoader.php:
--------------------------------------------------------------------------------
1 | $_loader) {
25 | if (!$_loader instanceof LoaderInterface) {
26 | $loaders[$l] = new CallableLoader($_loader(...));
27 | }
28 | }
29 | $this->loaders = $loaders;
30 | }
31 |
/**
 * Returns a new chain made of the current loaders followed by the given ones.
 * The current instance is left untouched.
 */
public function with(
    LoaderInterface|callable $loader,
    LoaderInterface|callable ...$loaders,
): self {
    $chained = [...$this->loaders, $loader, ...$loaders];

    return new self(...$chained);
}
38 |
/**
 * Hands the item to each chained loader, in order, skipping those for which shouldLoad() is false.
 */
public function load(mixed $item, EtlState $state): void
{
    foreach ($this->loaders as $loader) {
        if (!self::shouldLoad($loader, $item, $state)) {
            continue;
        }

        $loader->load($item, $state);
    }
}
47 |
/**
 * Flushes every chained loader, in order.
 *
 * @return mixed the output of the last loader of the chain, or null when the chain is empty
 */
public function flush(bool $isPartial, EtlState $state): mixed
{
    $lastOutput = null;
    foreach ($this->loaders as $loader) {
        $lastOutput = $loader->flush($isPartial, $state);
    }

    return $lastOutput;
}
56 |
/**
 * Returns the loader itself when it already is a chain, otherwise wraps it into a new chain.
 */
public static function from(LoaderInterface $loader): self
{
    return $loader instanceof self ? $loader : new self($loader);
}
64 | }
65 |
--------------------------------------------------------------------------------
/src/Loader/ConditionalLoaderInterface.php:
--------------------------------------------------------------------------------
1 | managerRegistry->getManagerForClass($item::class)
30 | ?? throw new LoadException(sprintf('Could not find manager for class %s.', $item::class));
31 |
32 | $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage();
33 | $managers->attach($manager);
34 | $manager->persist($item);
35 | }
36 |
/**
 * Flushes every object manager registered by load() since the previous flush,
 * and removes each of them from the registry once flushed.
 */
public function flush(bool $isPartial, EtlState $state): null
{
    $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage();

    // Iterate over a snapshot: detaching the current entry while iterating the storage itself
    // disturbs its internal cursor, so some managers could be skipped and never flushed.
    foreach ([...$managers] as $manager) {
        $manager->flush();
        $managers->detach($manager);
    }

    return null;
}
47 | }
48 |
--------------------------------------------------------------------------------
/src/Loader/InMemoryLoader.php:
--------------------------------------------------------------------------------
1 | context['pending'][] = $item;
16 | }
17 |
/**
 * Stores the pending items as a new batch, then returns all batches merged into a single list.
 *
 * @return list<mixed>
 */
public function flush(bool $isPartial, EtlState $state): array
{
    $state->context['batchNumber'] ??= 0;

    // `pending` only exists once load() has been called: default to an empty list so that
    // flushing before any item was loaded does not read an undefined key.
    $pending = $state->context['pending'] ?? [];
    foreach ($pending as $item) {
        $state->context['batches'][$state->context['batchNumber']][] = $item;
    }
    $state->context['pending'] = [];
    ++$state->context['batchNumber'];

    return array_merge(...$state->context['batches'] ?? []);
}
32 | }
33 |
--------------------------------------------------------------------------------
/src/Loader/JSONLoader.php:
--------------------------------------------------------------------------------
1 | context[__CLASS__]['pending'][] = $item;
31 | }
32 |
/**
 * Writes the pending items to the destination and empties the pending list.
 *
 * @return string the whole file content on the final flush of an SplTempFileObject destination,
 *                otherwise "file://" followed by the destination pathname
 */
public function flush(bool $isPartial, EtlState $state): string
{
    $context = &$state->context[__CLASS__];
    $context['hasStarted'] ??= false;
    $context['pending'] ??= [];
    $context['file'] ??= $this->resolveDestination($state->destination ?? $this->destination);
    $file = $context['file'];

    if ($isPartial) {
        $this->earlyFlush($state, $file);
    } else {
        $this->finalFlush($state, $file);
    }
    $context['pending'] = [];

    if ($isPartial || !$file instanceof SplTempFileObject) {
        return 'file://'.$file->getPathname();
    }

    $file->rewind();

    return implode('', [...$file]); // @phpstan-ignore-line
}
55 |
/**
 * Writes the pending items to the file as a fragment of the JSON array being built.
 *
 * The opening bracket is written on the first call. A comma separator is written before a
 * fragment only when a previous call actually wrote items: otherwise an empty first flush
 * followed by a non-empty one would produce "[" directly followed by ",", which is invalid JSON.
 */
private function earlyFlush(EtlState $state, SplFileObject $file): void
{
    $context = &$state->context[__CLASS__];
    $pending = $context['pending'] ?? [];

    // Encode the batch as a JSON array, then strip its own brackets: only the inner items are written.
    $serialized = json_encode($pending, JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    $serialized = ltrim($serialized, '[');
    $serialized = rtrim($serialized, ']');
    $serialized = trim($serialized);

    $hasWrittenItems = $context['hasWrittenItems'] ?? false;

    if (!($context['openingBracket'] ?? false)) {
        $file->fwrite('[');
        $context['openingBracket'] = true;
        $file->fwrite(PHP_EOL.' '.$serialized);
    } elseif ([] !== $pending) {
        if ($hasWrittenItems) {
            $file->fwrite(',');
            $file->fwrite(PHP_EOL.' '.$serialized);
        } else {
            // The line break and indent were already written along with the opening bracket,
            // and no item follows them yet: continue right there, without a separator.
            $file->fwrite($serialized);
        }
    }

    if ([] !== $pending) {
        $context['hasWrittenItems'] = true;
    }
}
73 |
/**
 * Writes the remaining pending items, then closes the JSON array.
 * The closing bracket goes on its own line when at least one item was loaded.
 */
private function finalFlush(EtlState $state, SplFileObject $file): void
{
    $this->earlyFlush($state, $file);

    $lineBreak = $state->nbLoadedItems > 0 ? PHP_EOL : '';
    $file->fwrite($lineBreak.']'.PHP_EOL);
}
82 |
/**
 * Turns the requested destination into a file object to write to.
 *
 * - an SplFileObject is returned as-is;
 * - a string starting with "file://" is opened in "w" mode;
 * - null yields a new SplTempFileObject;
 * - anything else is rejected.
 *
 * @throws LoadException when the destination is none of the above
 */
private function resolveDestination(mixed $destination): SplFileObject
{
    if ($destination instanceof SplFileObject) {
        return $destination;
    }

    if (is_string($destination) && str_starts_with($destination, 'file://')) {
        // Drop the 7-character "file://" prefix to obtain the path.
        return new SplFileObject(substr($destination, 7), 'w');
    }

    if (null === $destination) {
        return new SplTempFileObject();
    }

    throw new LoadException('Invalid destination.');
}
94 | }
95 |
--------------------------------------------------------------------------------
/src/Loader/LoaderInterface.php:
--------------------------------------------------------------------------------
1 | context[__CLASS__]['pending'][] = $item;
33 | }
34 |
/**
 * Writes each pending item, followed by the configured end-of-line string, to php://stdout.
 *
 * @return int the number of bytes written since the per-run context was created
 */
public function flush(bool $isPartial, EtlState $state): int
{
    $pendingItems = $state->context[__CLASS__]['pending'] ?? [];
    $state->context[__CLASS__]['resource'] ??= fopen('php://stdout', 'wb+');
    $state->context[__CLASS__]['nbWrittenBytes'] ??= 0;
    foreach ($pendingItems as $item) {
        // fwrite() returns false on failure: count it as zero bytes.
        $state->context[__CLASS__]['nbWrittenBytes'] += (int) fwrite(
            $state->context[__CLASS__]['resource'],
            $item.$this->eol,
        );
    }

    // These items are now written: forget them, otherwise the next flush would print them again.
    $state->context[__CLASS__]['pending'] = [];

    $nbWrittenBytes = $state->context[__CLASS__]['nbWrittenBytes'];
    if (!$isPartial) {
        // Final flush: drop the whole per-run context (stream handle, byte counter, pending list).
        unset($state->context[__CLASS__]);
    }

    return $nbWrittenBytes;
}
55 | }
56 |
--------------------------------------------------------------------------------
/src/Normalizer/EmptyStringToNullNormalizer.php:
--------------------------------------------------------------------------------
1 | $items
28 | */
public function process(EtlExecutor $executor, EtlState $state, mixed $items): EtlState
{
    // Run each extracted item through the executor. A SkipRequest only abandons
    // the current item: iteration carries on with the next one.
    $extracted = $this->extract($executor, $state, $items);
    foreach ($extracted as $key => $item) {
        try {
            $executor->processItem($item, $key, $state);
        } catch (SkipRequest) {
            continue;
        }
    }

    return $state;
}
40 |
/**
 * Yields the given items (keys included); anything thrown while iterating them
 * is handed over to ExtractException::emit().
 *
 * @param iterable<mixed> $items
 */
public function extract(EtlExecutor $executor, EtlState $state, iterable $items): Generator
{
    try {
        yield from $items;
    } catch (Throwable $failure) {
        ExtractException::emit($executor, $failure, $state);
    }
}
52 | }
53 |
--------------------------------------------------------------------------------
/src/Processor/ProcessorInterface.php:
--------------------------------------------------------------------------------
1 | on('data', function (mixed $item) use ($executor, &$key, $state, $stream) {
38 | if (is_string($item)) {
39 | $item = trim($item);
40 | }
41 | try {
42 | $executor->processItem($item, ++$key, $state);
43 | } catch (SkipRequest) {
44 | } catch (StopRequest) {
45 | $stream->close();
46 | } catch (Throwable $e) {
47 | $stream->close();
48 | ExtractException::emit($executor, $e, $state);
49 | }
50 | });
51 |
52 | Loop::run();
53 |
54 | return $state->getLastVersion();
55 | }
56 |
/**
 * Configures the executor to extract with a ReactStreamExtractor and to use this object as its processor.
 */
public function decorate(EtlExecutor $executor): EtlExecutor
{
    $extractor = new ReactStreamExtractor();

    return $executor
        ->extractFrom($extractor)
        ->withProcessor($this);
}
61 | }
62 |
--------------------------------------------------------------------------------
/src/Recipe/FilterRecipe.php:
--------------------------------------------------------------------------------
1 | eventClass, self::EVENTS_CLASSES)) {
27 | throw new InvalidArgumentException(sprintf('Can only filter on ExtractEvent / LoadEvent, not %s', $this->eventClass));
28 | }
29 | }
30 |
/**
 * Registers this recipe as a listener of the configured event, with the configured priority.
 * The executor is returned unchanged for any other event class.
 */
public function decorate(EtlExecutor $executor): EtlExecutor
{
    if (ExtractEvent::class === $this->eventClass) {
        return $executor->onExtract($this(...), $this->priority);
    }

    if (BeforeLoadEvent::class === $this->eventClass) {
        return $executor->onBeforeLoad($this(...), $this->priority);
    }

    return $executor;
}
39 |
/**
 * Asks the state to skip the current item depending on the filter result:
 * in EXCLUDE mode the item is skipped when the filter returns a truthy value,
 * in any other mode it is skipped when the filter returns a falsy value.
 */
public function __invoke(ExtractEvent|BeforeLoadEvent $event): void
{
    $filterResult = (bool) ($this->filter)($event->item, $event->state);
    $shouldSkip = FilterRecipeMode::EXCLUDE === $this->mode ? $filterResult : !$filterResult;

    if ($shouldSkip) {
        $event->state->skip();
    }
}
51 | }
52 |
--------------------------------------------------------------------------------
/src/Recipe/FilterRecipeMode.php:
--------------------------------------------------------------------------------
1 | $logLevels
29 | * @param array $priorities
30 | */
public function __construct(
    private readonly LoggerInterface $logger = new NullLogger(),
    // Log level per event class; events not listed here are logged at $defaultLogLevel.
    private readonly array $logLevels = [
        StartEvent::class => LogLevel::INFO,
        FlushEvent::class => LogLevel::INFO,
        EndEvent::class => LogLevel::INFO,
        ExtractExceptionEvent::class => LogLevel::ERROR,
        TransformExceptionEvent::class => LogLevel::ERROR,
        LoadExceptionEvent::class => LogLevel::ERROR,
        FlushExceptionEvent::class => LogLevel::ERROR,
    ],
    private readonly string $defaultLogLevel = LogLevel::DEBUG,
    // Listener priority per event class; events not listed here use $defaultPriority.
    private readonly array $priorities = [],
    private readonly int $defaultPriority = -1,
) {
}
47 |
/**
 * Registers one logging listener per ETL event on the executor.
 *
 * Each listener is registered with the priority configured for its event class,
 * or with the default priority when none is configured.
 */
public function decorate(EtlExecutor $executor): EtlExecutor
{
    $priorityOf = fn (string $eventClass) => $this->priorities[$eventClass] ?? $this->defaultPriority;

    return $executor
        ->onInit(
            fn (InitEvent $event) => $this->log($event, 'Initializing ETL...', ['state' => $event->state]),
            $priorityOf(InitEvent::class),
        )
        ->onStart(
            fn (StartEvent $event) => $this->log($event, 'Starting ETL...', ['state' => $event->state]),
            $priorityOf(StartEvent::class),
        )
        ->onExtract(
            fn (ExtractEvent $event) => $this->log($event, 'Extracting item #{key}', [
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
                'item' => $event->item,
            ]),
            $priorityOf(ExtractEvent::class),
        )
        ->onExtractException(
            fn (ExtractExceptionEvent $event) => $this->log($event, 'Extract exception on key #{key}: {msg}', [
                'msg' => $event->exception->getMessage(),
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
            ]),
            $priorityOf(ExtractExceptionEvent::class),
        )
        ->onTransform(
            fn (TransformEvent $event) => $this->log($event, 'Transformed item #{key}', [
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
                'items' => $event->transformResult,
            ]),
            $priorityOf(TransformEvent::class),
        )
        ->onTransformException(
            fn (TransformExceptionEvent $event) => $this->log($event, 'Transform exception on key #{key}: {msg}', [
                'msg' => $event->exception->getMessage(),
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
            ]),
            $priorityOf(TransformExceptionEvent::class),
        )
        ->onLoad(
            fn (LoadEvent $event) => $this->log($event, 'Loaded item #{key}', [
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
                'item' => $event->item,
            ]),
            $priorityOf(LoadEvent::class),
        )
        ->onLoadException(
            fn (LoadExceptionEvent $event) => $this->log($event, 'Load exception on key #{key}: {msg}', [
                'msg' => $event->exception->getMessage(),
                'key' => $event->state->currentItemKey,
                'state' => $event->state,
            ]),
            $priorityOf(LoadExceptionEvent::class),
        )
        ->onFlush(
            fn (FlushEvent $event) => $this->log(
                $event,
                $event->early ? 'Flushing {nb} items (early)...' : 'Flushing {nb} items...',
                [
                    'nb' => $event->state->nbLoadedItemsSinceLastFlush,
                    'state' => $event->state,
                ],
            ),
            $priorityOf(FlushEvent::class),
        )
        ->onFlushException(
            fn (FlushExceptionEvent $event) => $this->log($event, 'Flush exception: {msg}', [
                'msg' => $event->exception->getMessage(),
                'state' => $event->state,
            ]),
            $priorityOf(FlushExceptionEvent::class),
        )
        ->onEnd(
            fn (EndEvent $event) => $this->log($event, 'ETL complete. {nb} items were loaded in {duration}s.', [
                'nb' => $event->state->nbLoadedItems,
                'duration' => $event->state->getDuration(),
                'state' => $event->state,
            ]),
            $priorityOf(EndEvent::class),
        );
}
162 |
/**
 * Sends the message to the logger, at the level configured for the event's class
 * (or at the default level when that class has no configured level).
 *
 * @param array<string, mixed> $context
 */
private function log(Event $event, string|Stringable $message, array $context = []): void
{
    $this->logger->log(
        $this->logLevels[$event::class] ?? $this->defaultLogLevel,
        $message,
        $context,
    );
}
172 | }
173 |
--------------------------------------------------------------------------------
/src/Recipe/Recipe.php:
--------------------------------------------------------------------------------
1 | recipe)($executor);
25 | }
26 | };
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/Transformer/CallableTransformer.php:
--------------------------------------------------------------------------------
1 | closure)($item, $state);
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/Transformer/ChainTransformer.php:
--------------------------------------------------------------------------------
1 | $_transformer) {
22 | if (!$_transformer instanceof TransformerInterface) {
23 | $transformers[$t] = new CallableTransformer($_transformer(...));
24 | }
25 | }
26 | $this->transformers = $transformers;
27 | }
28 |
29 | public function with(
30 | TransformerInterface|callable $transformer,
31 | TransformerInterface|callable ...$transformers,
32 | ): self {
33 | return new self(...[...$this->transformers, $transformer, ...$transformers]);
34 | }
35 |
36 | public function transform(mixed $item, EtlState $state): mixed
37 | {
38 | foreach ($this->transformers as $transformer) {
39 | $item = $transformer->transform($item, $state);
40 | }
41 |
42 | return $item;
43 | }
44 |
45 | public static function from(TransformerInterface $transformer): self
46 | {
47 | return match ($transformer instanceof self) {
48 | true => $transformer,
49 | false => new self($transformer),
50 | };
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/Transformer/NullTransformer.php:
--------------------------------------------------------------------------------
1 | $keys
29 | * @param array $values
30 | * @param array ...$extraValues
31 | *
32 | * @return array
33 | *
34 | * @internal
35 | */
function array_fill_from(array $keys, array $values, array ...$extraValues): array
{
    // Lookup table of the keys to keep.
    $allowedKeys = array_fill_keys($keys, null);

    // Later arrays override earlier ones, key by key.
    $merged = array_replace($values, ...$extraValues);

    // Keep only the allowed keys, in the order they appear in the merged values.
    return array_filter(
        $merged,
        static fn (int|string $key): bool => array_key_exists($key, $allowedKeys),
        ARRAY_FILTER_USE_KEY,
    );
}
43 |
/**
 * Returns the given iterable as an Iterator: an Iterator is returned as-is,
 * anything else (array, IteratorAggregate...) is wrapped into a generator.
 *
 * @internal
 *
 * @template T
 *
 * @param iterable<T> $items
 *
 * @return Iterator<T>
 */
function iterable_to_iterator(iterable $items): Iterator
{
    if ($items instanceof Iterator) {
        return $items;
    }

    $generate = static function () use ($items) {
        yield from $items;
    };

    return $generate();
}
57 |
/**
 * Creates a new executor and configures it with the given extractor(s).
 */
function extractFrom(ExtractorInterface|callable $extractor, ExtractorInterface|callable ...$extractors): EtlExecutor
{
    // Forward the declared parameters explicitly rather than through func_get_args().
    return (new EtlExecutor())->extractFrom($extractor, ...$extractors);
}
62 |
/**
 * Creates a new executor and configures it with the given transformer(s).
 */
function transformWith(
    TransformerInterface|callable $transformer,
    TransformerInterface|callable ...$transformers
): EtlExecutor {
    // Forward the declared parameters explicitly rather than through func_get_args().
    return (new EtlExecutor())->transformWith($transformer, ...$transformers);
}
69 |
/**
 * Creates a new executor and configures it with the given loader(s).
 */
function loadInto(LoaderInterface|callable $loader, LoaderInterface|callable ...$loaders): EtlExecutor
{
    // Forward the declared parameters explicitly rather than through func_get_args().
    return (new EtlExecutor())->loadInto($loader, ...$loaders);
}
74 |
/**
 * Creates a new executor and applies the given recipe to it.
 * Every argument received by this function is forwarded to EtlExecutor::withRecipe().
 */
function withRecipe(Recipe|callable $recipe): EtlExecutor
{
    $arguments = func_get_args();
    $executor = new EtlExecutor();

    return $executor->withRecipe(...$arguments);
}
79 |
/**
 * Creates a new executor with the ReactStreamProcessor recipe applied.
 */
function useReact(): EtlExecutor
{
    $recipe = new ReactStreamProcessor();

    return withRecipe($recipe);
}
84 |
/**
 * Wraps the given service into the chain matching its kind (extractor, transformer or loader).
 * The interfaces are tested in that order.
 */
function chain(ExtractorInterface|TransformerInterface|LoaderInterface $service,
): ChainExtractor|ChainTransformer|ChainLoader {
    if ($service instanceof ExtractorInterface) {
        return ChainExtractor::from($service);
    }

    if ($service instanceof TransformerInterface) {
        return ChainTransformer::from($service);
    }

    // The parameter type guarantees that only a loader can reach this point.
    return ChainLoader::from($service);
}
93 |
/**
 * Shortcut: returns a new STDINExtractor.
 */
function stdIn(): STDINExtractor
{
    $extractor = new STDINExtractor();

    return $extractor;
}
98 |
/**
 * Shortcut: returns a new STDOUTLoader.
 */
function stdOut(): STDOUTLoader
{
    $loader = new STDOUTLoader();

    return $loader;
}
103 |
/**
 * Builds a FilterRecipe in EXCLUDE mode for the given filter.
 *
 * A null $eventClass is treated as ExtractEvent::class.
 */
function skipWhen(callable $filter, ?string $eventClass = ExtractEvent::class, int $priority = 0): Recipe
{
    $eventClass ??= ExtractEvent::class;

    return new FilterRecipe($filter(...), $eventClass, $priority, FilterRecipeMode::EXCLUDE);
}
113 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/BeforeLoadEventTest.php:
--------------------------------------------------------------------------------
1 | transformWith(function (mixed $value) {
16 | yield $value;
17 | yield strtoupper($value);
18 | })
19 | ->onBeforeLoad(function (BeforeLoadEvent $e) {
20 | match ($e->item) {
21 | 'bar' => $e->state->skip(),
22 | 'baz' => $e->state->stop(),
23 | default => null,
24 | };
25 | });
26 |
27 | // When
28 | $report = $executor->process(['foo', 'bar', 'baz']);
29 |
30 | // Then
31 | expect($report->output)->toHaveCount(3)
32 | ->and($report->output)->toBe(['foo', 'FOO', 'BAR']);
33 | });
34 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/EndEventTest.php:
--------------------------------------------------------------------------------
1 | onEnd(function (EndEvent $e) use (&$event) {
18 | $event = $e;
19 | });
20 |
21 | // When
22 | $report = $executor->process(['foo', 'bar']);
23 |
24 | // Then
25 | expect($event)->toBeInstanceOf(EndEvent::class)
26 | ->and($report->nbTotalItems)->toBe(2)
27 | ->and($report->nbLoadedItems)->toBe(2)
28 | ;
29 | });
30 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/ExtractEventTest.php:
--------------------------------------------------------------------------------
1 | 'foo', 3 => 'bar'];
14 | $extractedItems = [];
15 |
16 | // Given
17 | $executor = (new EtlExecutor())
18 | ->onExtract(function (ExtractEvent $event) use (&$extractedItems) {
19 | $extractedItems[$event->state->currentItemKey] = $event->item;
20 | });
21 |
22 | // When
23 | $executor->process($items);
24 |
25 | // Then
26 | expect($extractedItems)->toBe($items);
27 | });
28 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/ExtractExceptionEventTest.php:
--------------------------------------------------------------------------------
1 | onExtractException(function (ExtractExceptionEvent $event) {
19 | $event->exception = new ExtractException('It miserably failed.');
20 | });
21 | $executor->process($items());
22 | })->throws(ExtractException::class, 'It miserably failed.');
23 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/FlushEventTest.php:
--------------------------------------------------------------------------------
1 | withOptions(new EtlConfiguration(flushEvery: 2))
19 | ->onFlush(function (FlushEvent $e) use (&$flushEventsCounter) {
20 | ++$flushEventsCounter;
21 | });
22 |
23 | // When
24 | $executor->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
25 |
26 | // Then
27 | expect($flushEventsCounter)->toBe(3);
28 | });
29 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/FlushExceptionEventTest.php:
--------------------------------------------------------------------------------
1 | loadInto(new FlushFailsLoader())
21 | ->onFlushException(function (FlushExceptionEvent $event) {
22 | $event->removeException();
23 | })
24 | ;
25 | $report = $executor->process($items);
26 | expect($report->output)->toBe([
27 | ['strawberry', 'raspberry'],
28 | ['peach'],
29 | ]);
30 | });
31 |
/**
 * Test loader whose very first flush fails (and discards the items pending at that time);
 * later flushes store the pending items as batches.
 */
class FlushFailsLoader implements LoaderInterface
{
    public function load(mixed $item, EtlState $state): void
    {
        $state->context['pending'][] = $item;
    }

    /**
     * @return list<list<mixed>>
     */
    public function flush(bool $isPartial, EtlState $state): array
    {
        $state->context['batchNumber'] ??= 0;
        $state->context['hasFailed'] ??= false;

        $isFirstFlush = !$state->context['hasFailed'];
        if ($isFirstFlush) {
            // Trigger the failure once, dropping the items that were pending
            $state->context['hasFailed'] = true;
            $state->context['pending'] = [];
            throw new RuntimeException('Flush failed.');
        }

        $batchNumber = $state->context['batchNumber'];
        foreach ($state->context['pending'] as $item) {
            $state->context['batches'][$batchNumber][] = $item;
        }
        $state->context['batchNumber'] = $batchNumber + 1;
        $state->context['pending'] = [];

        return $state->context['batches'];
    }
}
62 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/InitEventTest.php:
--------------------------------------------------------------------------------
1 | onInit(function (InitEvent $e) use (&$event) {
18 | $event = $e;
19 | $e->state->stop();
20 | });
21 |
22 | // When
23 | $executor->process('sourceArgs', 'destArgs');
24 |
25 | // Then
26 | expect($event)->toBeInstanceOf(InitEvent::class)
27 | ->and($event->state->source)->toBe('sourceArgs')
28 | ->and($event->state->destination)->toBe('destArgs');
29 | });
30 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/LoadEventTest.php:
--------------------------------------------------------------------------------
1 | transformWith(function (mixed $value) {
18 | yield $value;
19 | yield strtoupper($value);
20 | })
21 | ->onLoad(function (LoadEvent $e) use (&$loadedItems) {
22 | $loadedItems[] = $e->item;
23 | });
24 |
25 | // When
26 | $executor->process([2 => 'foo', 3 => 'bar']);
27 |
28 | // Then
29 | expect($loadedItems)->toHaveCount(4)
30 | ->and($loadedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']);
31 | });
32 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/LoadExceptionEventTest.php:
--------------------------------------------------------------------------------
1 | loadInto(function (mixed $value) use (&$loadedItems) {
19 | if ('bar' === $value) {
20 | throw new LoadException('Cannot load `bar`.');
21 | }
22 | $loadedItems[] = $value;
23 | })
24 | ->onLoadException(function (LoadExceptionEvent $event) {
25 | $event->removeException();
26 | })
27 | ;
28 | $executor->process($items);
29 |
30 | expect($loadedItems)->toBe(['foo', 'baz']);
31 | });
32 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/StartEventTest.php:
--------------------------------------------------------------------------------
1 | onStart(function (StartEvent $e) use (&$event) {
18 | $event = $e;
19 | $e->state->stop();
20 | });
21 |
22 | // When
23 | $executor->process(['foo', 'bar']);
24 |
25 | // Then
26 | expect($event)->toBeInstanceOf(StartEvent::class)
27 | ->and($event->state->nbTotalItems)->toBe(2)
28 | ->and($event->state->nbLoadedItems)->toBe(0)
29 | ;
30 | });
31 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/TransformEventTest.php:
--------------------------------------------------------------------------------
1 | transformWith(function (mixed $value) {
18 | yield $value;
19 | yield strtoupper($value);
20 | })
21 | ->onTransform(function (TransformEvent $e) use (&$transformedItems) {
22 | $transformedItems = [...$transformedItems, ...$e->transformResult];
23 | });
24 |
25 | // When
26 | $executor->process([2 => 'foo', 3 => 'bar']);
27 |
28 | // Then
29 | expect($transformedItems)->toHaveCount(4)
30 | ->and($transformedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']);
31 | });
32 |
--------------------------------------------------------------------------------
/tests/Behavior/Events/TransformExceptionEventTest.php:
--------------------------------------------------------------------------------
1 | transformWith(function (mixed $value) {
19 | if ('bar' === $value) {
20 | throw new TransformException('Cannot transform `bar`.');
21 | }
22 | yield $value;
23 | })
24 | ->loadInto(function (mixed $value) use (&$loadedItems) {
25 | $loadedItems[] = $value;
26 | })
27 | ->onTransformException(function (TransformExceptionEvent $event) {
28 | $event->removeException();
29 | })
30 | ;
31 | $executor->process($items);
32 |
33 | expect($loadedItems)->toBe(['foo', 'baz']);
34 | });
35 |
--------------------------------------------------------------------------------
/tests/Behavior/ExtractExceptionTest.php:
--------------------------------------------------------------------------------
1 | process($items());
19 | })->throws(ExtractException::class, 'Something bad happened.');
20 |
it('throws an extract exception when some other exception is thrown', function () {
    // Given a source that fails with a generic exception after its first item
    $failingSource = (function () {
        yield 'foo';
        throw new RuntimeException('Something bad happened.');
    })();

    // When
    (new EtlExecutor())->process($failingSource);
})->throws(ExtractException::class, 'Error during extraction.');
30 |
--------------------------------------------------------------------------------
/tests/Behavior/FlushExceptionTest.php:
--------------------------------------------------------------------------------
1 | process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
24 | })->throws(FlushException::class, 'Flush failed.');
25 |
// The description used to say "load exception", but this test asserts a FlushException.
it('throws a flush exception when some other exception is thrown', function () {
    // Given a loader whose flush fails with a generic exception
    $loader = new FlushFailsLoader(new RuntimeException('Flush failed.'));
    $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(flushEvery: 2)));

    // When
    $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
})->throws(FlushException::class, 'Error during flush.');
34 |
/**
 * Test loader which ignores every item and always fails on flush, with the exception it was given.
 */
class FlushFailsLoader implements LoaderInterface
{
    public function __construct(
        private Exception $failure,
    ) {
    }

    public function load(mixed $item, EtlState $state): void
    {
        // Nothing to load: only flush() matters here.
    }

    public function flush(bool $isPartial, EtlState $state): never
    {
        $failure = $this->failure;

        throw $failure;
    }
}
51 |
--------------------------------------------------------------------------------
/tests/Behavior/FlushTest.php:
--------------------------------------------------------------------------------
1 | process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
22 |
23 | // Then
24 | expect($report->output)->toBeArray()
25 | ->and($report->output)->toHaveCount(3)
26 | ->and($report->output[0])->toBe(['banana', 'apple'])
27 | ->and($report->output[1])->toBe(['strawberry', 'raspberry'])
28 | ->and($report->output[2])->toBe(['peach']);
29 | });
30 |
it('forces flushes', function () {
    // Given an ETL flushing every 2 items, with an extra flush requested on the very first item
    $fruits = ['banana', 'apple', 'strawberry', 'raspberry', 'peach'];
    $options = new EtlConfiguration(flushEvery: 2);
    $requestFlushOnFirstItem = function (ExtractEvent $event) {
        if (0 === $event->state->currentItemIndex) {
            $event->state->flush();
        }
    };
    $etl = (new EtlExecutor(loader: new InMemoryLoader(), options: $options))
        ->onExtract($requestFlushOnFirstItem);

    // When
    $report = $etl->process($fruits);

    // Then
    expect($report->output)->toBeArray()
        ->and($report->output)->toHaveCount(3)
        ->and($report->output[0])->toBe(['banana'])
        ->and($report->output[1])->toBe(['apple', 'strawberry'])
        ->and($report->output[2])->toBe(['raspberry', 'peach']);
});
52 |
--------------------------------------------------------------------------------
/tests/Behavior/LoadExceptionTest.php:
--------------------------------------------------------------------------------
1 | process($items);
22 | })->throws(LoadException::class, 'Cannot load `bar`.');
23 |
it('throws a load exception when some other exception is thrown', function () {
    // Given a loader failing with a generic exception on `bar`
    $failOnBar = function (mixed $value) {
        if ('bar' === $value) {
            throw new RuntimeException('Cannot load `bar`.');
        }
    };

    // When
    loadInto($failOnBar)->process(['foo', 'bar', 'baz']);
})->throws(LoadException::class, 'Error during loading.');
33 |
it('has stopped processing items, but has loaded the previous ones', function () {
    // Given a loader which collects items, but fails on `bar`
    $loadedItems = [];
    $collectUnlessBar = function (mixed $value) use (&$loadedItems) {
        if ('bar' === $value) {
            throw new LoadException('Cannot load `bar`.');
        }
        $loadedItems[] = $value;
    };
    $executor = loadInto($collectUnlessBar);

    // When
    try {
        $executor->process(['foo', 'bar', 'baz']);
    } catch (LoadException) {
        // Expected: only what was loaded before the failure matters here.
    }

    // Then
    expect($loadedItems)->toBe(['foo']);
});
51 |
--------------------------------------------------------------------------------
/tests/Behavior/NextTickTest.php:
--------------------------------------------------------------------------------
1 | 3];
23 | $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(...$options)))
24 | ->onExtract(function (ExtractEvent $event) {
25 | // Let's trigger an early flush after the NEXT item (apple)
26 | if ('banana' === $event->item) {
27 | $event->state->nextTick(fn (EtlState $state) => $state->flush());
28 | }
29 | });
30 |
31 | // When
32 | $report = $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
33 |
34 | // Then
35 | expect($report->output)->toBeArray()
36 | ->and($report->output)->toHaveCount(2)
37 | ->and($report->output[0])->toBe(['banana', 'apple'])
38 | ->and($report->output[1])->toBe(['strawberry', 'raspberry', 'peach']);
39 | });
40 |
it('can trigger several callbacks, which are called only once', function () {
    // Given two next-tick callbacks registered when `apple` gets loaded
    $bucket = new ArrayObject();
    $registerCallbacks = function (LoadEvent $event) use ($bucket) {
        if ('apple' !== $event->item) {
            return;
        }
        foreach (['apple', 'APPLE'] as $value) {
            $event->state->nextTick(fn (EtlState $state) => $bucket->append($value));
        }
    };
    $etl = (new EtlExecutor())->onLoad($registerCallbacks);

    // When
    $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);

    // Then
    expect([...$bucket])->toBe(['apple', 'APPLE']);
});
58 |
it("won't complain if a stop request is issued during terminate()", function () {
    // Given a next-tick callback, registered on the last item, which requests a stop
    $input = ['banana', 'apple', 'strawberry', 'raspberry', 'peach'];
    $itWasCalled = false;
    $stopOnNextTick = function (EtlState $state) use (&$itWasCalled) {
        $itWasCalled = true;
        $state->stop();
    };
    $etl = (new EtlExecutor(extractor: new IterableExtractor($input)))
        ->onExtract(function (ExtractEvent $event) use ($stopOnNextTick) {
            if ('peach' === $event->item) {
                $event->state->nextTick($stopOnNextTick);
            }
        });

    // When
    $report = $etl->process();

    // Then
    expect($report->output)->toBe($input)
        ->and($itWasCalled)->toBeTrue();
});
79 |
--------------------------------------------------------------------------------
/tests/Behavior/ReactStreamProcessorTest.php:
--------------------------------------------------------------------------------
1 | $stream->emit('data', ['hello']));
21 | Loop::futureTick(fn () => $stream->emit('data', ['world']));
22 | $executor = useReact();
23 |
24 | // When
25 | $state = $executor->process($stream);
26 |
27 | // Then
28 | expect($state->output)->toBe(['hello', 'world']);
29 | });
30 |
31 | it('can skip items and stop the workflow', function () {
32 | // Given
33 | $stream = new ReadableResourceStream(fopen('php://temp', 'rb'));
34 | $fruits = ['banana', 'apple', 'strawberry', 'raspberry', 'peach'];
35 | foreach ($fruits as $fruit) {
36 | Loop::futureTick(fn () => $stream->emit('data', [$fruit]));
37 | }
38 | $executor = useReact()
39 | ->onExtract(function (ExtractEvent $event) {
40 | match ($event->item) {
41 | 'apple' => $event->state->skip(),
42 | 'peach' => $event->state->stop(),
43 | default => null,
44 | };
45 | })
46 | ;
47 |
48 | // When
49 | $state = $executor->process($stream);
50 |
51 | // Then
52 | expect($state->output)->toBe(['banana', 'strawberry', 'raspberry']);
53 | });
54 |
55 | it('allows iterables, which will be converted to readable streams', function () {
56 | $fruits = ['banana', 'apple', 'strawberry', 'raspberry', 'peach'];
57 | $executor = useReact()
58 | ->onExtract(function (ExtractEvent $event) {
59 | match ($event->item) {
60 | 'apple' => $event->state->skip(),
61 | 'peach' => $event->state->stop(),
62 | default => null,
63 | };
64 | })
65 | ;
66 |
67 | // When
68 | $state = $executor->process($fruits);
69 |
70 | // Then
71 | expect($state->output)->toBe(['banana', 'strawberry', 'raspberry']);
72 | });
73 |
74 | it('throws ExtractExceptions', function () {
75 | // Given
76 | $stream = new ReadableResourceStream(fopen('php://temp', 'rb'));
77 | Loop::futureTick(fn () => $stream->emit('data', ['hello']));
78 | $executor = useReact()->onExtract(fn () => throw new RuntimeException());
79 |
80 | // When
81 | $executor->process($stream);
82 | })->throws(ExtractException::class);
83 |
--------------------------------------------------------------------------------
/tests/Behavior/SkipTest.php:
--------------------------------------------------------------------------------
1 | 'auto',
18 | ]);
19 | $cities = [];
20 |
21 | // Given
22 | $executor = (new EtlExecutor(extractor: $extractor))
23 | ->transformWith(function (mixed $value) {
24 | yield $value['city_english_name'];
25 | })
26 | ->loadInto(function (string $city) use (&$cities) {
27 | $cities[] = $city;
28 | })
29 | ->onExtract(function (ExtractEvent $event) {
30 | if ('US' === $event->item['country_iso_code']) {
31 | $event->state->skip();
32 | }
33 | });
34 |
35 | // When
36 | $executor->process();
37 |
38 | // Then
39 | expect($cities)->toBe([
40 | 'Tokyo',
41 | 'Shanghai',
42 | 'Mumbai',
43 | 'Istanbul',
44 | 'Moscow',
45 | 'Cairo',
46 | 'Lima',
47 | 'London',
48 | ]);
49 | });
50 |
51 | it('skips items during transformation', function () {
52 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [
53 | 'columns' => 'auto',
54 | ]);
55 | $cities = [];
56 |
57 | // Given
58 | $executor = (new EtlExecutor(extractor: $extractor))
59 | ->transformWith(function (mixed $value) {
60 | yield $value['city_english_name'];
61 | })
62 | ->loadInto(function (string $city) use (&$cities) {
63 | $cities[] = $city;
64 | })
65 | ->onTransform(function (TransformEvent $event) {
66 | if ('Tokyo' === [...$event->transformResult][0]) {
67 | $event->state->skip();
68 | }
69 | });
70 |
71 | // When
72 | $executor->process();
73 |
74 | // Then
75 | expect($cities)->toBe([
76 | 'New York',
77 | 'Los Angeles',
78 | 'Shanghai',
79 | 'Mumbai',
80 | 'Istanbul',
81 | 'Moscow',
82 | 'Cairo',
83 | 'Lima',
84 | 'London',
85 | ]);
86 | });
87 |
--------------------------------------------------------------------------------
/tests/Behavior/StopTest.php:
--------------------------------------------------------------------------------
1 | 'auto',
19 | ]);
20 | $cities = [];
21 |
22 | // Given
23 | $executor = (new EtlExecutor(extractor: $extractor))
24 | ->transformWith(function (mixed $value) {
25 | yield $value['city_english_name'];
26 | })
27 | ->loadInto(function (string $city) use (&$cities) {
28 | $cities[] = $city;
29 | })
30 | ->onExtract(function (ExtractEvent $event) {
31 | if ('JP' === $event->item['country_iso_code']) {
32 | $event->state->stop();
33 | }
34 | });
35 |
36 | // When
37 | $executor->process();
38 |
39 | // Then
40 | expect($cities)->toBe([
41 | 'New York',
42 | 'Los Angeles',
43 | ]);
44 | });
45 |
46 | it('stops the process during transformation', function () {
47 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [
48 | 'columns' => 'auto',
49 | ]);
50 | $cities = [];
51 |
52 | // Given
53 | $executor = (new EtlExecutor(extractor: $extractor))
54 | ->transformWith(function (mixed $value) {
55 | yield $value['city_english_name'];
56 | })
57 | ->loadInto(function (string $city) use (&$cities) {
58 | $cities[] = $city;
59 | })
60 | ->onTransform(function (TransformEvent $event) {
61 | if ('Shanghai' === [...$event->transformResult][0]) {
62 | $event->state->stop();
63 | }
64 | });
65 |
66 | // When
67 | $executor->process();
68 |
69 | // Then
70 | expect($cities)->toBe([
71 | 'New York',
72 | 'Los Angeles',
73 | 'Tokyo',
74 | ]);
75 | });
76 |
77 | it('stops the process during loading', function () {
78 | $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/Data/10-biggest-cities.csv', [
79 | 'columns' => 'auto',
80 | ]);
81 | $cities = [];
82 |
83 | // Given
84 | $executor = (new EtlExecutor(extractor: $extractor))
85 | ->transformWith(function (mixed $value) {
86 | yield $value['city_english_name'];
87 | })
88 | ->loadInto(function (string $city) use (&$cities) {
89 | $cities[] = $city;
90 | })
91 | ->onLoad(function (LoadEvent $event) {
92 | if ('Shanghai' === $event->item) {
93 | $event->state->stop();
94 | }
95 | });
96 |
97 | // When
98 | $executor->process();
99 |
100 | // Then
101 | expect($cities)->toBe([
102 | 'New York',
103 | 'Los Angeles',
104 | 'Tokyo',
105 | 'Shanghai',
106 | ]);
107 | });
108 |
--------------------------------------------------------------------------------
/tests/Behavior/TransformExceptionTest.php:
--------------------------------------------------------------------------------
1 | process($items);
23 | })->throws(TransformException::class, 'Cannot transform `bar`.');
24 |
25 | it('throws a transform exception when some other exception is thrown', function () {
26 | $items = ['foo', 'bar', 'baz'];
27 | $executor = transformWith(function (mixed $value) {
28 | if ('bar' === $value) {
29 | throw new RuntimeException('Cannot transform `bar`.');
30 | }
31 | yield $value;
32 | });
33 | $executor->process($items);
34 | })->throws(TransformException::class, 'Error during transformation.');
35 |
36 | it('has stopped processing items, but has loaded the previous ones', function () {
37 | $items = ['foo', 'bar', 'baz'];
38 | $loadedItems = [];
39 | $executor = transformWith(function (mixed $value) {
40 | if ('bar' === $value) {
41 | throw new TransformException('Cannot transform `bar`.');
42 | }
43 | yield $value;
44 | })
45 | ->loadInto(function (mixed $value) use (&$loadedItems) {
46 | $loadedItems[] = $value;
47 | })
48 | ;
49 | try {
50 | $executor->process($items);
51 | } catch (TransformException) {
52 | }
53 |
54 | expect($loadedItems)->toBe(['foo']);
55 | });
56 |
--------------------------------------------------------------------------------
/tests/Data/10-biggest-cities.csv:
--------------------------------------------------------------------------------
1 | city_english_name,city_local_name,country_iso_code,continent,population
2 | "New York","New York",US,"North America",8537673
3 | "Los Angeles","Los Angeles",US,"North America",39776830
4 | Tokyo,東京,JP,Asia,13929286
5 | Shanghai,上海,CN,Asia,26317104
6 | Mumbai,मुंबई,IN,Asia,12442373
7 | Istanbul,İstanbul,TR,Europe,15469524
8 | Moscow,Москва,RU,Europe,12615279
9 | Cairo,القاهرة,EG,Africa,9121514
10 | Lima,Lima,PE,"South America",10141329
11 | London,London,GB,Europe,8908081
12 |
--------------------------------------------------------------------------------
/tests/Data/10-biggest-cities.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "city_english_name": "New York",
4 | "city_local_name": "New York",
5 | "country_iso_code": "US",
6 | "continent": "North America",
7 | "population": 8537673
8 | },
9 | {
10 | "city_english_name": "Los Angeles",
11 | "city_local_name": "Los Angeles",
12 | "country_iso_code": "US",
13 | "continent": "North America",
14 | "population": 39776830
15 | },
16 | {
17 | "city_english_name": "Tokyo",
18 | "city_local_name": "東京",
19 | "country_iso_code": "JP",
20 | "continent": "Asia",
21 | "population": 13929286
22 | },
23 | {
24 | "city_english_name": "Shanghai",
25 | "city_local_name": "上海",
26 | "country_iso_code": "CN",
27 | "continent": "Asia",
28 | "population": 26317104
29 | },
30 | {
31 | "city_english_name": "Mumbai",
32 | "city_local_name": "मुंबई",
33 | "country_iso_code": "IN",
34 | "continent": "Asia",
35 | "population": 12442373
36 | },
37 | {
38 | "city_english_name": "Istanbul",
39 | "city_local_name": "İstanbul",
40 | "country_iso_code": "TR",
41 | "continent": "Europe",
42 | "population": 15469524
43 | },
44 | {
45 | "city_english_name": "Moscow",
46 | "city_local_name": "Москва",
47 | "country_iso_code": "RU",
48 | "continent": "Europe",
49 | "population": 12615279
50 | },
51 | {
52 | "city_english_name": "Cairo",
53 | "city_local_name": "القاهرة",
54 | "country_iso_code": "EG",
55 | "continent": "Africa",
56 | "population": 9121514
57 | },
58 | {
59 | "city_english_name": "Lima",
60 | "city_local_name": "Lima",
61 | "country_iso_code": "PE",
62 | "continent": "South America",
63 | "population": 10141329
64 | },
65 | {
66 | "city_english_name": "London",
67 | "city_local_name": "London",
68 | "country_iso_code": "GB",
69 | "continent": "Europe",
70 | "population": 8908081
71 | }
72 | ]
73 |
--------------------------------------------------------------------------------
/tests/Data/10-biggest-cities.php:
--------------------------------------------------------------------------------
1 | 'New York',
6 | 'city_local_name' => 'New York',
7 | 'country_iso_code' => 'US',
8 | 'continent' => 'North America',
9 | 'population' => 8537673,
10 | ],
11 | [
12 | 'city_english_name' => 'Los Angeles',
13 | 'city_local_name' => 'Los Angeles',
14 | 'country_iso_code' => 'US',
15 | 'continent' => 'North America',
16 | 'population' => 39776830,
17 | ],
18 | [
19 | 'city_english_name' => 'Tokyo',
20 | 'city_local_name' => '東京',
21 | 'country_iso_code' => 'JP',
22 | 'continent' => 'Asia',
23 | 'population' => 13929286,
24 | ],
25 | [
26 | 'city_english_name' => 'Shanghai',
27 | 'city_local_name' => '上海',
28 | 'country_iso_code' => 'CN',
29 | 'continent' => 'Asia',
30 | 'population' => 26317104,
31 | ],
32 | [
33 | 'city_english_name' => 'Mumbai',
34 | 'city_local_name' => 'मुंबई',
35 | 'country_iso_code' => 'IN',
36 | 'continent' => 'Asia',
37 | 'population' => 12442373,
38 | ],
39 | [
40 | 'city_english_name' => 'Istanbul',
41 | 'city_local_name' => 'İstanbul',
42 | 'country_iso_code' => 'TR',
43 | 'continent' => 'Europe',
44 | 'population' => 15469524,
45 | ],
46 | [
47 | 'city_english_name' => 'Moscow',
48 | 'city_local_name' => 'Москва',
49 | 'country_iso_code' => 'RU',
50 | 'continent' => 'Europe',
51 | 'population' => 12615279,
52 | ],
53 | [
54 | 'city_english_name' => 'Cairo',
55 | 'city_local_name' => 'القاهرة',
56 | 'country_iso_code' => 'EG',
57 | 'continent' => 'Africa',
58 | 'population' => 9121514,
59 | ],
60 | [
61 | 'city_english_name' => 'Lima',
62 | 'city_local_name' => 'Lima',
63 | 'country_iso_code' => 'PE',
64 | 'continent' => 'South America',
65 | 'population' => 10141329,
66 | ],
67 | [
68 | 'city_english_name' => 'London',
69 | 'city_local_name' => 'London',
70 | 'country_iso_code' => 'GB',
71 | 'continent' => 'Europe',
72 | 'population' => 8908081,
73 | ],
74 | ];
75 |
--------------------------------------------------------------------------------
/tests/Stubs/InMemoryLoader.php:
--------------------------------------------------------------------------------
1 | context['pending'][] = $item;
15 | }
16 |
17 | /**
18 | * @return array<int, list<mixed>> every batch flushed so far; an empty array when nothing was ever loaded
19 | */
20 | public function flush(bool $isPartial, EtlState $state): array
21 | {
22 | $state->context['batchNumber'] ??= 0;
23 | foreach ($state->context['pending'] ?? [] as $value) {
24 | $state->context['batches'][$state->context['batchNumber']][] = $value;
25 | }
26 | $state->context['pending'] = [];
27 | ++$state->context['batchNumber'];
28 |
29 | return $state->context['batches'] ?? [];
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/tests/Stubs/STDINStub.php:
--------------------------------------------------------------------------------
1 | bufferFilename = sys_get_temp_dir().DIRECTORY_SEPARATOR.'php_input.txt';
32 | $this->index = 0;
33 | if (file_exists($this->bufferFilename)) {
34 | $this->data = file_get_contents($this->bufferFilename);
35 | }
36 | $this->length = strlen($this->data);
37 | }
38 |
39 | public function stream_open(): true
40 | {
41 | return true;
42 | }
43 |
44 | public function url_stat(): false
45 | {
46 | return false;
47 | }
48 |
49 | public function stream_close(): void
50 | {
51 | }
52 |
53 | public function stream_stat(): false
54 | {
55 | return false;
56 | }
57 |
58 | public function stream_flush(): true
59 | {
60 | return true;
61 | }
62 |
63 | public function stream_read(int $count): string
64 | {
65 | // Serve at most $count bytes, capped by what is left in the buffer: PHP warns and drops anything beyond $count.
66 | $length = min($count, $this->length - $this->index);
67 | $data = substr($this->data, $this->index, $length);
68 | $this->index += $length;
69 | return $data;
70 | }
71 |
72 | public function stream_eof(): bool
73 | {
74 | return $this->index >= $this->length; // true once the read cursor has reached the end of the buffered data
75 | }
76 |
77 | public function stream_write(string $data): false|int
78 | {
79 | return file_put_contents($this->bufferFilename, $data); // overwrites the buffer file (no FILE_APPEND) - NOTE(review): confirm payloads always arrive in a single write
80 | }
81 |
82 | public static function emulate(string $stdInContent, callable $beforeRestore): mixed
83 | {
84 | stream_wrapper_unregister('php');
85 | stream_wrapper_register('php', __CLASS__);
86 | try {
87 | file_put_contents('php://stdin', $stdInContent);
88 | return $beforeRestore();
89 | } finally {
90 | stream_wrapper_restore('php'); // hand the built-in php:// wrapper back even when the callback throws
91 | }
92 | }
92 | }
93 |
--------------------------------------------------------------------------------
/tests/Stubs/STDOUTStub.php:
--------------------------------------------------------------------------------
1 | data;
24 | $consumed += $bucket->datalen;
25 | stream_bucket_append($out, $bucket);
26 | }
27 |
28 | return PSFS_PASS_ON;
29 | }
30 |
31 | public static function read(): string
32 | {
33 | return self::$storage;
34 | }
35 |
36 | public static function emulate(callable $beforeRestore, string $filename = 'php://stdout'): string
37 | {
38 | stream_filter_register('intercept', __CLASS__);
39 | $stdout = fopen($filename, 'wb+');
40 | stream_filter_append($stdout, 'intercept');
41 | try {
42 | $beforeRestore($stdout);
43 | return self::$storage;
44 | } finally {
45 | self::$storage = ''; // reset the shared buffer even when the callback throws, so output never leaks into the next emulation
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/tests/Stubs/WritableStreamStub.php:
--------------------------------------------------------------------------------
1 |
16 | */
17 | public array $data = [];
18 |
19 | public function isWritable(): bool
20 | {
21 | return true;
22 | }
23 |
24 | public function write($data): bool
25 | {
26 | $this->data[] = $data;
27 |
28 | return true;
29 | }
30 |
31 | public function end($data = null): void
32 | {
33 | }
34 |
35 | public function close(): void
36 | {
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/tests/Unit/ContextTest.php:
--------------------------------------------------------------------------------
1 | 'green', 'shape' => 'square', 'lights' => 'on']));
15 |
16 | // When
17 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']);
18 |
19 | // Then
20 | expect($report->context)->toBe(['color' => 'green', 'shape' => 'round', 'lights' => 'on', 'size' => 'small']);
21 | });
22 |
23 | it('adds some more context', function () {
24 | // Given
25 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on']))
26 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla']);
27 |
28 | // When
29 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']);
30 |
31 | // Then
32 | expect($report->context)->toBe(['color' => 'blue', 'shape' => 'round', 'lights' => 'on', 'flavor' => 'vanilla', 'size' => 'small']);
33 | });
34 |
35 | it('replaces the whole context', function () {
36 | // Given
37 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on']))
38 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla'], clear: true);
39 |
40 | // When
41 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']);
42 |
43 | // Then
44 | expect($report->context)->toBe(['color' => 'blue', 'flavor' => 'vanilla', 'shape' => 'round', 'size' => 'small']);
45 | });
46 |
47 | it('does not override existing values', function () {
48 | // Given
49 | $executor = (new EtlExecutor(context: ['color' => 'green', 'shape' => 'square', 'lights' => 'on']))
50 | ->withContext(['color' => 'blue', 'flavor' => 'vanilla'], overwrite: false);
51 |
52 | // When
53 | $report = $executor->process([], context: ['shape' => 'round', 'size' => 'small']);
54 |
55 | // Then
56 | expect($report->context)->toBe(['color' => 'green', 'shape' => 'round', 'lights' => 'on', 'flavor' => 'vanilla', 'size' => 'small']);
57 | });
58 |
--------------------------------------------------------------------------------
/tests/Unit/EtlConfigurationTest.php:
--------------------------------------------------------------------------------
1 | throws(InvalidArgumentException::class);
13 |
14 | it('denies negative values', function () {
15 | new EtlConfiguration(flushEvery: -10);
16 | })->throws(InvalidArgumentException::class);
17 |
--------------------------------------------------------------------------------
/tests/Unit/EtlExecutorTest.php:
--------------------------------------------------------------------------------
1 | extractFrom(fn () => yield from ['foo', 'bar'])
26 | ->transformWith($transformer)
27 | ->loadInto(function (string $item) use (&$items) {
28 | $items[] = $item;
29 | })
30 | ->withOptions(new EtlConfiguration(flushEvery: 1));
31 |
32 | // When
33 | $report = $etl->process();
34 |
35 | // Then
36 | expect($items)->toBe(['FOO', 'BAR'])
37 | ->and($report->nbTotalItems)->toBe(2)
38 | ->and($report->nbLoadedItems)->toBe(2)
39 | ->and($report->getDuration())->toBeBetween(0, 1);
40 | })->with(function () {
41 | yield 'Return value' => fn (mixed $value) => strtoupper($value);
42 | yield 'Generator' => fn (mixed $value) => yield strtoupper($value);
43 | });
44 |
45 | it('passes the context throughout all the ETL steps', function () {
46 | $items = [];
47 |
48 | // Given
49 | $etl = (new EtlExecutor())
50 | ->loadInto(function (string $item) use (&$items) {
51 | $items[] = $item;
52 | })
53 | ->onFlush(fn (FlushEvent $event) => $event->state->context['bar'] = 'baz'); // @phpstan-ignore-line
54 |
55 | // When
56 | $report = $etl->process(['banana', 'apple'], context: ['foo' => 'bar']);
57 |
58 | // Then
59 | expect($items)->toBe(['banana', 'apple'])
60 | ->and($report->context['foo'])->toBe('bar')
61 | ->and($report->context['bar'])->toBe('baz');
62 | });
63 |
64 | it('loads conditionally', function () {
65 | // Background
66 | $loader = new class() implements ConditionalLoaderInterface {
67 | public function supports(mixed $item, EtlState $state): bool
68 | {
69 | return 'foo' !== $item;
70 | }
71 |
72 | public function load(mixed $item, EtlState $state): void
73 | {
74 | $state->context[__CLASS__][] = $item;
75 | }
76 |
77 | public function flush(bool $isPartial, EtlState $state): mixed
78 | {
79 | foreach ($state->context[__CLASS__] as $item) {
80 | $state->context['storage'][] = $item;
81 | }
82 |
83 | return $state->context['storage'];
84 | }
85 | };
86 |
87 | // Given
88 | $input = ['foo', 'bar', 'baz'];
89 | $executor = new EtlExecutor(loader: $loader);
90 |
91 | // When
92 | $report = $executor->process($input, context: ['storage' => []]);
93 |
94 | // Then
95 | expect($report->output)->toBe(['bar', 'baz']);
96 | });
97 |
98 | it('yells if it cannot process extracted data', function () {
99 | // Given
100 | $executor = (new EtlExecutor())->withProcessor(
101 | new class() implements ProcessorInterface {
102 | public function supports(mixed $extracted): bool
103 | {
104 | return false;
105 | }
106 |
107 | public function process(EtlExecutor $executor, EtlState $state, mixed $extracted): EtlState
108 | {
109 | throw new ShouldNotHappen(new LogicException());
110 | }
111 | },
112 | );
113 |
114 | // When
115 | $executor->process([]);
116 | })->throws(ExtractException::class);
117 |
--------------------------------------------------------------------------------
/tests/Unit/EventDispatcher/EventDispatcherTest.php:
--------------------------------------------------------------------------------
1 | visitors[] = $visitor;
23 | if (2 === count($this->visitors)) {
24 | $this->stopPropagation();
25 | }
26 | }
27 | }
28 |
29 | it('dispatches events, to the appropriate listeners, in the correct order', function () {
30 | $listenerProvider = new PrioritizedListenerProvider();
31 | $bus = new EventDispatcher($listenerProvider);
32 | $ignored = new class() {
33 | use EventVisitor;
34 | };
35 | $event = new class() {
36 | use EventVisitor;
37 | };
38 |
39 | // Given
40 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('A'));
41 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1);
42 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1);
43 |
44 | // When
45 | $dispatched = $bus->dispatch($event);
46 |
47 | // Then
48 | expect($dispatched)
49 | ->toBe($event)
50 | ->and($event->visitors)->toBe(['C', 'A', 'B'])
51 | ->and($ignored->visitors)->toBe([])
52 | ;
53 | });
54 |
55 | it('stops propagation of events', function () {
56 | $listenerProvider = new PrioritizedListenerProvider();
57 | $bus = new EventDispatcher($listenerProvider);
58 | $event = new class() implements StoppableEventInterface {
59 | use EventVisitor;
60 | };
61 |
62 | // Given
63 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('A'));
64 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1);
65 | $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1);
66 |
67 | // When
68 | $dispatched = $bus->dispatch($event);
69 |
70 | // Then
71 | expect($dispatched)
72 | ->toBe($event)
73 | ->and($event->visitors)->toBe(['C', 'A'])
74 | ;
75 | });
76 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/CSVExtractorTest.php:
--------------------------------------------------------------------------------
1 | extract($state);
20 | })->throws(ExtractException::class);
21 |
22 | it('iterates over a string containing CSV data', function () {
23 | $state = new EtlState();
24 | $content = file_get_contents(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv');
25 | $expected = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php';
26 | $extractor = new CSVExtractor($content, ['columns' => 'auto']);
27 |
28 | // When
29 | $extractedItems = [...$extractor->extract($state)];
30 |
31 | // Then
32 | expect($extractedItems)->toBe($expected);
33 | });
34 |
35 | it('iterates over a file containing CSV data', function () {
36 | $extractor = new CSVExtractor(options: ['columns' => 'auto']);
37 |
38 | // When
39 | $state = new EtlState(source: 'file://'.dirname(__DIR__, 2).'/Data/10-biggest-cities.csv');
40 | $extractedItems = [...$extractor->extract($state)];
41 |
42 | // Then
43 | expect($extractedItems)->toHaveCount(10)
44 | ->and($extractedItems[0]['city_english_name'] ?? null)->toBe('New York')
45 | ->and($extractedItems[9]['city_english_name'] ?? null)->toBe('London');
46 | });
47 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/CallableExtractorTest.php:
--------------------------------------------------------------------------------
1 | ['foo', 'bar'];
17 |
18 | // When
19 | $value = (new CallableExtractor($callable))->extract($state);
20 |
21 | // Then
22 | expect($value)->toBe(['foo', 'bar']);
23 | });
24 |
25 | it('returns an empty iterable when extracted content is null', function () {
26 | // Given
27 | $state = new EtlState();
28 | $callable = fn () => null;
29 |
30 | // When
31 | $value = (new CallableExtractor($callable))->extract($state);
32 |
33 | // Then
34 | expect($value)->toBeInstanceOf(EmptyIterator::class);
35 | });
36 | it('returns an iterable of values when extracted content is not iterable', function () {
37 | // Given
38 | $state = new EtlState();
39 | $callable = fn () => 'foo';
40 |
41 | // When
42 | $value = (new CallableExtractor($callable))->extract($state);
43 |
44 | // Then
45 | expect($value)->toBe(['foo']);
46 | });
47 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/ChainExtractorTest.php:
--------------------------------------------------------------------------------
1 | 'banana'));
17 | $executor = $executor->extractFrom(chain($executor->extractor)
18 | ->with(fn () => yield from ['apple', 'strawberry'])
19 | ->with(fn () => ['raspberry', 'peach']))
20 | ;
21 |
22 | // When
23 | $report = $executor->process();
24 |
25 | // Then
26 | expect($report->output)->toBe(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
27 | });
28 |
29 | it('silently chains extractors', function () {
30 | // Given
31 | $executor = extractFrom(
32 | fn () => 'banana',
33 | fn () => yield from ['apple', 'strawberry'],
34 | fn () => ['raspberry', 'peach']
35 | );
36 |
37 | // When
38 | $report = $executor->process();
39 |
40 | // Then
41 | expect($report->output)->toBe(['banana', 'apple', 'strawberry', 'raspberry', 'peach']);
42 | });
43 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/IterableExtractorTest.php:
--------------------------------------------------------------------------------
1 | extract($state))
18 | ->toBe(['foo', 'bar']);
19 |
20 | $state = new EtlState(source: ['bar', 'baz']);
21 | expect($extractor->extract($state))
22 | ->toBe(['bar', 'baz']);
23 | });
24 |
25 | it('yells whenever source is not iterable', function () {
26 | (new IterableExtractor())->extract(new EtlState(source: 'foo'));
27 | })
28 | ->throws(ExtractException::class);
29 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/JSONExtractorTest.php:
--------------------------------------------------------------------------------
1 | extract($state);
23 |
24 | // Then
25 | expect([...$items])->toBe(null === $source ? [] : $expected);
26 | })->with(function () {
27 | $source = dirname(__DIR__, 2).'/Data/10-biggest-cities.json';
28 | $content = file_get_contents($source);
29 | yield ['source' => 'file://'.$source];
30 | yield ['source' => $content];
31 | yield ['source' => null];
32 | })->with(function () {
33 | yield ['useConstructor' => true];
34 | yield ['useConstructor' => false];
35 | });
36 |
37 | it('complains if content cannot be extracted', function () {
38 | [...(new JSONExtractor())->extract(new EtlState(source: new stdClass()))];
39 | })->throws(ExtractException::class);
40 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/ReactStreamExtractorTest.php:
--------------------------------------------------------------------------------
1 | extract(new EtlState(source: $b)))->toBe($b)
20 | ->and($extractor->extract(new EtlState()))->toBe($a);
21 | });
22 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/STDINExtractorTest.php:
--------------------------------------------------------------------------------
1 | process(...));
22 |
23 | expect($report->output)->toBe([
24 | 'Hello',
25 | '',
26 | 'Everybody!',
27 | ]);
28 | });
29 |
--------------------------------------------------------------------------------
/tests/Unit/Extractor/TextLinesExtractorTest.php:
--------------------------------------------------------------------------------
1 | extract($state);
25 |
26 | // Then
27 | expect([...$items])->toBe($expected);
28 | })->with(function () {
29 | yield [
30 | 'options' => ['skipEmptyLines' => true],
31 | 'expected' => ['foo', 'bar'],
32 | ];
33 | yield [
34 | 'options' => [],
35 | 'expected' => ['foo', 'bar'],
36 | ];
37 | yield [
38 | 'options' => ['skipEmptyLines' => false],
39 | 'expected' => ['foo', '', '', 'bar'],
40 | ];
41 | })->with(function () {
42 | yield ['useConstructor' => true];
43 | yield ['useConstructor' => false];
44 | });
45 |
46 | it('returns an empty iterator when the content is null', function () {
47 | $state = new EtlState();
48 | $extractor = new TextLinesExtractor();
49 |
50 | // When
51 | $items = $extractor->extract($state);
52 |
53 | expect([...$items])->toBe([]);
54 | });
55 |
--------------------------------------------------------------------------------
/tests/Unit/FunctionsTest.php:
--------------------------------------------------------------------------------
1 | 'Apple',
14 | 'b' => 'Banana',
15 | 'c' => 'Carrot',
16 | 'd' => 'Dill',
17 | ];
18 |
19 | // When
20 | $result = array_fill_from(['a', 'b', 'e'], $food, ['b' => 'banana', 'f' => 'Fig']);
21 |
22 | // Then
23 | expect($result)->toBe([
24 | 'a' => 'Apple',
25 | 'b' => 'banana',
26 | ]);
27 | });
28 |
--------------------------------------------------------------------------------
/tests/Unit/Iterator/CSVIteratorTest.php:
--------------------------------------------------------------------------------
1 | toHaveCount(11)
19 | ->and($rows[0])->toBe([
20 | 0 => 'city_english_name',
21 | 1 => 'city_local_name',
22 | 2 => 'country_iso_code',
23 | 3 => 'continent',
24 | 4 => 'population',
25 | ])
26 | ->and($rows[3])->toBe([
27 | 0 => 'Tokyo',
28 | 1 => '東京',
29 | 2 => 'JP',
30 | 3 => 'Asia',
31 | 4 => 13929286,
32 | ]);
33 | })->with(function () {
34 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
35 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)));
36 | yield 'file' => new CSVIterator(new SplFileObject($filename));
37 | });
38 |
39 | it('can make columns automatically', function (CSVIterator $iterator) { // Executed once per dataset yielded below.
40 | $rows = [...$iterator];
41 |
42 | expect($rows)->toHaveCount(10) // With 'columns' => 'auto', 10 rows are expected, keyed by the CSV header names.
43 | ->and($rows[0])->toBe([
44 | 'city_english_name' => 'New York',
45 | 'city_local_name' => 'New York',
46 | 'country_iso_code' => 'US',
47 | 'continent' => 'North America',
48 | 'population' => 8537673,
49 | ])
50 | ->and($rows[2])->toBe([
51 | 'city_english_name' => 'Tokyo',
52 | 'city_local_name' => '東京',
53 | 'country_iso_code' => 'JP',
54 | 'continent' => 'Asia',
55 | 'population' => 13929286,
56 | ]);
57 | })->with(function () { // Same CSV fixture, fed as a string (StrTokIterator) and as a file (SplFileObject).
58 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
59 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => 'auto']);
60 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => 'auto']);
61 | });
62 |
63 | it('can map user-defined columns', function (CSVIterator $iterator) { // Executed once per dataset yielded below.
64 | $rows = [...$iterator];
65 |
66 | expect($rows[1])->toBe([ // No 'skipFirstRow' option here: New York is expected at index 1; index 0 is not asserted.
67 | 'cityEnglishName' => 'New York',
68 | 'cityLocalName' => 'New York',
69 | 'countryIsoCode' => 'US',
70 | 'continent' => 'North America',
71 | 'population' => 8537673,
72 | ])
73 | ->and($rows[3])->toBe([
74 | 'cityEnglishName' => 'Tokyo',
75 | 'cityLocalName' => '東京',
76 | 'countryIsoCode' => 'JP',
77 | 'continent' => 'Asia',
78 | 'population' => 13929286,
79 | ]);
80 | })->with(function () { // Same CSV fixture as string and as file, with an explicit list of column names.
81 | $columns = [
82 | 'cityEnglishName',
83 | 'cityLocalName',
84 | 'countryIsoCode',
85 | 'continent',
86 | 'population',
87 | ];
88 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
89 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]);
90 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]);
91 | });
92 |
93 | it('skips the 1st row when asked to', function (CSVIterator $iterator) { // Executed once per dataset yielded below.
94 | $rows = [...$iterator];
95 |
96 | expect($rows[0])->toBe([ // With 'skipFirstRow' => true, New York is expected at index 0.
97 | 'cityEnglishName' => 'New York',
98 | 'cityLocalName' => 'New York',
99 | 'countryIsoCode' => 'US',
100 | 'continent' => 'North America',
101 | 'population' => 8537673,
102 | ])
103 | ->and($rows[2])->toBe([
104 | 'cityEnglishName' => 'Tokyo',
105 | 'cityLocalName' => '東京',
106 | 'countryIsoCode' => 'JP',
107 | 'continent' => 'Asia',
108 | 'population' => 13929286,
109 | ]);
110 | })->with(function () { // Same CSV fixture as string and as file, with explicit columns and 'skipFirstRow' enabled.
111 | $columns = [
112 | 'cityEnglishName',
113 | 'cityLocalName',
114 | 'countryIsoCode',
115 | 'continent',
116 | 'population',
117 | ];
118 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
119 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns, 'skipFirstRow' => true]);
120 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns, 'skipFirstRow' => true]);
121 | });
122 |
123 | it('adds fields when the row has not enough columns', function (CSVIterator $iterator) { // Executed once per dataset yielded below.
124 | $rows = [...$iterator];
125 |
126 | expect($rows[1])->toBe([
127 | 'cityEnglishName' => 'New York',
128 | 'cityLocalName' => 'New York',
129 | 'countryIsoCode' => 'US',
130 | 'continent' => 'North America',
131 | 'population' => 8537673,
132 | 'misc' => null, // Sixth declared column: expected to be present with a null value.
133 | ])
134 | ->and($rows[3])->toBe([
135 | 'cityEnglishName' => 'Tokyo',
136 | 'cityLocalName' => '東京',
137 | 'countryIsoCode' => 'JP',
138 | 'continent' => 'Asia',
139 | 'population' => 13929286,
140 | 'misc' => null,
141 | ]);
142 | })->with(function () { // Same CSV fixture as string and as file, declaring one extra column ('misc').
143 | $columns = [
144 | 'cityEnglishName',
145 | 'cityLocalName',
146 | 'countryIsoCode',
147 | 'continent',
148 | 'population',
149 | 'misc',
150 | ];
151 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
152 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]);
153 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]);
154 | });
155 |
156 | it('removes extra data whenever there are more fields than columns', function (CSVIterator $iterator) { // Executed once per dataset yielded below.
157 | $rows = [...$iterator];
158 |
159 | expect($rows[1])->toBe([ // Only the 4 declared columns are expected; no 'population' key.
160 | 'cityEnglishName' => 'New York',
161 | 'cityLocalName' => 'New York',
162 | 'countryIsoCode' => 'US',
163 | 'continent' => 'North America',
164 | ])
165 | ->and($rows[3])->toBe([
166 | 'cityEnglishName' => 'Tokyo',
167 | 'cityLocalName' => '東京',
168 | 'countryIsoCode' => 'JP',
169 | 'continent' => 'Asia',
170 | ]);
171 | })->with(function () { // Same CSV fixture as string and as file, declaring only 4 column names.
172 | $columns = [
173 | 'cityEnglishName',
174 | 'cityLocalName',
175 | 'countryIsoCode',
176 | 'continent',
177 | ];
178 | $filename = dirname(__DIR__, 2).'/Data/10-biggest-cities.csv';
179 | yield 'string content' => new CSVIterator(new StrTokIterator(file_get_contents($filename)), ['columns' => $columns]);
180 | yield 'file' => new CSVIterator(new SplFileObject($filename), ['columns' => $columns]);
181 | });
182 |
--------------------------------------------------------------------------------
/tests/Unit/Iterator/IteratorStreamTest.php:
--------------------------------------------------------------------------------
1 | Loop::set(Factory::create()));
16 |
17 | it('is readable during iteration', function () { // The stream must report readable until both items have been consumed.
18 | $items = ['foo', 'bar'];
19 | $stream = new IteratorStream($items);
20 |
21 | for ($i = 0; $i < 2; ++$i) { // One pass per item: check readability first, then consume one item.
22 | expect($stream->isReadable())->toBeTrue();
23 | $stream->iterator->consume();
24 | }
25 |
26 | expect($stream->isReadable())->toBeFalse(); // Nothing left to consume.
27 | Loop::stop();
28 | });
29 |
30 | it('can be paused and resumed', function () { // Checks that pause()/resume() toggle the stream's public $paused flag.
31 | $stream = new IteratorStream([]);
32 | expect($stream->paused)->toBeFalse(); // A new stream is expected to start unpaused.
33 |
34 | // When
35 | $stream->pause();
36 |
37 | // Then
38 | expect($stream->paused)->toBeTrue();
39 |
40 | // When
41 | $stream->resume();
42 |
43 | // Then
44 | expect($stream->paused)->toBeFalse();
45 | });
46 |
47 | it('can pipe data', function () { // After the loop has run, the piped destination must hold every item, in order.
48 | $items = ['foo', 'bar', 'baz'];
49 | $stream = new IteratorStream($items);
50 | $dest = new WritableStreamStub();
51 | $stream->pipe($dest);
52 |
53 | // When
54 | Loop::run();
55 |
56 | // Then
57 | expect($dest->data)->toBe($items);
58 | });
59 |
--------------------------------------------------------------------------------
/tests/Unit/Iterator/PregSplitIteratorTest.php:
--------------------------------------------------------------------------------
1 | toBe([
23 | 'foo',
24 | '',
25 | '',
26 | 'bar',
27 | ]);
28 | });
29 |
--------------------------------------------------------------------------------
/tests/Unit/Iterator/StrTokIteratorTest.php:
--------------------------------------------------------------------------------
1 | toBe([
23 | 'foo',
24 | 'bar',
25 | ]);
26 | });
27 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/CSVLoaderTest.php:
--------------------------------------------------------------------------------
1 | 'auto']));
23 | $output = $executor->process($cities)->output;
24 | expect($output)->toBe($destination);
25 |
26 | // @phpstan-ignore-next-line
27 | $writtenContent = implode('', [...new SplFileObject($output, 'r')]);
28 | // @phpstan-ignore-next-line
29 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]);
30 |
31 | expect($writtenContent)->toBe($expectedContent);
32 | });
33 |
34 | it('loads items to a CSV string', function () { // process() is called without a destination; its output is compared as a string.
35 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php';
36 | $executor = new EtlExecutor(loader: new CSVLoader(options: ['columns' => 'auto']));
37 | $output = $executor->process($cities)->output;
38 |
39 | // @phpstan-ignore-next-line
40 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]); // Whole CSV fixture, lines joined back together.
41 |
42 | expect($output)->toBe($expectedContent);
43 | });
44 |
45 | it('can write specific columns', function () { // The loader is given custom column names; the output must match the fixture with those names substituted.
46 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php';
47 | $initialColumns = [
48 | 'city_english_name',
49 | 'city_local_name',
50 | 'country_iso_code',
51 | 'continent',
52 | 'population',
53 | ];
54 | $prettyColumns = [
55 | 'CityEnglishName',
56 | 'CityLocalName',
57 | 'CountryIsoCode',
58 | 'Continent',
59 | 'Population',
60 | ];
61 | $executor = new EtlExecutor(loader: new CSVLoader(options: ['columns' => $prettyColumns]));
62 | $output = $executor->process($cities)->output;
63 |
64 | $expectedContent = strtr( // Replace each initial column name found in the fixture content with its pretty counterpart.
65 | implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')]), // @phpstan-ignore-line
66 | array_combine($initialColumns, $prettyColumns),
67 | );
68 |
69 | expect($output)->toBe($expectedContent);
70 | });
71 |
72 | it('can ignore columns', function () { // CSVLoader built with no options: the output must match the fixture minus its first line.
73 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php';
74 | $executor = new EtlExecutor(loader: new CSVLoader());
75 | $output = $executor->process($cities)->output;
76 |
77 | $lines = [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.csv', 'r')];
78 | unset($lines[0]); // Drop the fixture's first line from the expectation.
79 | $expectedContent = implode('', $lines); // @phpstan-ignore-line
80 |
81 | expect($output)->toBe($expectedContent);
82 | });
83 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/CallableLoaderTest.php:
--------------------------------------------------------------------------------
1 | load('foo', $state);
24 | $output = $loader->flush(false, $state);
25 |
26 | // Then
27 | expect($output)->toBe(['foo']);
28 | });
29 |
30 | it('complains if inner loader is not callable', function () { // load() on a CallableLoader constructed with no arguments must throw.
31 | // Given
32 | $state = new EtlState();
33 | $loader = new CallableLoader();
34 | $loader->load('foo', $state);
35 | })->throws(LoadException::class, 'Invalid destination.');
36 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/ChainLoaderTest.php:
--------------------------------------------------------------------------------
1 | $a[] = $item, // @phpstan-ignore-line
24 | ));
25 | $executor = $executor->loadInto(
26 | chain($executor->loader)
27 | ->with(fn (string $item) => $b[] = $item) // @phpstan-ignore-line
28 | ->with(
29 | new class() implements ConditionalLoaderInterface {
30 | public function supports(mixed $item, EtlState $state): bool
31 | {
32 | return 'foo' !== $item;
33 | }
34 |
35 | public function load(mixed $item, EtlState $state): void
36 | {
37 | $state->context[__CLASS__][] = $item;
38 | }
39 |
40 | public function flush(bool $isPartial, EtlState $state): mixed
41 | {
42 | foreach ($state->context[__CLASS__] as $item) {
43 | $state->context['storage'][] = $item;
44 | }
45 |
46 | return $state->context['storage'];
47 | }
48 | },
49 | )
50 | );
51 |
52 | // Given
53 | $input = ['foo', 'bar'];
54 |
55 | // When
56 | $executor->process($input, context: ['storage' => $c]);
57 |
58 | // Then
59 | expect([...$a])->toBe(['foo', 'bar'])
60 | ->and([...$b])->toBe(['foo', 'bar'])
61 | ->and([...$c])->toBe(['bar']);
62 | });
63 |
64 | it('silently chains loaders', function () { // Passing several callables to loadInto() must make each of them receive every item.
65 | // Background
66 | $a = new ArrayObject();
67 | $b = new ArrayObject();
68 |
69 | // Given
70 | $input = ['foo', 'bar'];
71 | $executor = (new EtlExecutor())->loadInto(
72 | fn (string $item) => $a[] = $item, // @phpstan-ignore-line
73 | fn (string $item) => $b[] = $item, // @phpstan-ignore-line
74 | );
75 |
76 | // When
77 | $executor->process($input);
78 |
79 | // Then
80 | expect([...$a])->toBe(['foo', 'bar']) // Both collectors must hold the full input, in order.
81 | ->and([...$b])->toBe(['foo', 'bar']);
82 | });
83 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/Doctrine/Book.php:
--------------------------------------------------------------------------------
1 | shouldReceive('getManagerForClass')->andReturn($manager);
22 | $manager->shouldReceive('persist')->twice();
23 | $manager->shouldReceive('flush')->once();
24 |
25 | loadInto(new DoctrineORMLoader($registry))->process([
26 | new Book(id: 1, name: 'Holy Bible'),
27 | new Book(id: 2, name: 'Fifty Shades of Grey'),
28 | ]);
29 | });
30 |
31 | it('complains if loaded item is not an object', function () { // Loading an array must be rejected with a LoadException.
32 | $loader = new DoctrineORMLoader(Mockery::mock(ManagerRegistry::class));
33 | $loader->load([], new EtlState());
34 | })->throws(LoadException::class, 'Expecting object, got array.');
35 |
36 | it('complains if loaded item is not a mapped Doctrine class', function () { // The mocked registry finds no manager for the item's class, so load() must throw.
37 | $registry = Mockery::mock(ManagerRegistry::class);
38 | $registry->shouldReceive('getManagerForClass')->andReturn(null); // Simulate "no manager for this class".
39 | $loader = new DoctrineORMLoader($registry);
40 | $loader->load(new stdClass(), new EtlState());
41 | })->throws(LoadException::class, 'Could not find manager for class stdClass.');
42 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/JSONLoaderTest.php:
--------------------------------------------------------------------------------
1 | process($cities)->output;
31 | expect($output)->toBe($destination);
32 |
33 | // @phpstan-ignore-next-line
34 | $writtenContent = implode('', [...new SplFileObject($output, 'r')]);
35 | // @phpstan-ignore-next-line
36 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.json', 'r')]);
37 |
38 | expect($writtenContent)->toBe($expectedContent);
39 | })->with('config');
40 |
41 | it('loads items to a JSON string', function (EtlConfiguration $options) { // Executed once per entry of the 'config' dataset (defined outside this excerpt).
42 | $cities = require dirname(__DIR__, 2).'/Data/10-biggest-cities.php';
43 | $executor = new EtlExecutor(loader: new JSONLoader(), options: $options);
44 | $output = $executor->process($cities)->output; // No destination is passed; the output is compared as a string below.
45 |
46 | // @phpstan-ignore-next-line
47 | $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/Data/10-biggest-cities.json', 'r')]);
48 |
49 | expect($output)->toBe($expectedContent);
50 | })->with('config');
51 |
--------------------------------------------------------------------------------
/tests/Unit/Loader/STDOUTLoaderTest.php:
--------------------------------------------------------------------------------
1 | $executor->process($input, context: [
33 | STDOUTLoader::class => [
34 | 'resource' => $resource, // fake php://stdout
35 | ],
36 | ]));
37 |
38 | // Then
39 | expect($output)->toBe($expected);
40 | });
41 |
42 | it('cannot load something which is not a string', fn () => loadInto(stdOut())->process([[]])) // The single input item is an array, which must be rejected.
43 | ->throws(LoadException::class, 'Expected string, got array.');
44 |
--------------------------------------------------------------------------------
/tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php:
--------------------------------------------------------------------------------
1 | $value = $normalizer->normalize($value));
23 |
24 | // Then
25 | expect($strings)->toBe(['foo', null]);
26 | });
27 |
--------------------------------------------------------------------------------
/tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php:
--------------------------------------------------------------------------------
1 | $value = $normalizer->normalize($value));
25 |
26 | // Then
27 | expect($strings)->toBe(['foo', 12345, 12345.67, '']);
28 | });
29 |
--------------------------------------------------------------------------------
/tests/Unit/Recipe/FilterRecipeTest.php:
--------------------------------------------------------------------------------
1 | !in_array($item, $skipItems, true),
26 | $eventClass,
27 | ),
28 | )
29 | ->transformWith(fn ($item) => strtoupper($item));
30 |
31 | // When
32 | $report = $executor->process(['banana', 'apple', 'strawberry', 'BANANA', 'APPLE', 'STRAWBERRY']);
33 |
34 | // Then
35 | expect($report->output)->toBe($expectedResult);
36 | })->with(function () {
37 | yield [null, ['APPLE', 'BANANA']];
38 | yield [ExtractEvent::class, ['APPLE', 'BANANA']];
39 | yield [BeforeLoadEvent::class, ['BANANA', 'BANANA']];
40 | });
41 |
42 | it('filters items (on an allow-list basis)', function (?string $eventClass, array $expectedResult) { // Executed once per dataset below: [event class or null, expected output].
43 | // Given
44 | $executor = withRecipe(
45 | new FilterRecipe(
46 | fn (string $item) => str_contains($item, 'b') || str_contains($item, 'B'),
47 | ...(null === $eventClass ? [] : [$eventClass]), // Forward the dataset's event class; a null dataset keeps the recipe's default event.
48 | ),
49 | )->transformWith(fn ($item) => strtoupper($item));
50 |
51 | // When
52 | $report = $executor->process(['banana', 'apple', 'strawberry', 'BANANA', 'APPLE', 'STRAWBERRY']);
53 |
54 | // Then
55 | expect($report->output)->toBe($expectedResult);
56 | })->with(function () { // The filter matches 'b' and 'B' alike, so filtering before or after the uppercase transform gives the same output.
57 | yield [null, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']];
58 | yield [ExtractEvent::class, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']];
59 | yield [BeforeLoadEvent::class, ['BANANA', 'STRAWBERRY', 'BANANA', 'STRAWBERRY']];
60 | });
61 |
62 | it('does not accept other types of events', function () { // Constructing a FilterRecipe bound to LoadEvent must throw.
63 | new FilterRecipe(fn () => '', LoadEvent::class);
64 | })->throws(
65 | InvalidArgumentException::class,
66 | sprintf('Can only filter on ExtractEvent / LoadEvent, not %s', LoadEvent::class), // NOTE(review): message lists "LoadEvent" as allowed while LoadEvent::class is the rejected value; presumably BeforeLoadEvent is meant. Confirm against FilterRecipe.
67 | );
68 |
--------------------------------------------------------------------------------
/tests/Unit/Recipe/LoggerRecipeTest.php:
--------------------------------------------------------------------------------
1 | withRecipe($loggerRecipe);
21 |
22 | // When
23 | $executor->process(['foo', 'bar']);
24 |
25 | // Then
26 | $records = $handler->getRecords();
27 | expect($records)->toHaveCount(12)->and($records)->sequence(
28 | fn ($record) => $record->message->toEqual('Initializing ETL...')->and($record->level->toBe(Level::Debug)),
29 | fn ($record) => $record->message->toEqual('Starting ETL...')->and($record->level->toBe(Level::Info)),
30 | fn ($record) => $record->message->toContain('Extracting item')->and($record->level->toBe(Level::Debug)),
31 | fn ($record) => $record->message->toContain('Transformed item')->and($record->level->toBe(Level::Debug)),
32 | fn ($record) => $record->message->toContain('Loaded item')->and($record->level->toBe(Level::Debug)),
33 | fn ($record) => $record->message->toContain('Flushing {nb} items (early)...')->and($record->level->toBe(Level::Info)),
34 | fn ($record) => $record->message->toContain('Extracting item')->and($record->level->toBe(Level::Debug)),
35 | fn ($record) => $record->message->toContain('Transformed item')->and($record->level->toBe(Level::Debug)),
36 | fn ($record) => $record->message->toContain('Loaded item')->and($record->level->toBe(Level::Debug)),
37 | fn ($record) => $record->message->toContain('Flushing {nb} items (early)...')->and($record->level->toBe(Level::Info)),
38 | fn ($record) => $record->message->toContain('Flushing {nb} items...')->and($record->level->toBe(Level::Info)),
39 | fn ($record) => $record->message->toContain('ETL complete.')->and($record->level->toBe(Level::Info)),
40 | );
41 | });
42 |
--------------------------------------------------------------------------------
/tests/Unit/Recipe/RecipeTest.php:
--------------------------------------------------------------------------------
1 | onInit(function () use (&$hasReceivedInitEvent) {
20 | $hasReceivedInitEvent = true;
21 | })
22 | ->onEnd(function () use (&$hasReceivedEndEvent) {
23 | $hasReceivedEndEvent = true;
24 | });
25 | },
26 | fn (EtlExecutor $executor) => $executor->withContext(['foo' => 'bar'])
27 | );
28 |
29 | // When
30 | $report = $executor->process([]);
31 |
32 | // Then
33 | expect($hasReceivedInitEvent)->toBeTrue()
34 | ->and($hasReceivedEndEvent)->toBeTrue()
35 | ->and($report->context)->toBe(['foo' => 'bar'])
36 | ;
37 | });
38 |
--------------------------------------------------------------------------------
/tests/Unit/Transformer/CallableTransformerTest.php:
--------------------------------------------------------------------------------
1 | yield strtoupper($value));
16 |
17 | // When
18 | $transformed = $transformer->transform('foo', $state);
19 |
20 | // Then
21 | expect([...$transformed])->toBe(['FOO']);
22 | });
23 |
--------------------------------------------------------------------------------
/tests/Unit/Transformer/ChainTransformerTest.php:
--------------------------------------------------------------------------------
1 | strrev($item)
22 | ));
23 | $executor = $executor->transformWith(
24 | chain($executor->transformer)
25 | ->with(function (string $item): Generator {
26 | yield $item;
27 | yield strtoupper($item);
28 | })
29 | ->with(fn (Generator $items): array => [...$items])
30 | ->with(function (array $items): array {
31 | $items[] = 'hey';
32 |
33 | return $items;
34 | })
35 | ->with(fn (array $items): string => implode('-', $items)),
36 | );
37 |
38 | // When
39 | $report = $executor->process($input);
40 |
41 | // Then
42 | expect($report->output)->toBe([
43 | 'oof-OOF-hey',
44 | 'rab-RAB-hey',
45 | ]);
46 | });
47 |
48 | it('silently chains transformers', function () { // Passing several callables to transformWith() must apply them in sequence, each receiving the previous result.
49 | // Given
50 | $input = ['foo', 'bar'];
51 |
52 | $etl = (new EtlExecutor())
53 | ->transformWith(
54 | fn (string $item): string => strrev($item), // 'foo' -> 'oof'
55 | function (string $item): Generator { // Yields the item, then its uppercase form.
56 | yield $item;
57 | yield strtoupper($item);
58 | },
59 | fn (Generator $items): array => [...$items], // Collects the generator into an array.
60 | function (array $items): array {
61 | $items[] = 'hey';
62 |
63 | return $items;
64 | },
65 | fn (array $items) => yield implode('-', $items) // Last step is itself a generator, yielding a single joined string.
66 | );
67 |
68 | // When
69 | $report = $etl->process($input);
70 |
71 | // Then
72 | expect($report->output)->toBe([ // One joined string per input item.
73 | 'oof-OOF-hey',
74 | 'rab-RAB-hey',
75 | ]);
76 | });
77 |
--------------------------------------------------------------------------------
/tests/Unit/Transformer/NullTransformerTest.php:
--------------------------------------------------------------------------------
1 | transform('foo', $state);
19 |
20 | // Then
21 | expect($transformedItems)->toBe('foo');
22 | });
23 |
--------------------------------------------------------------------------------