├── .gitignore
├── .scrutinizer.yml
├── .travis.yml
├── LICENSE
├── README.md
├── behat.yml
├── composer.json
├── examples
├── 01_default_csv_noop_console.php
├── 02_default_csv_callback_console.php
├── 03_default_amazon_chain_console.php
├── 04_default_amazon_chain_console_with_events.php
├── doctrine
│ ├── 01_default_doctrine_query_console.php
│ ├── 02_default_doctrine_prepared_query_console.php
│ ├── 03_default_doctrine_query_dbal_loader.php
│ └── mysql-bootstrap.php
└── lib.php
├── features
├── bootstrap
│ ├── BaseContext.php
│ ├── DefaultPipelineContext.php
│ └── HookContext.php
└── default_pipeline.feature
├── fixtures
├── books.csv
└── mysql
│ └── books.sql
├── spec
└── Extraload
│ ├── Extractor
│ └── CsvExtractorSpec.php
│ ├── Loader
│ ├── ConsoleLoaderSpec.php
│ └── Doctrine
│ │ └── DbalLoaderSpec.php
│ ├── Pipeline
│ └── DefaultPipelineSpec.php
│ └── Transformer
│ ├── CallbackTransformerSpec.php
│ ├── NoopTransformerSpec.php
│ ├── PropertyTransformerSpec.php
│ └── TransformerChainSpec.php
└── src
└── Extraload
├── Events.php
├── Extractor
├── CsvExtractor.php
├── Doctrine
│ └── QueryExtractor.php
└── ExtractorInterface.php
├── Loader
├── AutoFlushLoader.php
├── ConsoleLoader.php
├── Doctrine
│ └── DbalLoader.php
└── LoaderInterface.php
├── Pipeline
├── DefaultPipeline.php
└── PipelineInterface.php
└── Transformer
├── CallbackTransformer.php
├── NoopTransformer.php
├── PropertyTransformer.php
├── TransformerChain.php
└── TransformerInterface.php
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | vendor
3 | composer.lock
4 |
--------------------------------------------------------------------------------
/.scrutinizer.yml:
--------------------------------------------------------------------------------
1 | filter:
2 | paths:
3 | - src/*
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: php
2 |
3 | sudo: false
4 |
5 | php:
6 | - 5.6
7 | - 7.0
8 |
9 | before_script:
10 | - composer install --dev
11 |
12 | script:
13 | - bin/phpspec run -f dot
14 | - bin/behat -f progress
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Saša Stamenković
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
6 |
7 | symfony upgrade fixer •
8 | twig gettext extractor •
9 | wisdom •
10 | centipede •
11 | permissions handler •
12 | extraload •
13 | gravatar •
14 | locurro •
15 | country list •
16 | transliterator
17 |
18 |
19 | # Extraload [Build Status](https://travis-ci.org/umpirsky/Extraload) [Scrutinizer Code Quality](https://scrutinizer-ci.com/g/umpirsky/Extraload/?branch=master)
20 |
21 | Powerful ETL library.
22 |
23 |
24 | ## Examples
25 |
26 | ### Dumping CSV data into the console
27 |
28 | Input data is given in CSV format:
29 | ```csv
30 | "99921-58-10-7", "Divine Comedy", "Dante Alighieri"
31 | "9971-5-0210-0", "A Tale of Two Cities", "Charles Dickens"
32 | "960-425-059-0", "The Lord of the Rings", "J. R. R. Tolkien"
33 | "80-902734-1-6", "And Then There Were None", "Agatha Christie"
34 | ```
35 | With:
36 | ```php
37 | (new DefaultPipeline(
38 | new CsvExtractor(
39 | new \SplFileObject('books.csv')
40 | ),
41 | new NoopTransformer(),
42 | new ConsoleLoader(
43 | new Table(new ConsoleOutput())
44 | )
45 | ))->process();
46 | ```
47 | It can be dumped as a table to the console:
48 | ```
49 | +---------------+--------------------------+------------------+
50 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
51 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens |
52 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien |
53 | | 80-902734-1-6 | And Then There Were None | Agatha Christie |
54 | +---------------+--------------------------+------------------+
55 | ```
56 | In this example `NoopTransformer` is used, but various transformations can be applied. Transformers can also be chained using `TransformerChain`.
57 |
58 | ### Dumping a Doctrine query into the console
59 |
60 | First of all make sure to load the fixtures into a database -- this example works with MySQL:
61 |
62 | mysql> source /path/to/Extraload/fixtures/mysql/books.sql
63 |
64 | So the following code:
65 |
66 | ```php
67 | (new DefaultPipeline(
68 | new QueryExtractor($conn, 'SELECT * FROM books'),
69 | new NoopTransformer(),
70 | new ConsoleLoader(
71 | new Table($output = new ConsoleOutput())
72 | )
73 | ))->process();
74 | ```
75 |
76 | Will dump these results to the console:
77 |
78 | +---------------+--------------------------+------------------+
79 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
80 | | 9781847493583 | La Vita Nuova | Dante Alighieri |
81 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens |
82 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien |
83 | | 80-902734-1-6 | And Then There Were None | Agatha Christie |
84 | +---------------+--------------------------+------------------+
85 |
86 | ### Dumping a Doctrine prepared query into the console
87 |
88 | The following code:
89 |
90 | ```php
91 | // ...
92 |
93 | $sql = "SELECT * FROM books WHERE author = :author";
94 | $values = [
95 | [
96 | 'parameter' => ':author',
97 | 'value' => 'Dante Alighieri',
98 | 'data_type' => PDO::PARAM_STR // optional
99 | ]
100 | ];
101 |
102 | (new DefaultPipeline(
103 | new QueryExtractor($conn, $sql, $values),
104 | new NoopTransformer(),
105 | new ConsoleLoader(
106 | new Table($output = new ConsoleOutput())
107 | )
108 | ))->process();
109 | ```
110 |
111 | Will dump these results to the console:
112 |
113 | +---------------+---------------+-----------------+
114 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
115 | | 9781847493583 | La Vita Nuova | Dante Alighieri |
116 | +---------------+---------------+-----------------+
117 |
118 | ### Dumping a Doctrine query into a table
119 |
120 | The following code:
121 |
122 | ```php
123 | // ...
124 |
125 | (new DefaultPipeline(
126 | new QueryExtractor($conn, 'SELECT * FROM books'),
127 | new NoopTransformer(),
128 | new DbalLoader($conn, 'my_books')
129 | ))->process();
130 | ```
131 |
132 | Will dump the results into the `my_books` table:
133 |
134 | mysql> select * from my_books;
135 | +----------------+--------------------------+----------------------------+
136 | | isbn | title | author |
137 | +----------------+--------------------------+----------------------------+
138 | | 9781503262140 | Faust | Johann Wolfgang von Goethe |
139 | | 978-0156949606 | The Waves | Virgina Woolf |
140 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
141 | | 9781847493583 | La Vita Nuova | Dante Alighieri |
142 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens |
143 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien |
144 | | 80-902734-1-6 | And Then There Were None | Agatha Christie |
145 | +----------------+--------------------------+----------------------------+
146 | 7 rows in set (0.00 sec)
147 |
148 | See more [examples](https://github.com/umpirsky/Extraload/tree/master/examples).
149 |
150 | ## Inspiration
151 |
152 | Inspired by [php-etl](https://github.com/docteurklein/php-etl) and [petl](https://github.com/alimanfoo/petl).
153 |
--------------------------------------------------------------------------------
/behat.yml:
--------------------------------------------------------------------------------
1 | default:
2 | suites:
3 | default:
4 | contexts:
5 | - HookContext
6 | - DefaultPipelineContext
7 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "umpirsky/extraload",
3 | "description": "Powerful ETL library.",
4 | "keywords": [
5 | "etl",
6 | "extract",
7 | "transform",
8 | "convert",
9 | "load",
10 | "import",
11 | "export"
12 | ],
13 | "homepage": "http://umpirsky.com",
14 | "type": "library",
15 | "require": {
16 | "php": ">=5.6",
17 | "symfony/event-dispatcher": "^2.7 || ^3.0 || ^4.0"
18 | },
19 | "require-dev": {
20 | "phpspec/phpspec": "^3.0",
21 | "behat/behat": "^3.1",
22 | "phpunit/phpunit": "^5.1",
23 | "symfony/console": "^3.0",
24 | "doctrine/dbal": "^2.5",
25 | "symfony/property-access": "^3.0",
26 | "behat/mink-selenium2-driver": "^1.3"
27 | },
28 | "suggest": {
29 | "doctrine/dbal": "Allows loading data into database using DBAL loader",
30 | "symfony/property-access": "Allows applying transformer on property by path",
31 | "symfony/console": "Allows displaying tabular data in console"
32 | },
33 | "license": "MIT",
34 | "authors": [
35 | {
36 | "name": "Saša Stamenković",
37 | "email": "umpirsky@gmail.com"
38 | }
39 | ],
40 | "autoload": {
41 | "psr-0": { "Extraload\\": "src/" }
42 | },
43 | "config": {
44 | "bin-dir": "bin"
45 | },
46 | "extra": {
47 | "branch-alias": {
48 | "dev-master": "0.1.0-dev"
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/examples/01_default_csv_noop_console.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | process();
22 |
--------------------------------------------------------------------------------
/examples/02_default_csv_callback_console.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | process();
26 |
--------------------------------------------------------------------------------
/examples/03_default_amazon_chain_console.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | new Session(new Selenium2Driver()),
21 | ])),
22 | new TransformerChain([
23 | new DocumentToElementTransformer(),
24 | new ElementToStringTransformer(),
25 | new PropertyTransformer(
26 | new CallbackTransformer('truncate'),
27 | PropertyAccess::createPropertyAccessor(),
28 | '[title]'
29 | ),
30 | ]),
31 | new ConsoleLoader(
32 | new Table($output = new ConsoleOutput())
33 | )
34 | ))->process();
35 |
--------------------------------------------------------------------------------
/examples/04_default_amazon_chain_console_with_events.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | addListener(Events::LOAD, function (GenericEvent $event) use ($output) {
25 | $output->writeln(sprintf('Loading %s', $event->getSubject()['title']));
26 | });
27 |
28 | (new DefaultPipeline(
29 | new AmazonExtractor(new Mink([
30 | 'selenium2' => new Session(new Selenium2Driver()),
31 | ])),
32 | new TransformerChain([
33 | new DocumentToElementTransformer(),
34 | new ElementToStringTransformer(),
35 | new PropertyTransformer(
36 | new CallbackTransformer('truncate'),
37 | PropertyAccess::createPropertyAccessor(),
38 | '[title]'
39 | ),
40 | ]),
41 | new ConsoleLoader(new Table($output)),
42 | $dispatcher
43 | ))->process();
44 |
--------------------------------------------------------------------------------
/examples/doctrine/01_default_doctrine_query_console.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | process();
21 |
--------------------------------------------------------------------------------
/examples/doctrine/02_default_doctrine_prepared_query_console.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | ':author',
18 | 'value' => 'Dante Alighieri',
19 | 'data_type' => PDO::PARAM_STR // optional
20 | ]
21 | ];
22 |
23 | (new DefaultPipeline(
24 | new QueryExtractor($conn, $sql, $values),
25 | new NoopTransformer(),
26 | new ConsoleLoader(
27 | new Table($output = new ConsoleOutput())
28 | )
29 | ))->process();
30 |
--------------------------------------------------------------------------------
/examples/doctrine/03_default_doctrine_query_dbal_loader.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | process();
19 |
--------------------------------------------------------------------------------
/examples/doctrine/mysql-bootstrap.php:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | 'extraload_fixtures',
11 | 'user' => 'extraload_fixtures',
12 | 'password' => 'password',
13 | 'host' => 'localhost',
14 | 'driver' => 'pdo_mysql',
15 | );
16 | $conn = \Doctrine\DBAL\DriverManager::getConnection($connectionParams, $config);
17 |
--------------------------------------------------------------------------------
/examples/lib.php:
--------------------------------------------------------------------------------
1 | mink = $mink;
20 |
21 | $this->getSession()->visit('http://www.amazon.com/gp/goldbox');
22 |
23 | foreach ($this->getSession()->getPage()->findAll('css', '#widgetContent div.dealTile > a') as $element) {
24 | $this->urls[] = $element->getAttribute('href');
25 | }
26 | }
27 |
28 | public function extract()
29 | {
30 | $data = $this->current();
31 |
32 | $this->next();
33 |
34 | return $data;
35 | }
36 |
37 | public function current()
38 | {
39 | if (!$this->valid()) {
40 | return;
41 | }
42 |
43 | $this->getSession()->visit($this->urls[$this->key()]);
44 |
45 | return $this->getSession()->getPage();
46 | }
47 |
48 | public function key()
49 | {
50 | return $this->index;
51 | }
52 |
53 | public function next()
54 | {
55 | ++$this->index;
56 | }
57 |
58 | public function rewind()
59 | {
60 | $this->index = 0;
61 | }
62 |
63 | public function valid()
64 | {
65 | return isset($this->urls[$this->key()]);
66 | }
67 |
68 | private function getSession()
69 | {
70 | return $this->mink->getSession('selenium2');
71 | }
72 | }
73 |
/**
 * Reduces a page document to the two elements the example pipeline cares about.
 */
class DocumentToElementTransformer implements TransformerInterface
{
    /**
     * Looks up the elements with ids "title" and "priceblock_dealprice".
     *
     * @param mixed $data Must be a DocumentElement.
     *
     * @return array Map with the keys 'title' and 'price' holding the lookup results.
     *
     * @throws \InvalidArgumentException When $data is not a DocumentElement.
     */
    public function transform($data)
    {
        if ($data instanceof DocumentElement) {
            $title = $data->findById('title');
            $price = $data->findById('priceblock_dealprice');

            return [
                'title' => $title,
                'price' => $price,
            ];
        }

        throw new \InvalidArgumentException('Can transform only DocumentElement.');
    }
}
88 |
/**
 * Replaces every element in a record with its text content.
 */
class ElementToStringTransformer implements TransformerInterface
{
    /**
     * Converts each entry of $data via getText().
     *
     * @param mixed $data Iterable record whose values are Element instances or null.
     *
     * @return mixed The record with texts in place of elements, or null as soon
     *               as a null entry is encountered.
     *
     * @throws \InvalidArgumentException When an entry is neither null nor an Element.
     */
    public function transform($data)
    {
        foreach ($data as $name => $node) {
            // A single missing element discards the whole record.
            if (null === $node) {
                return null;
            }

            if ($node instanceof Element) {
                $data[$name] = $node->getText();

                continue;
            }

            throw new \InvalidArgumentException('Can transform only Element.');
        }

        return $data;
    }
}
108 |
/**
 * Shortens a string to roughly 30 characters without splitting a word.
 *
 * Strings of 30 characters or fewer are returned unchanged. Longer strings are
 * cut at the first space found at or after character offset 30 (or exactly at
 * offset 30 when there is no such space), right-trimmed, and suffixed with "...".
 *
 * Lengths and offsets are measured in UTF-8 characters rather than bytes, so a
 * multi-byte character is never cut in half.
 *
 * @param string $value
 *
 * @return string
 */
function truncate($value)
{
    $length = 30;
    $encoding = 'UTF-8';

    if (mb_strlen($value, $encoding) <= $length) {
        return $value;
    }

    // Prefer breaking on the next space so the last word stays intact.
    $breakpoint = mb_strpos($value, ' ', $length, $encoding);
    if (false !== $breakpoint) {
        $length = $breakpoint;
    }

    return rtrim(mb_substr($value, 0, $length, $encoding)).'...';
}
123 |
--------------------------------------------------------------------------------
/features/bootstrap/BaseContext.php:
--------------------------------------------------------------------------------
1 | createDirectory(dirname($path));
15 |
16 | file_put_contents($path, $content);
17 |
18 | return $path;
19 | }
20 |
/**
 * Creates a directory, including missing parents, unless it already exists.
 *
 * @param string $path Directory to create.
 *
 * @throws \RuntimeException When the directory does not exist and cannot be created.
 */
protected function createDirectory($path)
{
    // is_dir() is checked again after a failed mkdir() because the directory
    // may have been created by someone else between the first check and the call.
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        throw new \RuntimeException(sprintf('Directory "%s" could not be created.', $path));
    }
}
27 |
28 | protected function stringNodeToString(PyStringNode $string)
29 | {
30 | return strtr((string) $string, array("'''" => '"""'));
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/features/bootstrap/DefaultPipelineContext.php:
--------------------------------------------------------------------------------
1 | pipeline = new DefaultPipeline(
31 | $this->createCsvExtractor(),
32 | $this->createTransformer($transformer),
33 | new ConsoleLoader(
34 | new Table($this->output = new BufferedOutput())
35 | )
36 | );
37 | }
38 |
39 | /**
40 | * @Given I create csv to database pipeline
41 | */
42 | public function iCreateCsvToDatabasePipeline()
43 | {
44 | return $this->pipeline = new DefaultPipeline(
45 | $this->createCsvExtractor(),
46 | $this->createTransformer('callable'),
47 | new DbalLoader(
48 | $this->getConnection(),
49 | $this->workingTable
50 | )
51 | );
52 | }
53 |
54 | /**
55 | * @Given I process it
56 | */
57 | public function iProcessIt()
58 | {
59 | $this->pipeline->process();
60 | }
61 |
62 | /**
63 | * @Then I should see in console:
64 | */
65 | public function iShouldSeeInConsole(PyStringNode $expected)
66 | {
67 | $expected = $this->stringNodeToString($expected);
68 | $actual = trim($this->output->fetch());
69 |
70 | PHPUnit_Framework_Assert::assertEquals($expected, $actual);
71 | }
72 |
/**
 * Compares the rows of the working table, in order, against the expected table.
 *
 * @Then I should see in database:
 */
public function iShouldSeeInDatabase(TableNode $table)
{
    $actual = $this->getConnection()
        ->createQueryBuilder()
        ->select('*')
        ->from($this->workingTable)
        ->execute()
        ->fetchAll()
    ;

    foreach ($table->getHash() as $key => $expected) {
        // Fail with a readable assertion instead of an undefined-offset error
        // when fewer rows were loaded than the scenario expects.
        PHPUnit_Framework_Assert::assertArrayHasKey(
            $key,
            $actual,
            sprintf('Expected row #%d was not found in the database.', $key)
        );
        PHPUnit_Framework_Assert::assertEquals($expected, $actual[$key]);
    }
}
90 |
91 | private function createCsvExtractor()
92 | {
93 | return new CsvExtractor(new \SplFileObject(__DIR__.'/../../fixtures/books.csv'));
94 | }
95 |
96 | private function createTransformer($type)
97 | {
98 | switch ($type) {
99 | case 'callable':
100 | return new CallbackTransformer(function ($data) {
101 | return [
102 | 'isbn' => $data[0],
103 | 'title' => $data[1],
104 | 'author' => $data[2],
105 | ];
106 | });
107 |
108 | case 'chain':
109 | return new TransformerChain([
110 | new CallbackTransformer(function ($data) {
111 | unset($data[0]);
112 |
113 | return $data;
114 | }),
115 | new CallbackTransformer(function ($data) {
116 | return [
117 | 'title' => $data[1],
118 | 'author' => $data[2],
119 | ];
120 | }),
121 | ]);
122 | }
123 |
124 | throw new PendingException(sprintf('Implement %s transformer creator.', $type));
125 | }
126 |
/**
 * Returns the shared connection, opening it on first use.
 *
 * The first call connects to an in-memory SQLite database and creates the
 * working table with the columns isbn, title and author.
 */
private function getConnection()
{
    if (null !== $this->connection) {
        return $this->connection;
    }

    $this->connection = DriverManager::getConnection(['url' => 'sqlite:///:memory:']);

    $createTable = sprintf('CREATE TABLE %s(isbn, title, author)', $this->workingTable);
    $this->connection->exec($createTable);

    return $this->connection;
}
136 | }
137 |
--------------------------------------------------------------------------------
/features/bootstrap/HookContext.php:
--------------------------------------------------------------------------------
1 | clearDirectory(self::$workingDirectory);
25 | }
26 |
/**
 * Recursively deletes a directory together with everything inside it.
 *
 * @param string $path Directory to remove.
 */
private function clearDirectory($path)
{
    // Drop the "." and ".." pseudo-entries by name. scandir() sorts its result
    // alphabetically, so entries such as "!foo" or "-bar" come before ".";
    // discarding the first two results could skip real files and then recurse
    // into "." or "..".
    $files = array_diff(scandir($path), ['.', '..']);

    foreach ($files as $file) {
        $file = $path.DIRECTORY_SEPARATOR.$file;
        if (is_dir($file)) {
            $this->clearDirectory($file);
        } else {
            unlink($file);
        }
    }

    // The directory is empty at this point and can be removed itself.
    rmdir($path);
}
44 | }
45 |
--------------------------------------------------------------------------------
/features/default_pipeline.feature:
--------------------------------------------------------------------------------
1 | Feature: Default pipeline
2 | In order to run sequential ETL process
3 | As ETL developer
4 | I need to be able to process default ETL pipeline
5 |
6 | Scenario: Dump CSV file to console table
7 | Given I create csv to console pipeline using "callable" transformer
8 | And I process it
9 | Then I should see in console:
10 | """
11 | +---------------+--------------------------+------------------+
12 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
13 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens |
14 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien |
15 | | 80-902734-1-6 | And Then There Were None | Agatha Christie |
16 | +---------------+--------------------------+------------------+
17 | """
18 |
19 | Scenario: Dump CSV file to console table using transformer chain
20 | Given I create csv to console pipeline using "chain" transformer
21 | And I process it
22 | Then I should see in console:
23 | """
24 | +--------------------------+------------------+
25 | | Divine Comedy | Dante Alighieri |
26 | | A Tale of Two Cities | Charles Dickens |
27 | | The Lord of the Rings | J. R. R. Tolkien |
28 | | And Then There Were None | Agatha Christie |
29 | +--------------------------+------------------+
30 | """
31 |
32 | Scenario: Import CSV file into database
33 | Given I create csv to database pipeline
34 | And I process it
35 | Then I should see in database:
36 | | isbn | title | author |
37 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri |
38 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens |
39 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien |
40 | | 80-902734-1-6 | And Then There Were None | Agatha Christie |
41 |
--------------------------------------------------------------------------------
/fixtures/books.csv:
--------------------------------------------------------------------------------
1 | "99921-58-10-7", "Divine Comedy", "Dante Alighieri"
2 | "9971-5-0210-0", "A Tale of Two Cities", "Charles Dickens"
3 | "960-425-059-0", "The Lord of the Rings", "J. R. R. Tolkien"
4 | "80-902734-1-6", "And Then There Were None", "Agatha Christie"
5 |
--------------------------------------------------------------------------------
/fixtures/mysql/books.sql:
--------------------------------------------------------------------------------
1 | CREATE DATABASE extraload_fixtures;
2 |
3 | USE extraload_fixtures;
4 |
5 | GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, INDEX, ALTER, LOCK TABLES, CREATE TEMPORARY TABLES
6 | ON extraload_fixtures.* TO 'extraload_fixtures'@'localhost' IDENTIFIED BY 'password';
7 |
8 | CREATE TABLE books (
9 | isbn VARCHAR(16) NOT NULL,
10 | title VARCHAR(128) NOT NULL,
11 | author VARCHAR(128) NOT NULL
12 | );
13 |
14 | CREATE TABLE my_books (
15 | isbn VARCHAR(16) NOT NULL,
16 | title VARCHAR(128) NOT NULL,
17 | author VARCHAR(128) NOT NULL
18 | );
19 |
20 | INSERT INTO books (isbn, title, author)
21 | VALUES
22 | ('99921-58-10-7', 'Divine Comedy', 'Dante Alighieri'),
23 | ('9781847493583', 'La Vita Nuova', 'Dante Alighieri'),
24 | ('9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens'),
25 | ('960-425-059-0', 'The Lord of the Rings', 'J. R. R. Tolkien'),
26 | ('80-902734-1-6', 'And Then There Were None', 'Agatha Christie');
27 |
28 | INSERT INTO my_books (isbn, title, author)
29 | VALUES
30 | ('9781503262140', 'Faust', 'Johann Wolfgang von Goethe'),
31 | ('978-0156949606', 'The Waves', 'Virgina Woolf');
32 |
--------------------------------------------------------------------------------
/spec/Extraload/Extractor/CsvExtractorSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith(new \SplFileObject(__DIR__.'/../../../fixtures/books.csv'));
12 | }
13 |
14 | function it_is_initializable()
15 | {
16 | $this->shouldHaveType('Extraload\Extractor\CsvExtractor');
17 | }
18 |
19 | function it_implements_extractor_interface()
20 | {
21 | $this->shouldImplement('Extraload\Extractor\ExtractorInterface');
22 | }
23 |
24 | function it_iterates_over_csv_rows()
25 | {
26 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']);
27 | $this->extract()->shouldReturn(['9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens']);
28 | $this->extract()->shouldReturn(['960-425-059-0', 'The Lord of the Rings', 'J. R. R. Tolkien']);
29 | $this->extract()->shouldReturn(['80-902734-1-6', 'And Then There Were None', 'Agatha Christie']);
30 | }
31 |
32 | function it_moves_over_csv_rows()
33 | {
34 | $this->current()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']);
35 | $this->next();
36 | $this->current()->shouldReturn(['9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens']);
37 | $this->next();
38 | $this->current()->shouldReturn(['960-425-059-0', 'The Lord of the Rings', 'J. R. R. Tolkien']);
39 | }
40 |
41 | function it_rewinds()
42 | {
43 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']);
44 | $this->rewind();
45 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/spec/Extraload/Loader/ConsoleLoaderSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith($table);
13 | }
14 |
15 | function it_is_initializable()
16 | {
17 | $this->shouldHaveType('Extraload\Loader\ConsoleLoader');
18 | }
19 |
20 | function it_implements_loader_interface()
21 | {
22 | $this->shouldImplement('Extraload\Loader\LoaderInterface');
23 | }
24 |
25 | function it_loads_data_in_console_using_table_helper(Table $table)
26 | {
27 | $table->addRow(['a1', 'b1', 'c1'])->shouldBeCalled();
28 |
29 | $this->load(['a1', 'b1', 'c1']);
30 | }
31 |
32 | function it_renders_data_in_console_on_flush(Table $table)
33 | {
34 | $table->render()->shouldBeCalled();
35 |
36 | $this->flush();
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/spec/Extraload/Loader/Doctrine/DbalLoaderSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith($connection, 'data');
14 | }
15 |
16 | function it_is_initializable()
17 | {
18 | $this->shouldHaveType('Extraload\Loader\Doctrine\DbalLoader');
19 | }
20 |
21 | function it_implements_loader_interface()
22 | {
23 | $this->shouldImplement('Extraload\Loader\LoaderInterface');
24 | }
25 |
26 | function it_loads_data_into_database_using_doctrine_dbal_connection(Connection $connection)
27 | {
28 | $connection->insert('data', ['a1', 'b1', 'c1'])->shouldBeCalled();
29 |
30 | $this->load(['a1', 'b1', 'c1']);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/spec/Extraload/Pipeline/DefaultPipelineSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith($extractor, $transformer, $loader);
18 | }
19 |
20 | function it_is_initializable()
21 | {
22 | $this->shouldHaveType('Extraload\Pipeline\DefaultPipeline');
23 | }
24 |
25 | function it_implements_pipeline_interface()
26 | {
27 | $this->shouldImplement('Extraload\Pipeline\PipelineInterface');
28 | }
29 |
30 | function it_does_not_transform_nor_load_if_no_data_extracted(
31 | ExtractorInterface $extractor,
32 | TransformerInterface $transformer,
33 | LoaderInterface $loader
34 | )
35 | {
36 | $extractor->extract()->shouldBeCalled()->willReturn(null);
37 | $transformer->transform(Argument::any())->shouldNotBeCalled();
38 | $loader->flush()->shouldBeCalled();
39 |
40 | $this->process();
41 | }
42 |
43 | function it_processes_etl_sequentially(
44 | ExtractorInterface $extractor,
45 | TransformerInterface $transformer,
46 | LoaderInterface $loader
47 | )
48 | {
49 | $extractor->extract()->shouldBeCalled()->willReturn(['a1', 'b1', 'c1'], null);
50 | $transformer->transform(['a1', 'b1', 'c1'])->shouldBeCalled()->willReturn(['c1', 'b1', 'a1']);
51 | $loader->load(['c1', 'b1', 'a1'])->shouldBeCalled();
52 | $loader->flush()->shouldBeCalled();
53 |
54 | $this->process();
55 | }
56 |
57 | function it_dispatches_events_during_etl_processesing(
58 | ExtractorInterface $extractor,
59 | TransformerInterface $transformer,
60 | LoaderInterface $loader,
61 | EventDispatcherInterface $eventDispatcher
62 | )
63 | {
64 | $this->beConstructedWith($extractor, $transformer, $loader, $eventDispatcher);
65 |
66 | $extractor->extract()->shouldBeCalled()->willReturn(['a1', 'b1', 'c1'], null);
67 | $transformer->transform(['a1', 'b1', 'c1'])->shouldBeCalled()->willReturn(['c1', 'b1', 'a1']);
68 | $loader->load(['c1', 'b1', 'a1'])->shouldBeCalled();
69 | $loader->flush()->shouldBeCalled();
70 |
71 | $eventDispatcher->dispatch(Events::PRE_PROCESS, Argument::any())->shouldBeCalled();
72 | $eventDispatcher->dispatch(Events::POST_PROCESS, Argument::any())->shouldBeCalled();
73 | $eventDispatcher->dispatch(Events::EXTRACT, Argument::any())->shouldBeCalled();
74 | $eventDispatcher->dispatch(Events::TRANSFORM, Argument::any())->shouldBeCalled();
75 | $eventDispatcher->dispatch(Events::LOAD, Argument::any())->shouldBeCalled();
76 |
77 | $this->process();
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/spec/Extraload/Transformer/CallbackTransformerSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith('array_reverse');
13 | }
14 |
15 | function it_is_initializable()
16 | {
17 | $this->shouldHaveType('Extraload\Transformer\CallbackTransformer');
18 | }
19 |
20 | function it_implements_transformer_interface()
21 | {
22 | $this->shouldImplement('Extraload\Transformer\TransformerInterface');
23 | }
24 |
25 | function it_transforms_data_using_callback()
26 | {
27 | $this->transform(['foo', 'bar'])->shouldReturn(['bar', 'foo']);
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/spec/Extraload/Transformer/NoopTransformerSpec.php:
--------------------------------------------------------------------------------
1 | shouldHaveType('Extraload\Transformer\NoopTransformer');
13 | }
14 |
15 | function it_implements_transformer_interface()
16 | {
17 | $this->shouldImplement('Extraload\Transformer\TransformerInterface');
18 | }
19 |
20 | function it_returns_original_data()
21 | {
22 | $this->transform(['foo', 'bar'])->shouldReturn(['foo', 'bar']);
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/spec/Extraload/Transformer/PropertyTransformerSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith($transformer, PropertyAccess::createPropertyAccessor(), '[bar]');
14 | }
15 |
16 | function it_is_initializable()
17 | {
18 | $this->shouldHaveType('Extraload\Transformer\PropertyTransformer');
19 | }
20 |
21 | function it_implements_transformer_interface()
22 | {
23 | $this->shouldImplement('Extraload\Transformer\TransformerInterface');
24 | }
25 |
26 | function it_applies_transformer_on_given_path(TransformerInterface $transformer)
27 | {
28 | $transformer->transform('bar')->shouldBeCalled()->willReturn('Bar');
29 |
30 | $this->transform(['foo' => 'foo', 'bar' => 'bar'])->shouldReturn(['foo' => 'foo', 'bar' => 'Bar']);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/spec/Extraload/Transformer/TransformerChainSpec.php:
--------------------------------------------------------------------------------
1 | beConstructedWith([$reverseTransformer, $capitalizeTransformer]);
13 | }
14 |
15 | /**
16 |  * Checks that the object under specification is a TransformerChain.
17 |  */
18 | function it_is_initializable()
19 | {
20 |     $expectedType = 'Extraload\Transformer\TransformerChain';
21 |
22 |     $this->shouldHaveType($expectedType);
23 | }
19 |
20 | /**
21 |  * Checks that the object under specification implements TransformerInterface.
22 |  */
23 | function it_implements_transformer_interface()
24 | {
25 |     $contract = 'Extraload\Transformer\TransformerInterface';
26 |
27 |     $this->shouldImplement($contract);
28 | }
24 |
25 | /**
26 |  * Checks that the chain feeds the output of each transformer into the next
27 |  * one, in registration order, and returns the last result. This spec
28 |  * constructs the subject with [$reverseTransformer, $capitalizeTransformer].
29 |  *
30 |  * @param TransformerInterface $reverseTransformer    first double in the chain
31 |  * @param TransformerInterface $capitalizeTransformer second double in the chain
32 |  */
33 | function it_transforms_using_all_registered_transformers(TransformerInterface $reverseTransformer, TransformerInterface $capitalizeTransformer)
34 | {
35 |     $input = ['foo', 'bar'];
36 |     $reversed = ['bar', 'foo'];
37 |     $capitalized = ['Bar', 'Foo'];
38 |
39 |     $reverseTransformer
40 |         ->transform($input)
41 |         ->shouldBeCalled()
42 |         ->willReturn($reversed);
43 |     $capitalizeTransformer
44 |         ->transform($reversed)
45 |         ->shouldBeCalled()
46 |         ->willReturn($capitalized);
47 |
48 |     $this->transform($input)->shouldReturn($capitalized);
49 | }
32 |
33 | /**
34 |  * Checks that constructing the chain with a list containing something that
35 |  * is not a transformer (here a plain string) raises InvalidArgumentException.
36 |  */
37 | function it_throws_exception_if_trying_to_register_non_transformer()
38 | {
39 |     // One constructor argument: a list holding a single non-transformer value.
40 |     $constructorArguments = [['NonTransformer']];
41 |
42 |     $this
43 |         ->shouldThrow(\InvalidArgumentException::class)
44 |         ->during('__construct', $constructorArguments);
45 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/Extraload/Events.php:
--------------------------------------------------------------------------------
1 | file = $file;
12 | $this->file->setFlags(\SplFileObject::SKIP_EMPTY | \SplFileObject::READ_AHEAD | \SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_CSV);
13 | $this->file->setCsvControl($delimiter, $enclosure);
14 | }
15 |
16 | /**
17 |  * Returns the row the file is currently positioned on and advances to the
18 |  * next one.
19 |  *
20 |  * Exhaustion is detected from the value returned by current() rather than
21 |  * from the stream's EOF flag. The file is read with READ_AHEAD, so the EOF
22 |  * flag can already be raised while the last row (a file without a trailing
23 |  * newline) is still buffered and unreturned, and it is not yet raised
24 |  * before the first read of an empty file.
25 |  *
26 |  * @return array|null the parsed CSV row, or null when no row is left
27 |  */
28 | public function extract()
29 | {
30 |     $data = $this->current();
31 |
32 |     // SplFileObject::current() yields false once no further line can be read.
33 |     if (false === $data) {
34 |         return null;
35 |     }
36 |
37 |     $this->next();
38 |
39 |     return $data;
40 | }
28 |
29 | /**
30 |  * Returns the current element of the wrapped file object.
31 |  *
32 |  * @return mixed whatever the file object's current() yields
33 |  */
34 | public function current()
35 | {
36 |     $row = $this->file->current();
37 |
38 |     return $row;
39 | }
33 |
34 | /**
35 |  * Returns the key reported by the wrapped file object for its current
36 |  * position.
37 |  *
38 |  * @return mixed whatever the file object's key() yields
39 |  */
40 | public function key()
41 | {
42 |     $key = $this->file->key();
43 |
44 |     return $key;
45 | }
38 |
39 | /**
40 |  * Advances the wrapped file object to its next element.
41 |  *
42 |  * @return mixed the result of the file object's next() call, passed through
43 |  */
44 | public function next()
45 | {
46 |     $file = $this->file;
47 |
48 |     return $file->next();
49 | }
43 |
44 | /**
45 |  * Rewinds the wrapped file object to its beginning.
46 |  *
47 |  * @return mixed the result of the file object's rewind() call, passed through
48 |  */
49 | public function rewind()
50 | {
51 |     $file = $this->file;
52 |
53 |     return $file->rewind();
54 | }
48 |
49 | /**
50 |  * Tells whether the wrapped file object considers its current position
51 |  * valid.
52 |  *
53 |  * @return bool the result of the file object's valid() call
54 |  */
55 | public function valid()
56 | {
57 |     $file = $this->file;
58 |
59 |     return $file->valid();
60 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/Extraload/Extractor/Doctrine/QueryExtractor.php:
--------------------------------------------------------------------------------
1 | stmt = $conn->prepare($sql);
19 |
20 | foreach ($values as $value) {
21 | $this->stmt->bindValue(
22 | $value['parameter'],
23 | $value['value'],
24 | $value['data_type'] ?? null
25 | );
26 | }
27 |
28 | $this->stmt->execute();
29 |
30 | $this->position = 0;
31 |
32 | $this->data = [];
33 | }
34 |
35 | /**
36 |  * Fetches the next row from the executed statement, caches it at the
37 |  * current position and advances the position by one.
38 |  *
39 |  * @return mixed|null the fetched row, or null once fetch() reports `false`
40 |  */
41 | public function extract()
42 | {
43 |     $row = $this->stmt->fetch();
44 |
45 |     // Never cache the `false` end marker: valid() relies on isset(), which
46 |     // is true for `false`, so a stored marker would be reported as a row.
47 |     if (false === $row) {
48 |         return null;
49 |     }
50 |
51 |     $this->data[$this->position] = $row;
52 |     $this->next();
53 |
54 |     return $row;
55 | }
43 |
44 | /**
45 |  * Returns the cached entry stored at the current position.
46 |  *
47 |  * @return mixed the entry in the row cache for the current position
48 |  */
49 | public function current()
50 | {
51 |     $index = $this->position;
52 |
53 |     return $this->data[$index];
54 | }
48 |
49 | /**
50 |  * Returns the current position within the row cache.
51 |  *
52 |  * @return int the position counter
53 |  */
54 | public function key()
55 | {
56 |     $index = $this->position;
57 |
58 |     return $index;
59 | }
53 |
54 | /**
55 |  * Moves the position counter one step forward. Nothing is fetched here.
56 |  */
57 | public function next()
58 | {
59 |     ++$this->position;
60 | }
58 |
59 | /**
60 |  * Resets the position counter to the first slot of the row cache. Only the
61 |  * counter changes; the statement and the cached rows are left as they are.
62 |  */
63 | public function rewind()
64 | {
65 |     $first = 0;
66 |
67 |     $this->position = $first;
68 | }
63 |
64 | /**
65 |  * Tells whether the row cache holds a non-null entry at the current
66 |  * position.
67 |  *
68 |  * @return bool result of isset() on the cache slot for the current position
69 |  */
70 | public function valid()
71 | {
72 |     $index = $this->position;
73 |
74 |     return isset($this->data[$index]);
75 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/Extraload/Extractor/ExtractorInterface.php:
--------------------------------------------------------------------------------
1 | table = $table;
14 | }
15 |
16 | /**
17 |  * Appends one row to the table held by this loader. Nothing is rendered
18 |  * here; rendering happens in flush().
19 |  *
20 |  * @param mixed $data the row passed on to the table's addRow()
21 |  */
22 | public function load($data)
23 | {
24 |     $table = $this->table;
25 |
26 |     $table->addRow($data);
27 | }
20 |
21 | /**
22 |  * Renders the table held by this loader.
23 |  */
24 | public function flush()
25 | {
26 |     $table = $this->table;
27 |
28 |     $table->render();
29 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/Extraload/Loader/Doctrine/DbalLoader.php:
--------------------------------------------------------------------------------
1 | connection = $connection;
17 | $this->tableName = $tableName;
18 | }
19 |
20 | /**
21 |  * Inserts one row into the configured table through the connection.
22 |  *
23 |  * @param mixed $data the row handed to the connection's insert(); presumably
24 |  *                    a column => value map (confirm against the connection API)
25 |  *
26 |  * @return mixed whatever the connection's insert() returns
27 |  */
28 | public function load($data)
29 | {
30 |     $result = $this->connection->insert($this->tableName, $data);
31 |
32 |     return $result;
33 | }
24 | }
25 |
--------------------------------------------------------------------------------
/src/Extraload/Loader/LoaderInterface.php:
--------------------------------------------------------------------------------
1 | extractor = $extractor;
26 | $this->transformer = $transformer;
27 | $this->loader = $loader;
28 | $this->eventDispatcher = $eventDispatcher;
29 | }
30 |
31 | /**
32 |  * Runs the whole pipeline: pulls rows from the extractor until it returns
33 |  * null, transforms each row, loads every non-null result, then flushes the
34 |  * loader.
35 |  *
36 |  * Events are dispatched in this order: PRE_PROCESS once, then per row
37 |  * EXTRACT, TRANSFORM and (only for non-null results) LOAD, and finally
38 |  * POST_PROCESS after the flush. TRANSFORM is dispatched even when the
39 |  * transformer returned null and the row is skipped.
40 |  */
41 | public function process()
42 | {
43 |     $this->dispatch(Events::PRE_PROCESS);
44 |
45 |     for (;;) {
46 |         $row = $this->extractor->extract();
47 |
48 |         // A null row is the extractor's signal that nothing is left.
49 |         if (null === $row) {
50 |             break;
51 |         }
52 |
53 |         $this->dispatch(Events::EXTRACT, $row);
54 |
55 |         $result = $this->transformer->transform($row);
56 |         $this->dispatch(Events::TRANSFORM, $result);
57 |
58 |         // A null result means "drop this row": it is never loaded.
59 |         if (null !== $result) {
60 |             $this->loader->load($result);
61 |             $this->dispatch(Events::LOAD, $result);
62 |         }
63 |     }
64 |
65 |     $this->loader->flush();
66 |
67 |     $this->dispatch(Events::POST_PROCESS);
68 | }
53 |
54 | /**
55 |  * Sends an event to the dispatcher, if one was configured; otherwise does
56 |  * nothing.
57 |  *
58 |  * NOTE(review): the (name, event) argument order looks like the older
59 |  * Symfony EventDispatcher signature. Confirm it against the installed
60 |  * dispatcher version.
61 |  *
62 |  * @param string     $name    event name
63 |  * @param mixed|null $subject value wrapped in the GenericEvent as its subject
64 |  */
65 | private function dispatch($name, $subject = null)
66 | {
67 |     if (null !== $this->eventDispatcher) {
68 |         $event = new GenericEvent($subject);
69 |
70 |         $this->eventDispatcher->dispatch($name, $event);
71 |     }
72 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/Extraload/Pipeline/PipelineInterface.php:
--------------------------------------------------------------------------------
1 | callback = $callback;
12 | }
13 |
14 | /**
15 |  * Passes the data to the configured callback and returns its result.
16 |  *
17 |  * @param mixed $data value handed to the callback as its only argument
18 |  *
19 |  * @return mixed the callback's return value
20 |  */
21 | public function transform($data)
22 | {
23 |     $callback = $this->callback;
24 |     $result = call_user_func($callback, $data);
25 |
26 |     return $result;
27 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/Extraload/Transformer/NoopTransformer.php:
--------------------------------------------------------------------------------
1 | transformer = $transformer;
19 | $this->propertyAccessor = $propertyAccessor;
20 | $this->path = $path;
21 | }
22 |
23 | /**
24 |  * Applies the inner transformer to the single value found at the configured
25 |  * property path and writes the result back to the same path.
26 |  *
27 |  * @param mixed $data structure read and written through the property
28 |  *                    accessor; null is passed through untouched
29 |  *
30 |  * @return mixed|null $data with the value at the path replaced, or null when
31 |  *                    null was given
32 |  */
33 | public function transform($data)
34 | {
35 |     if (null === $data) {
36 |         return null;
37 |     }
38 |
39 |     $currentValue = $this->propertyAccessor->getValue($data, $this->path);
40 |     $newValue = $this->transformer->transform($currentValue);
41 |
42 |     $this->propertyAccessor->setValue($data, $this->path, $newValue);
43 |
44 |     return $data;
45 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/Extraload/Transformer/TransformerChain.php:
--------------------------------------------------------------------------------
1 | transformers = $transformers;
22 | }
23 |
24 | /**
25 |  * Runs the data through every registered transformer in order, feeding each
26 |  * one the previous transformer's output.
27 |  *
28 |  * @param mixed $data value handed to the first transformer
29 |  *
30 |  * @return mixed output of the last transformer, or $data unchanged when no
31 |  *               transformer is registered
32 |  */
33 | public function transform($data)
34 | {
35 |     $result = $data;
36 |
37 |     foreach ($this->transformers as $step) {
38 |         $result = $step->transform($result);
39 |     }
40 |
41 |     return $result;
42 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/Extraload/Transformer/TransformerInterface.php:
--------------------------------------------------------------------------------
1 |