├── .gitignore ├── .scrutinizer.yml ├── .travis.yml ├── LICENSE ├── README.md ├── behat.yml ├── composer.json ├── examples ├── 01_default_csv_noop_console.php ├── 02_default_csv_callback_console.php ├── 03_default_amazon_chain_console.php ├── 04_default_amazon_chain_console_with_events.php ├── doctrine │ ├── 01_default_doctrine_query_console.php │ ├── 02_default_doctrine_prepared_query_console.php │ ├── 03_default_doctrine_query_dbal_loader.php │ └── mysql-bootstrap.php └── lib.php ├── features ├── bootstrap │ ├── BaseContext.php │ ├── DefaultPipelineContext.php │ └── HookContext.php └── default_pipeline.feature ├── fixtures ├── books.csv └── mysql │ └── books.sql ├── spec └── Extraload │ ├── Extractor │ └── CsvExtractorSpec.php │ ├── Loader │ ├── ConsoleLoaderSpec.php │ └── Doctrine │ │ └── DbalLoaderSpec.php │ ├── Pipeline │ └── DefaultPipelineSpec.php │ └── Transformer │ ├── CallbackTransformerSpec.php │ ├── NoopTransformerSpec.php │ ├── PropertyTransformerSpec.php │ └── TransformerChainSpec.php └── src └── Extraload ├── Events.php ├── Extractor ├── CsvExtractor.php ├── Doctrine │ └── QueryExtractor.php └── ExtractorInterface.php ├── Loader ├── AutoFlushLoader.php ├── ConsoleLoader.php ├── Doctrine │ └── DbalLoader.php └── LoaderInterface.php ├── Pipeline ├── DefaultPipeline.php └── PipelineInterface.php └── Transformer ├── CallbackTransformer.php ├── NoopTransformer.php ├── PropertyTransformer.php ├── TransformerChain.php └── TransformerInterface.php /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | vendor 3 | composer.lock 4 | -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | filter: 2 | paths: 3 | - src/* 4 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: php 2 | 3 | sudo: false 4 | 5 | php: 6 | - 5.6 7 | - 7.0 8 | 9 | before_script: 10 | - composer install --dev 11 | 12 | script: 13 | - bin/phpspec run -f dot 14 | - bin/behat -f progress 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Saša Stamenković 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | 5 |

6 |

7 | symfony upgrade fixer • 8 | twig gettext extractor • 9 | wisdom • 10 | centipede • 11 | permissions handler • 12 | extraload • 13 | gravatar • 14 | locurro • 15 | country list • 16 | transliterator 17 |

18 | 19 | # Extraload [![Build Status](https://travis-ci.org/umpirsky/Extraload.svg?branch=master)](https://travis-ci.org/umpirsky/Extraload) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/umpirsky/Extraload/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/umpirsky/Extraload/?branch=master) 20 | 21 | Powerful ETL library. 22 | 23 | 24 | ## Examples 25 | 26 | ### Dumping CSV data into the console 27 | 28 | Input data is given in CSV format: 29 | ```csv 30 | "99921-58-10-7", "Divine Comedy", "Dante Alighieri" 31 | "9971-5-0210-0", "A Tale of Two Cities", "Charles Dickens" 32 | "960-425-059-0", "The Lord of the Rings", "J. R. R. Tolkien" 33 | "80-902734-1-6", "And Then There Were None", "Agatha Christie" 34 | ``` 35 | With: 36 | ```php 37 | (new DefaultPipeline( 38 | new CsvExtractor( 39 | new \SplFileObject('books.csv') 40 | ), 41 | new NoopTransformer(), 42 | new ConsoleLoader( 43 | new Table(new ConsoleOutput()) 44 | ) 45 | ))->process(); 46 | ``` 47 | It can be dumped as a table to the console: 48 | ``` 49 | +---------------+--------------------------+------------------+ 50 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 51 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens | 52 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien | 53 | | 80-902734-1-6 | And Then There Were None | Agatha Christie | 54 | +---------------+--------------------------+------------------+ 55 | ``` 56 | In this example `NoopTransformer` is used, but various transformations can be applied. Transformers can also be chained using `TransformerChain`. 
57 | 58 | ### Dumping a Doctrine query into the console 59 | 60 | First of all make sure to load the fixtures into a database -- this example works with MySQL: 61 | 62 | mysql> source /home/standard/projects/Extraload/fixtures/mysql/books.sql 63 | 64 | So the following code: 65 | 66 | ```php 67 | (new DefaultPipeline( 68 | new QueryExtractor($conn, 'SELECT * FROM books'), 69 | new NoopTransformer(), 70 | new ConsoleLoader( 71 | new Table($output = new ConsoleOutput()) 72 | ) 73 | ))->process(); 74 | ``` 75 | 76 | Will dump these results to the console: 77 | 78 | +---------------+--------------------------+------------------+ 79 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 80 | | 9781847493583 | La Vita Nuova | Dante Alighieri | 81 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens | 82 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien | 83 | | 80-902734-1-6 | And Then There Were None | Agatha Christie | 84 | +---------------+--------------------------+------------------+ 85 | 86 | ### Dumping a Doctrine prepared query into the console 87 | 88 | The following code: 89 | 90 | ```php 91 | // ... 92 | 93 | $sql = "SELECT * FROM books WHERE author = :author"; 94 | $values = [ 95 | [ 96 | 'parameter' => ':author', 97 | 'value' => 'Dante Alighieri', 98 | 'data_type' => PDO::PARAM_STR // optional 99 | ] 100 | ]; 101 | 102 | (new DefaultPipeline( 103 | new QueryExtractor($conn, $sql, $values), 104 | new NoopTransformer(), 105 | new ConsoleLoader( 106 | new Table($output = new ConsoleOutput()) 107 | ) 108 | ))->process(); 109 | ``` 110 | 111 | Will dump these results to the console: 112 | 113 | +---------------+---------------+-----------------+ 114 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 115 | | 9781847493583 | La Vita Nuova | Dante Alighieri | 116 | +---------------+---------------+-----------------+ 117 | 118 | ### Dumping a Doctrine query into a table 119 | 120 | The following code: 121 | 122 | ```php 123 | // ... 
124 | 125 | (new DefaultPipeline( 126 | new QueryExtractor($conn, 'SELECT * FROM books'), 127 | new NoopTransformer(), 128 | new DbalLoader($conn, 'my_books') 129 | ))->process(); 130 | ``` 131 | 132 | Will dump the results into the `my_books` table: 133 | 134 | mysql> select * from my_books; 135 | +----------------+--------------------------+----------------------------+ 136 | | isbn | title | author | 137 | +----------------+--------------------------+----------------------------+ 138 | | 9781503262140 | Faust | Johann Wolfgang von Goethe | 139 | | 978-0156949606 | The Waves | Virgina Woolf | 140 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 141 | | 9781847493583 | La Vita Nuova | Dante Alighieri | 142 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens | 143 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien | 144 | | 80-902734-1-6 | And Then There Were None | Agatha Christie | 145 | +----------------+--------------------------+----------------------------+ 146 | 7 rows in set (0.00 sec) 147 | 148 | See more [examples](https://github.com/umpirsky/Extraload/tree/master/examples). 149 | 150 | ## Inspiration 151 | 152 | Inspired by [php-etl](https://github.com/docteurklein/php-etl) and [petl](https://github.com/alimanfoo/petl). 
153 | -------------------------------------------------------------------------------- /behat.yml: -------------------------------------------------------------------------------- 1 | default: 2 | suites: 3 | default: 4 | contexts: 5 | - HookContext 6 | - DefaultPipelineContext 7 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "umpirsky/extraload", 3 | "description": "Powerful ETL library.", 4 | "keywords": [ 5 | "etl", 6 | "extract", 7 | "transform", 8 | "convert", 9 | "load", 10 | "import", 11 | "export" 12 | ], 13 | "homepage": "http://umpirsky.com", 14 | "type": "library", 15 | "require": { 16 | "php": ">=5.6", 17 | "symfony/event-dispatcher": "^2.7 || ^3.0 || ^4.0" 18 | }, 19 | "require-dev": { 20 | "phpspec/phpspec": "^3.0", 21 | "behat/behat": "^3.1", 22 | "phpunit/phpunit": "^5.1", 23 | "symfony/console": "^3.0", 24 | "doctrine/dbal": "^2.5", 25 | "symfony/property-access": "^3.0", 26 | "behat/mink-selenium2-driver": "^1.3" 27 | }, 28 | "suggest": { 29 | "doctrine/dbal": "Allows loading data into database using DBAL loader", 30 | "symfony/property-access": "Allows applying transformer on property by path", 31 | "symfony/console": "Allows displaying tabular data in console" 32 | }, 33 | "license": "MIT", 34 | "authors": [ 35 | { 36 | "name": "Saša Stamenković", 37 | "email": "umpirsky@gmail.com" 38 | } 39 | ], 40 | "autoload": { 41 | "psr-0": { "Extraload\\": "src/" } 42 | }, 43 | "config": { 44 | "bin-dir": "bin" 45 | }, 46 | "extra": { 47 | "branch-alias": { 48 | "dev-master": "0.1.0-dev" 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/01_default_csv_noop_console.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | process(); 22 | 
-------------------------------------------------------------------------------- /examples/02_default_csv_callback_console.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | process(); 26 | -------------------------------------------------------------------------------- /examples/03_default_amazon_chain_console.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | new Session(new Selenium2Driver()), 21 | ])), 22 | new TransformerChain([ 23 | new DocumentToElementTransformer(), 24 | new ElementToStringTransformer(), 25 | new PropertyTransformer( 26 | new CallbackTransformer('truncate'), 27 | PropertyAccess::createPropertyAccessor(), 28 | '[title]' 29 | ), 30 | ]), 31 | new ConsoleLoader( 32 | new Table($output = new ConsoleOutput()) 33 | ) 34 | ))->process(); 35 | -------------------------------------------------------------------------------- /examples/04_default_amazon_chain_console_with_events.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | addListener(Events::LOAD, function (GenericEvent $event) use ($output) { 25 | $output->writeln(sprintf('Loading %s', $event->getSubject()['title'])); 26 | }); 27 | 28 | (new DefaultPipeline( 29 | new AmazonExtractor(new Mink([ 30 | 'selenium2' => new Session(new Selenium2Driver()), 31 | ])), 32 | new TransformerChain([ 33 | new DocumentToElementTransformer(), 34 | new ElementToStringTransformer(), 35 | new PropertyTransformer( 36 | new CallbackTransformer('truncate'), 37 | PropertyAccess::createPropertyAccessor(), 38 | '[title]' 39 | ), 40 | ]), 41 | new ConsoleLoader(new Table($output)), 42 | $dispatcher 43 | ))->process(); 44 | -------------------------------------------------------------------------------- /examples/doctrine/01_default_doctrine_query_console.php: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | process(); 21 | -------------------------------------------------------------------------------- /examples/doctrine/02_default_doctrine_prepared_query_console.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | ':author', 18 | 'value' => 'Dante Alighieri', 19 | 'data_type' => PDO::PARAM_STR // optional 20 | ] 21 | ]; 22 | 23 | (new DefaultPipeline( 24 | new QueryExtractor($conn, $sql, $values), 25 | new NoopTransformer(), 26 | new ConsoleLoader( 27 | new Table($output = new ConsoleOutput()) 28 | ) 29 | ))->process(); 30 | -------------------------------------------------------------------------------- /examples/doctrine/03_default_doctrine_query_dbal_loader.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | process(); 19 | -------------------------------------------------------------------------------- /examples/doctrine/mysql-bootstrap.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | 'extraload_fixtures', 11 | 'user' => 'extraload_fixtures', 12 | 'password' => 'password', 13 | 'host' => 'localhost', 14 | 'driver' => 'pdo_mysql', 15 | ); 16 | $conn = \Doctrine\DBAL\DriverManager::getConnection($connectionParams, $config); 17 | -------------------------------------------------------------------------------- /examples/lib.php: -------------------------------------------------------------------------------- 1 | mink = $mink; 20 | 21 | $this->getSession()->visit('http://www.amazon.com/gp/goldbox'); 22 | 23 | foreach ($this->getSession()->getPage()->findAll('css', '#widgetContent div.dealTile > a') as $element) { 24 | $this->urls[] = $element->getAttribute('href'); 25 | } 26 | } 27 | 28 | public function extract() 29 | { 30 | $data = $this->current(); 31 | 32 | $this->next(); 33 | 
34 | return $data; 35 | } 36 | 37 | public function current() 38 | { 39 | if (!$this->valid()) { 40 | return; 41 | } 42 | 43 | $this->getSession()->visit($this->urls[$this->key()]); 44 | 45 | return $this->getSession()->getPage(); 46 | } 47 | 48 | public function key() 49 | { 50 | return $this->index; 51 | } 52 | 53 | public function next() 54 | { 55 | ++$this->index; 56 | } 57 | 58 | public function rewind() 59 | { 60 | $this->index = 0; 61 | } 62 | 63 | public function valid() 64 | { 65 | return isset($this->urls[$this->key()]); 66 | } 67 | 68 | private function getSession() 69 | { 70 | return $this->mink->getSession('selenium2'); 71 | } 72 | } 73 | 74 | class DocumentToElementTransformer implements TransformerInterface 75 | { 76 | public function transform($data) 77 | { 78 | if (!$data instanceof DocumentElement) { 79 | throw new \InvalidArgumentException('Can transform only DocumentElement.'); 80 | } 81 | 82 | return [ 83 | 'title' => $data->findById('title'), 84 | 'price' => $data->findById('priceblock_dealprice'), 85 | ]; 86 | } 87 | } 88 | 89 | class ElementToStringTransformer implements TransformerInterface 90 | { 91 | public function transform($data) 92 | { 93 | foreach ($data as $key => $element) { 94 | if (null === $element) { 95 | return; 96 | } 97 | 98 | if (!$element instanceof Element) { 99 | throw new \InvalidArgumentException('Can transform only Element.'); 100 | } 101 | 102 | $data[$key] = $element->getText(); 103 | } 104 | 105 | return $data; 106 | } 107 | } 108 | 109 | function truncate($value) 110 | { 111 | $length = 30; 112 | 113 | if (strlen($value) > $length) { 114 | if (false !== ($breakpoint = strpos($value, ' ', $length))) { 115 | $length = $breakpoint; 116 | } 117 | 118 | return rtrim(substr($value, 0, $length)).'...'; 119 | } 120 | 121 | return $value; 122 | } 123 | -------------------------------------------------------------------------------- /features/bootstrap/BaseContext.php: 
-------------------------------------------------------------------------------- 1 | createDirectory(dirname($path)); 15 | 16 | file_put_contents($path, $content); 17 | 18 | return $path; 19 | } 20 | 21 | protected function createDirectory($path) 22 | { 23 | if (!is_dir($path)) { 24 | mkdir($path, 0777, true); 25 | } 26 | } 27 | 28 | protected function stringNodeToString(PyStringNode $string) 29 | { 30 | return strtr((string) $string, array("'''" => '"""')); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /features/bootstrap/DefaultPipelineContext.php: -------------------------------------------------------------------------------- 1 | pipeline = new DefaultPipeline( 31 | $this->createCsvExtractor(), 32 | $this->createTransformer($transformer), 33 | new ConsoleLoader( 34 | new Table($this->output = new BufferedOutput()) 35 | ) 36 | ); 37 | } 38 | 39 | /** 40 | * @Given I create csv to database pipeline 41 | */ 42 | public function iCreateCsvToDatabasePipeline() 43 | { 44 | return $this->pipeline = new DefaultPipeline( 45 | $this->createCsvExtractor(), 46 | $this->createTransformer('callable'), 47 | new DbalLoader( 48 | $this->getConnection(), 49 | $this->workingTable 50 | ) 51 | ); 52 | } 53 | 54 | /** 55 | * @Given I process it 56 | */ 57 | public function iProcessIt() 58 | { 59 | $this->pipeline->process(); 60 | } 61 | 62 | /** 63 | * @Then I should see in console: 64 | */ 65 | public function iShouldSeeInConsole(PyStringNode $expected) 66 | { 67 | $expected = $this->stringNodeToString($expected); 68 | $actual = trim($this->output->fetch()); 69 | 70 | PHPUnit_Framework_Assert::assertEquals($expected, $actual); 71 | } 72 | 73 | /** 74 | * @Then I should see in database: 75 | */ 76 | public function iShouldSeeInDatabase(TableNode $table) 77 | { 78 | $actual = $this->getConnection() 79 | ->createQueryBuilder() 80 | ->select('*') 81 | ->from($this->workingTable) 82 | ->execute() 83 | ->fetchAll() 84 | ; 85 | 86 | 
foreach ($table->getHash() as $key => $expected) { 87 | PHPUnit_Framework_Assert::assertEquals($expected, $actual[$key]); 88 | } 89 | } 90 | 91 | private function createCsvExtractor() 92 | { 93 | return new CsvExtractor(new \SplFileObject(__DIR__.'/../../fixtures/books.csv')); 94 | } 95 | 96 | private function createTransformer($type) 97 | { 98 | switch ($type) { 99 | case 'callable': 100 | return new CallbackTransformer(function ($data) { 101 | return [ 102 | 'isbn' => $data[0], 103 | 'title' => $data[1], 104 | 'author' => $data[2], 105 | ]; 106 | }); 107 | 108 | case 'chain': 109 | return new TransformerChain([ 110 | new CallbackTransformer(function ($data) { 111 | unset($data[0]); 112 | 113 | return $data; 114 | }), 115 | new CallbackTransformer(function ($data) { 116 | return [ 117 | 'title' => $data[1], 118 | 'author' => $data[2], 119 | ]; 120 | }), 121 | ]); 122 | } 123 | 124 | throw new PendingException(sprintf('Implement %s transformer creator.', $type)); 125 | } 126 | 127 | private function getConnection() 128 | { 129 | if (null === $this->connection) { 130 | $this->connection = DriverManager::getConnection(['url' => 'sqlite:///:memory:']); 131 | $this->connection->exec(sprintf('CREATE TABLE %s(isbn, title, author)', $this->workingTable)); 132 | } 133 | 134 | return $this->connection; 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /features/bootstrap/HookContext.php: -------------------------------------------------------------------------------- 1 | clearDirectory(self::$workingDirectory); 25 | } 26 | 27 | private function clearDirectory($path) 28 | { 29 | $files = scandir($path); 30 | array_shift($files); 31 | array_shift($files); 32 | 33 | foreach ($files as $file) { 34 | $file = $path.DIRECTORY_SEPARATOR.$file; 35 | if (is_dir($file)) { 36 | $this->clearDirectory($file); 37 | } else { 38 | unlink($file); 39 | } 40 | } 41 | 42 | rmdir($path); 43 | } 44 | } 45 | 
-------------------------------------------------------------------------------- /features/default_pipeline.feature: -------------------------------------------------------------------------------- 1 | Feature: Default pipeline 2 | In order to run sequential ETL process 3 | As ETL developer 4 | I need to be able to process default ETL pipeline 5 | 6 | Scenario: Dump CSV file to console table 7 | Given I create csv to console pipeline using "callable" transformer 8 | And I process it 9 | Then I should see in console: 10 | """ 11 | +---------------+--------------------------+------------------+ 12 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 13 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens | 14 | | 960-425-059-0 | The Lord of the Rings | J. R. R. Tolkien | 15 | | 80-902734-1-6 | And Then There Were None | Agatha Christie | 16 | +---------------+--------------------------+------------------+ 17 | """ 18 | 19 | Scenario: Dump CSV file to console table using transformer chain 20 | Given I create csv to console pipeline using "chain" transformer 21 | And I process it 22 | Then I should see in console: 23 | """ 24 | +--------------------------+------------------+ 25 | | Divine Comedy | Dante Alighieri | 26 | | A Tale of Two Cities | Charles Dickens | 27 | | The Lord of the Rings | J. R. R. Tolkien | 28 | | And Then There Were None | Agatha Christie | 29 | +--------------------------+------------------+ 30 | """ 31 | 32 | Scenario: Import CSV file into database 33 | Given I create csv to database pipeline 34 | And I process it 35 | Then I should see in database: 36 | | isbn | title | author | 37 | | 99921-58-10-7 | Divine Comedy | Dante Alighieri | 38 | | 9971-5-0210-0 | A Tale of Two Cities | Charles Dickens | 39 | | 960-425-059-0 | The Lord of the Rings | J. R. R. 
Tolkien | 40 | | 80-902734-1-6 | And Then There Were None | Agatha Christie | 41 | -------------------------------------------------------------------------------- /fixtures/books.csv: -------------------------------------------------------------------------------- 1 | "99921-58-10-7", "Divine Comedy", "Dante Alighieri" 2 | "9971-5-0210-0", "A Tale of Two Cities", "Charles Dickens" 3 | "960-425-059-0", "The Lord of the Rings", "J. R. R. Tolkien" 4 | "80-902734-1-6", "And Then There Were None", "Agatha Christie" 5 | -------------------------------------------------------------------------------- /fixtures/mysql/books.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE extraload_fixtures; 2 | 3 | USE extraload_fixtures; 4 | 5 | GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, INDEX, ALTER, LOCK TABLES, CREATE TEMPORARY TABLES 6 | ON extraload_fixtures.* TO 'extraload_fixtures'@'localhost' IDENTIFIED BY 'password'; 7 | 8 | CREATE TABLE books ( 9 | isbn VARCHAR(16) NOT NULL, 10 | title VARCHAR(128) NOT NULL, 11 | author VARCHAR(128) NOT NULL 12 | ); 13 | 14 | CREATE TABLE my_books ( 15 | isbn VARCHAR(16) NOT NULL, 16 | title VARCHAR(128) NOT NULL, 17 | author VARCHAR(128) NOT NULL 18 | ); 19 | 20 | INSERT INTO books (isbn, title, author) 21 | VALUES 22 | ('99921-58-10-7', 'Divine Comedy', 'Dante Alighieri'), 23 | ('9781847493583', 'La Vita Nuova', 'Dante Alighieri'), 24 | ('9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens'), 25 | ('960-425-059-0', 'The Lord of the Rings', 'J. R. R. 
Tolkien'), 26 | ('80-902734-1-6', 'And Then There Were None', 'Agatha Christie'); 27 | 28 | INSERT INTO my_books (isbn, title, author) 29 | VALUES 30 | ('9781503262140', 'Faust', 'Johann Wolfgang von Goethe'), 31 | ('978-0156949606', 'The Waves', 'Virgina Woolf'); 32 | -------------------------------------------------------------------------------- /spec/Extraload/Extractor/CsvExtractorSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith(new \SplFileObject(__DIR__.'/../../../fixtures/books.csv')); 12 | } 13 | 14 | function it_is_initializable() 15 | { 16 | $this->shouldHaveType('Extraload\Extractor\CsvExtractor'); 17 | } 18 | 19 | function it_implements_extractor_interface() 20 | { 21 | $this->shouldImplement('Extraload\Extractor\ExtractorInterface'); 22 | } 23 | 24 | function it_iterates_over_csv_rows() 25 | { 26 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']); 27 | $this->extract()->shouldReturn(['9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens']); 28 | $this->extract()->shouldReturn(['960-425-059-0', 'The Lord of the Rings', 'J. R. R. Tolkien']); 29 | $this->extract()->shouldReturn(['80-902734-1-6', 'And Then There Were None', 'Agatha Christie']); 30 | } 31 | 32 | function it_moves_over_csv_rows() 33 | { 34 | $this->current()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']); 35 | $this->next(); 36 | $this->current()->shouldReturn(['9971-5-0210-0', 'A Tale of Two Cities', 'Charles Dickens']); 37 | $this->next(); 38 | $this->current()->shouldReturn(['960-425-059-0', 'The Lord of the Rings', 'J. R. R. 
Tolkien']); 39 | } 40 | 41 | function it_rewinds() 42 | { 43 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']); 44 | $this->rewind(); 45 | $this->extract()->shouldReturn(['99921-58-10-7', 'Divine Comedy', 'Dante Alighieri']); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /spec/Extraload/Loader/ConsoleLoaderSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($table); 13 | } 14 | 15 | function it_is_initializable() 16 | { 17 | $this->shouldHaveType('Extraload\Loader\ConsoleLoader'); 18 | } 19 | 20 | function it_implements_loader_interface() 21 | { 22 | $this->shouldImplement('Extraload\Loader\LoaderInterface'); 23 | } 24 | 25 | function it_loads_data_in_console_using_table_helper(Table $table) 26 | { 27 | $table->addRow(['a1', 'b1', 'c1'])->shouldBeCalled(); 28 | 29 | $this->load(['a1', 'b1', 'c1']); 30 | } 31 | 32 | function it_renders_data_in_console_on_flush(Table $table) 33 | { 34 | $table->render()->shouldBeCalled(); 35 | 36 | $this->flush(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spec/Extraload/Loader/Doctrine/DbalLoaderSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($connection, 'data'); 14 | } 15 | 16 | function it_is_initializable() 17 | { 18 | $this->shouldHaveType('Extraload\Loader\Doctrine\DbalLoader'); 19 | } 20 | 21 | function it_implements_loader_interface() 22 | { 23 | $this->shouldImplement('Extraload\Loader\LoaderInterface'); 24 | } 25 | 26 | function it_loads_data_into_database_using_doctrine_dbal_connection(Connection $connection) 27 | { 28 | $connection->insert('data', ['a1', 'b1', 'c1'])->shouldBeCalled(); 29 | 30 | $this->load(['a1', 'b1', 'c1']); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- 
/spec/Extraload/Pipeline/DefaultPipelineSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($extractor, $transformer, $loader); 18 | } 19 | 20 | function it_is_initializable() 21 | { 22 | $this->shouldHaveType('Extraload\Pipeline\DefaultPipeline'); 23 | } 24 | 25 | function it_implements_pipeline_interface() 26 | { 27 | $this->shouldImplement('Extraload\Pipeline\PipelineInterface'); 28 | } 29 | 30 | function it_does_not_transform_nor_load_if_no_data_extracted( 31 | ExtractorInterface $extractor, 32 | TransformerInterface $transformer, 33 | LoaderInterface $loader 34 | ) 35 | { 36 | $extractor->extract()->shouldBeCalled()->willReturn(null); 37 | $transformer->transform(Argument::any())->shouldNotBeCalled(); 38 | $loader->flush()->shouldBeCalled(); 39 | 40 | $this->process(); 41 | } 42 | 43 | function it_processes_etl_sequentially( 44 | ExtractorInterface $extractor, 45 | TransformerInterface $transformer, 46 | LoaderInterface $loader 47 | ) 48 | { 49 | $extractor->extract()->shouldBeCalled()->willReturn(['a1', 'b1', 'c1'], null); 50 | $transformer->transform(['a1', 'b1', 'c1'])->shouldBeCalled()->willReturn(['c1', 'b1', 'a1']); 51 | $loader->load(['c1', 'b1', 'a1'])->shouldBeCalled(); 52 | $loader->flush()->shouldBeCalled(); 53 | 54 | $this->process(); 55 | } 56 | 57 | function it_dispatches_events_during_etl_processesing( 58 | ExtractorInterface $extractor, 59 | TransformerInterface $transformer, 60 | LoaderInterface $loader, 61 | EventDispatcherInterface $eventDispatcher 62 | ) 63 | { 64 | $this->beConstructedWith($extractor, $transformer, $loader, $eventDispatcher); 65 | 66 | $extractor->extract()->shouldBeCalled()->willReturn(['a1', 'b1', 'c1'], null); 67 | $transformer->transform(['a1', 'b1', 'c1'])->shouldBeCalled()->willReturn(['c1', 'b1', 'a1']); 68 | $loader->load(['c1', 'b1', 'a1'])->shouldBeCalled(); 69 | $loader->flush()->shouldBeCalled(); 70 | 71 | 
$eventDispatcher->dispatch(Events::PRE_PROCESS, Argument::any())->shouldBeCalled(); 72 | $eventDispatcher->dispatch(Events::POST_PROCESS, Argument::any())->shouldBeCalled(); 73 | $eventDispatcher->dispatch(Events::EXTRACT, Argument::any())->shouldBeCalled(); 74 | $eventDispatcher->dispatch(Events::TRANSFORM, Argument::any())->shouldBeCalled(); 75 | $eventDispatcher->dispatch(Events::LOAD, Argument::any())->shouldBeCalled(); 76 | 77 | $this->process(); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /spec/Extraload/Transformer/CallbackTransformerSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith('array_reverse'); 13 | } 14 | 15 | function it_is_initializable() 16 | { 17 | $this->shouldHaveType('Extraload\Transformer\CallbackTransformer'); 18 | } 19 | 20 | function it_implements_transformer_interface() 21 | { 22 | $this->shouldImplement('Extraload\Transformer\TransformerInterface'); 23 | } 24 | 25 | function it_transforms_data_using_callback() 26 | { 27 | $this->transform(['foo', 'bar'])->shouldReturn(['bar', 'foo']); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /spec/Extraload/Transformer/NoopTransformerSpec.php: -------------------------------------------------------------------------------- 1 | shouldHaveType('Extraload\Transformer\NoopTransformer'); 13 | } 14 | 15 | function it_implements_transformer_interface() 16 | { 17 | $this->shouldImplement('Extraload\Transformer\TransformerInterface'); 18 | } 19 | 20 | function it_returns_original_data() 21 | { 22 | $this->transform(['foo', 'bar'])->shouldReturn(['foo', 'bar']); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spec/Extraload/Transformer/PropertyTransformerSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($transformer, 
PropertyAccess::createPropertyAccessor(), '[bar]'); 14 | } 15 | 16 | function it_is_initializable() 17 | { 18 | $this->shouldHaveType('Extraload\Transformer\PropertyTransformer'); 19 | } 20 | 21 | function it_implements_transformer_interface() 22 | { 23 | $this->shouldImplement('Extraload\Transformer\TransformerInterface'); 24 | } 25 | 26 | function it_applies_transformer_on_given_path(TransformerInterface $transformer) 27 | { 28 | $transformer->transform('bar')->shouldBeCalled()->willReturn('Bar'); 29 | 30 | $this->transform(['foo' => 'foo', 'bar' => 'bar'])->shouldReturn(['foo' => 'foo', 'bar' => 'Bar']); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /spec/Extraload/Transformer/TransformerChainSpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith([$reverseTransformer, $capitalizeTransformer]); 13 | } 14 | 15 | function it_is_initializable() 16 | { 17 | $this->shouldHaveType('Extraload\Transformer\TransformerChain'); 18 | } 19 | 20 | function it_implements_transformer_interface() 21 | { 22 | $this->shouldImplement('Extraload\Transformer\TransformerInterface'); 23 | } 24 | 25 | function it_transforms_using_all_registered_transformers(TransformerInterface $reverseTransformer, TransformerInterface $capitalizeTransformer) 26 | { 27 | $reverseTransformer->transform(['foo', 'bar'])->shouldBeCalled()->willReturn(['bar', 'foo']); 28 | $capitalizeTransformer->transform(['bar', 'foo'])->shouldBeCalled()->willReturn(['Bar', 'Foo']); 29 | 30 | $this->transform(['foo', 'bar'])->shouldReturn(['Bar', 'Foo']); 31 | } 32 | 33 | function it_throws_exception_if_trying_to_register_non_transformer() 34 | { 35 | $this->shouldThrow(\InvalidArgumentException::class)->during('__construct', [['NonTransformer']]); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Extraload/Events.php: 
-------------------------------------------------------------------------------- 1 | file = $file; 12 | /* Configure the file object to parse lines as CSV records, skip empty lines, read ahead and drop trailing newlines. */ $this->file->setFlags(\SplFileObject::SKIP_EMPTY | \SplFileObject::READ_AHEAD | \SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_CSV); 13 | $this->file->setCsvControl($delimiter, $enclosure); 14 | } 15 | 16 | /* Returns the current record and advances the file by one record; returns null (bare return) once the file reports end-of-file. */ public function extract() 17 | { 18 | if ($this->file->eof()) { 19 | return; 20 | } 21 | 22 | $data = $this->current(); 23 | 24 | $this->next(); 25 | 26 | return $data; 27 | } 28 | 29 | /* The five methods below delegate directly to the same-named method of the wrapped file object. */ public function current() 30 | { 31 | return $this->file->current(); 32 | } 33 | 34 | public function key() 35 | { 36 | return $this->file->key(); 37 | } 38 | 39 | public function next() 40 | { 41 | return $this->file->next(); 42 | } 43 | 44 | public function rewind() 45 | { 46 | return $this->file->rewind(); 47 | } 48 | 49 | public function valid() 50 | { 51 | return $this->file->valid(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/Extraload/Extractor/Doctrine/QueryExtractor.php: -------------------------------------------------------------------------------- 1 | stmt = $conn->prepare($sql); 19 | 20 | /* Each entry of $values supplies 'parameter' and 'value' keys and an optional 'data_type' key for bindValue(). */ foreach ($values as $value) { 21 | $this->stmt->bindValue( 22 | $value['parameter'], 23 | $value['value'], 24 | $value['data_type'] ?? 
null 25 | ); 26 | } 27 | 28 | $this->stmt->execute(); 29 | 30 | $this->position = 0; 31 | 32 | $this->data = []; 33 | } 34 | 35 | /* Fetches the next row from the executed statement, stores it at the current position, advances the position and returns the row. Returns null (implicitly) once fetch() yields false. */ public function extract() 36 | { 37 | /* Only real rows are stored: caching the terminating false would make the isset()-based valid() report one extra element whose current() value is false. */ if (false !== $row = $this->stmt->fetch()) { 38 | $this->data[$this->position] = $row; 39 | $this->next(); 40 | return $row; 41 | } 42 | } 43 | 44 | /* Returns the stored row at the current position. */ public function current() 45 | { 46 | return $this->data[$this->position]; 47 | } 48 | 49 | /* Returns the current zero-based position. */ public function key() 50 | { 51 | return $this->position; 52 | } 53 | 54 | /* Moves the position forward by one; does not fetch from the statement. */ public function next() 55 | { 56 | $this->position += 1; 57 | } 58 | 59 | /* Resets the position to 0; rows already stored in $this->data are kept. */ public function rewind() 60 | { 61 | $this->position = 0; 62 | } 63 | 64 | /* True when a non-null row is stored at the current position. */ public function valid() 65 | { 66 | return isset($this->data[$this->position]); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/Extraload/Extractor/ExtractorInterface.php: -------------------------------------------------------------------------------- 1 | table = $table; 14 | } 15 | 16 | /* Appends $data as a row of the wrapped table object. */ public function load($data) 17 | { 18 | $this->table->addRow($data); 19 | } 20 | 21 | /* Renders the wrapped table object. */ public function flush() 22 | { 23 | $this->table->render(); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/Extraload/Loader/Doctrine/DbalLoader.php: -------------------------------------------------------------------------------- 1 | connection = $connection; 17 | $this->tableName = $tableName; 18 | } 19 | 20 | /* Inserts $data into the configured table through the connection and returns insert()'s result. */ public function load($data) 21 | { 22 | return $this->connection->insert($this->tableName, $data); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/Extraload/Loader/LoaderInterface.php: -------------------------------------------------------------------------------- 1 | extractor = $extractor; 26 | $this->transformer = $transformer; 27 | $this->loader = $loader; 28 | $this->eventDispatcher = $eventDispatcher; 29 | } 30 | 31 | public function process() 32 | { 33 | 
$this->dispatch(Events::PRE_PROCESS); 34 | 35 | /* Pull items one at a time until the extractor returns null. */ while (null !== $extracted = $this->extractor->extract()) { 36 | $this->dispatch(Events::EXTRACT, $extracted); 37 | 38 | $transformed = $this->transformer->transform($extracted); 39 | $this->dispatch(Events::TRANSFORM, $transformed); 40 | 41 | /* A null transform result skips loading for this item; the TRANSFORM event above has already been dispatched with it. */ if (null === $transformed) { 42 | continue; 43 | } 44 | 45 | $this->loader->load($transformed); 46 | $this->dispatch(Events::LOAD, $transformed); 47 | } 48 | 49 | /* Flush the loader once, after the extractor is exhausted. */ $this->loader->flush(); 50 | 51 | $this->dispatch(Events::POST_PROCESS); 52 | } 53 | 54 | /* Dispatches $name with a GenericEvent wrapping $subject; does nothing when no event dispatcher is set. */ private function dispatch($name, $subject = null) 55 | { 56 | if (null === $this->eventDispatcher) { 57 | return; 58 | } 59 | 60 | $this->eventDispatcher->dispatch($name, new GenericEvent($subject)); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/Extraload/Pipeline/PipelineInterface.php: -------------------------------------------------------------------------------- 1 | callback = $callback; 12 | } 13 | 14 | /* Passes $data to the stored callback and returns the callback's result. */ public function transform($data) 15 | { 16 | return call_user_func($this->callback, $data); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/Extraload/Transformer/NoopTransformer.php: -------------------------------------------------------------------------------- 1 | transformer = $transformer; 19 | $this->propertyAccessor = $propertyAccessor; 20 | $this->path = $path; 21 | } 22 | 23 | /* Returns null for null input; otherwise reads the value at $this->path, runs it through the wrapped transformer, writes the result back to the same path and returns $data. */ public function transform($data) 24 | { 25 | if (null === $data) { 26 | return; 27 | } 28 | 29 | $this->propertyAccessor->setValue( 30 | $data, 31 | $this->path, 32 | $this->transformer->transform( 33 | $this->propertyAccessor->getValue($data, $this->path) 34 | ) 35 | ); 36 | 37 | return $data; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Extraload/Transformer/TransformerChain.php: -------------------------------------------------------------------------------- 1 | 
transformers = $transformers; 22 | } 23 | 24 | /* Feeds $data through every transformer in $this->transformers in iteration order, passing each result on to the next, and returns the final value. */ public function transform($data) 25 | { 26 | foreach ($this->transformers as $transformer) { 27 | $data = $transformer->transform($data); 28 | } 29 | 30 | return $data; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/Extraload/Transformer/TransformerInterface.php: -------------------------------------------------------------------------------- 1 |