├── translations └── .gitignore ├── docker-compose.override.yml.dist ├── phpspec.yml ├── config ├── packages │ ├── routing.yaml │ ├── dev │ │ └── routing.yaml │ ├── test │ │ ├── routing.yaml │ │ └── framework.yaml │ ├── translation.yaml │ ├── doctrine_migrations.yaml │ ├── snc_redis.yaml │ ├── doctrine.yaml │ ├── prod │ │ └── doctrine.yaml │ └── framework.yaml ├── routes.yaml ├── bundles.php └── services.yaml ├── src ├── Fogger │ ├── Mask │ │ ├── Exception │ │ │ └── UnknownMaskException.php │ │ ├── MaskStrategyInterface.php │ │ ├── AbstractMask.php │ │ ├── StarifyMask.php │ │ ├── HashifyMask.php │ │ ├── MaskStrategyProvider.php │ │ ├── FakerMask.php │ │ └── AbstractCachedMask.php │ ├── Subset │ │ ├── Exception │ │ │ ├── SortByColumnRequired.php │ │ │ ├── RequiredOptionMissingException.php │ │ │ └── UnknownSubsetStrategyException.php │ │ ├── SubsetStrategyInterface.php │ │ ├── NoSubset.php │ │ ├── TailSubset.php │ │ ├── AbstractSubset.php │ │ ├── HeadSubset.php │ │ ├── SubsetStrategyProvider.php │ │ ├── RangeSubset.php │ │ └── AbstratctHeadOrTailSubset.php │ ├── Data │ │ ├── Writer │ │ │ ├── Exception │ │ │ │ └── ChunkWriterNotFound.php │ │ │ ├── ChunkWriterInterface.php │ │ │ ├── ChunkWriterProvider.php │ │ │ ├── MysqlInfileWriter.php │ │ │ └── GenericInsertWriter.php │ │ ├── ChunkMessage.php │ │ ├── ChunkReader.php │ │ ├── ChunkConsumer.php │ │ ├── ChunkError.php │ │ ├── DataCopier.php │ │ ├── SourceQuery.php │ │ ├── Masker.php │ │ ├── ChunkProducer.php │ │ └── ChunkCache.php │ ├── Recipe │ │ ├── StrategyDefinition.php │ │ ├── Recipe.php │ │ ├── Table.php │ │ ├── MaskReplicator.php │ │ ├── RecipeFactory.php │ │ └── RecipeTableFactory.php │ ├── Schema │ │ ├── RelationGroupsFactory.php │ │ ├── RelationGroups │ │ │ ├── RelationColumn.php │ │ │ ├── GrouppedRelationColumns.php │ │ │ └── RelationsGroups.php │ │ ├── ForeignKeysExtractor.php │ │ └── SchemaManipulator.php │ ├── Serializer │ │ └── TableDenormalizer.php │ └── Refine │ │ ├── RefineExecutor.php │ │ └── 
Refiner.php ├── Config │ ├── StrategyExtractor.php │ ├── ColumnConfigFactory.php │ ├── Model │ │ ├── ColumnConfig.php │ │ ├── Config.php │ │ └── TableConfig.php │ ├── TableConfigFactory.php │ ├── ConfigFactory.php │ ├── Serializer │ │ ├── ConfigDenormalizer.php │ │ └── TableConfigDenormalizer.php │ └── ConfigLoader.php ├── Command │ ├── InitCommand.php │ ├── ConsumerCommand.php │ ├── FinishCommand.php │ └── RunCommand.php └── Kernel.php ├── .gitignore ├── features ├── bootstrap │ ├── bootstrap.php │ ├── ConfigFileContext.php │ ├── ChunkCacheContext.php │ ├── CommandContext.php │ └── DatabaseContext.php ├── Init │ └── init.feature └── Run │ ├── Data │ └── copyUTF8.feature │ ├── Mask │ ├── starify.feature │ ├── hashify.feature │ └── faker.feature │ ├── Subset │ ├── head.fails.feature │ ├── head.feature │ ├── range.feature │ └── tail.feature │ ├── Refine │ ├── selfReferenceNotNull.feature │ ├── selfReferenceNullable.feature │ ├── notNullReference.feature │ ├── nullableReference.feature │ ├── refineExcluded.feature │ └── cascadeRefineOnlyToForeignSide.feature │ ├── finishCommand.feature │ └── Chunk │ └── queueChunks.feature ├── .env.dist ├── docker-compose.test-postgres.yml ├── docker-compose.test-mysql.yml ├── spec ├── Config │ ├── ColumnConfigFactorySpec.php │ ├── StrategyExtractorSpec.php │ ├── TableConfigFactorySpec.php │ ├── ConfigFactorySpec.php │ └── Serializer │ │ ├── ConfigDenormalizerSpec.php │ │ └── TableConfigDenormalizerSpec.php └── Fogger │ ├── Mask │ ├── StarifyMaskSpec.php │ └── HashifyMaskSpec.php │ └── Serializer │ └── TableDenormalizerSpec.php ├── behat.yml.dist ├── LICENSE ├── docker-compose.yml ├── bin └── console ├── public └── index.php ├── .travis.yml ├── Makefile ├── composer.json ├── Dockerfile ├── README.md └── symfony.lock /translations/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/docker-compose.override.yml.dist: -------------------------------------------------------------------------------- 1 | version: '2.0' 2 | services: {} 3 | -------------------------------------------------------------------------------- /phpspec.yml: -------------------------------------------------------------------------------- 1 | suites: 2 | fogger_suite: 3 | namespace: App 4 | psr4_prefix: App 5 | -------------------------------------------------------------------------------- /config/packages/routing.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | router: 3 | strict_requirements: ~ 4 | -------------------------------------------------------------------------------- /config/packages/dev/routing.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | router: 3 | strict_requirements: true 4 | -------------------------------------------------------------------------------- /config/packages/test/routing.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | router: 3 | strict_requirements: true 4 | -------------------------------------------------------------------------------- /config/routes.yaml: -------------------------------------------------------------------------------- 1 | #index: 2 | # path: / 3 | # controller: App\Controller\DefaultController::index 4 | -------------------------------------------------------------------------------- /config/packages/test/framework.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | test: true 3 | session: 4 | storage_id: session.storage.mock_file 5 | -------------------------------------------------------------------------------- /src/Fogger/Mask/Exception/UnknownMaskException.php: -------------------------------------------------------------------------------- 1 | symfony/framework-bundle ### 2 | /.env 3 | 
/public/bundles/ 4 | /var/ 5 | /vendor/ 6 | ###< symfony/framework-bundle ### 7 | 8 | ###> behat/symfony2-extension ### 9 | behat.yml 10 | ###< behat/symfony2-extension ### 11 | docker-compose.override.yml 12 | -------------------------------------------------------------------------------- /src/Fogger/Mask/MaskStrategyInterface.php: -------------------------------------------------------------------------------- 1 | getMaskName()) { 12 | return true; 13 | } 14 | 15 | return false; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /config/bundles.php: -------------------------------------------------------------------------------- 1 | ['all' => true], 5 | Doctrine\Bundle\DoctrineCacheBundle\DoctrineCacheBundle::class => ['all' => true], 6 | Doctrine\Bundle\DoctrineBundle\DoctrineBundle::class => ['all' => true], 7 | Doctrine\Bundle\MigrationsBundle\DoctrineMigrationsBundle::class => ['all' => true], 8 | Snc\RedisBundle\SncRedisBundle::class => ['all' => true], 9 | ]; 10 | -------------------------------------------------------------------------------- /src/Fogger/Mask/StarifyMask.php: -------------------------------------------------------------------------------- 1 | load(__DIR__.'/../../.env'); 11 | } 12 | -------------------------------------------------------------------------------- /src/Fogger/Subset/NoSubset.php: -------------------------------------------------------------------------------- 1 | extractor = $extractor; 15 | } 16 | 17 | public function createFromDBALColumn(DBAL\Column $dbalColumn): ColumnConfig 18 | { 19 | return $this->extractor->extract($dbalColumn->getComment()); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.env.dist: -------------------------------------------------------------------------------- 1 | ###> symfony/framework-bundle ### 2 | APP_ENV=prod 3 | APP_SECRET=1b90fda83888e1852b735fdf9d37cf40 4 | ###< symfony/framework-bundle 
### 5 | 6 | ###> doctrine/doctrine-bundle ### 7 | # Please provide urls for your source and target databases 8 | SOURCE_DATABASE_URL=mysql://user:pass@source/source 9 | TARGET_DATABASE_URL=mysql://user:pass@target/target 10 | ###< doctrine/doctrine-bundle ### 11 | ###> snc/redis-bundle ### 12 | # passwords that contain special characters (@, %, :, +) must be urlencoded 13 | REDIS_URL=redis://redis 14 | ###< snc/redis-bundle ### 15 | -------------------------------------------------------------------------------- /config/packages/snc_redis.yaml: -------------------------------------------------------------------------------- 1 | snc_redis: 2 | clients: 3 | default: 4 | type: predis 5 | alias: default 6 | dsn: "%env(REDIS_URL)%" 7 | logging: false 8 | 9 | # Define your clients here. The example below connects to database 0 of the default Redis server. 10 | # 11 | # See https://github.com/snc/SncRedisBundle/blob/master/Resources/doc/index.md for instructions on 12 | # how to configure the bundle. 
13 | # 14 | # default: 15 | # type: predis 16 | # alias: default 17 | # dsn: "%env(REDIS_URL)%" 18 | -------------------------------------------------------------------------------- /docker-compose.test-postgres.yml: -------------------------------------------------------------------------------- 1 | version: '2.0' 2 | services: 3 | postgres_test_source: 4 | environment: 5 | POSTGRES_DB: source 6 | POSTGRES_PASSWORD: pass 7 | POSTGRES_USER: user 8 | image: postgres 9 | postgres_test_target: 10 | environment: 11 | POSTGRES_DB: target 12 | POSTGRES_PASSWORD: pass 13 | POSTGRES_USER: user 14 | image: postgres 15 | app: 16 | environment: 17 | SOURCE_DATABASE_URL: postgresql://user:pass@postgres_test_source:5432/source 18 | TARGET_DATABASE_URL: postgresql://user:pass@postgres_test_target:5432/target 19 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkMessage.php: -------------------------------------------------------------------------------- 1 | table = $table; 18 | $this->keys = $keys; 19 | } 20 | 21 | public function getTable(): Table 22 | { 23 | return $this->table; 24 | } 25 | 26 | public function getKeys(): array 27 | { 28 | return $this->keys; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /docker-compose.test-mysql.yml: -------------------------------------------------------------------------------- 1 | version: '2.0' 2 | services: 3 | mysql_test_source: 4 | environment: 5 | MYSQL_DATABASE: source 6 | MYSQL_PASSWORD: pass 7 | MYSQL_ROOT_PASSWORD: pass 8 | MYSQL_USER: user 9 | image: mysql:5.7 10 | mysql_test_target: 11 | environment: 12 | MYSQL_DATABASE: target 13 | MYSQL_PASSWORD: pass 14 | MYSQL_ROOT_PASSWORD: pass 15 | MYSQL_USER: user 16 | image: mysql:5.7 17 | app: 18 | environment: 19 | SOURCE_DATABASE_URL: mysql://user:pass@mysql_test_source:3306/source 20 | TARGET_DATABASE_URL: mysql://user:pass@mysql_test_target:3306/target 21 | 
-------------------------------------------------------------------------------- /src/Config/Model/ColumnConfig.php: -------------------------------------------------------------------------------- 1 | maskStrategy = $maskStrategy; 16 | $this->options = $options; 17 | } 18 | 19 | public function getMaskStrategy(): string 20 | { 21 | return $this->maskStrategy; 22 | } 23 | 24 | public function getOptions(): array 25 | { 26 | return $this->options; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/Fogger/Recipe/StrategyDefinition.php: -------------------------------------------------------------------------------- 1 | name = $name; 19 | $this->options = $options; 20 | } 21 | 22 | public function getName(): string 23 | { 24 | return $this->name; 25 | } 26 | 27 | public function getOptions(): array 28 | { 29 | return $this->options; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/Fogger/Recipe/Recipe.php: -------------------------------------------------------------------------------- 1 | excludes = $excludes; 14 | } 15 | 16 | public function getTables(): array 17 | { 18 | return $this->tables; 19 | } 20 | 21 | public function getExcludes(): array 22 | { 23 | return $this->excludes; 24 | } 25 | 26 | public function addTable(string $name, Table $table) 27 | { 28 | $this->tables[$name] = $table; 29 | } 30 | 31 | public function getTable(string $name) 32 | { 33 | return $this->tables[$name] ?? 
null; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkReader.php: -------------------------------------------------------------------------------- 1 | sourceQuery = $sourceQuery; 12 | } 13 | 14 | /** 15 | * @param ChunkMessage $chunkMessage 16 | * @return array 17 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 18 | */ 19 | public function getDataChunk(ChunkMessage $chunkMessage): array 20 | { 21 | $query = $this->sourceQuery->getAllRowsQuery( 22 | $chunkMessage->getTable(), 23 | $chunkMessage->getKeys() 24 | ); 25 | 26 | return $query->execute()->fetchAll(); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /config/packages/doctrine.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | # Adds a fallback DATABASE_URL if the env var is not set. 3 | # This allows you to run cache:warmup even if your 4 | # environment variables are not available yet. 5 | # You should not need to change this value. 
6 | env(DATABASE_URL): '' 7 | 8 | doctrine: 9 | dbal: 10 | default_connection: source 11 | connections: 12 | source: 13 | url: '%env(resolve:SOURCE_DATABASE_URL)%' 14 | options: 15 | !php/const PDO::MYSQL_ATTR_USE_BUFFERED_QUERY: 0 16 | target: 17 | options: 18 | !php/const PDO::MYSQL_ATTR_LOCAL_INFILE: 1 19 | url: '%env(resolve:TARGET_DATABASE_URL)%' 20 | -------------------------------------------------------------------------------- /spec/Config/ColumnConfigFactorySpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($extractor); 16 | } 17 | 18 | function it_returns_column_config_created_by_extractor(Column $column, StrategyExtractor $extractor) 19 | { 20 | $instance = new ColumnConfig('none'); 21 | $extractor->extract(Argument::any())->willReturn($instance); 22 | 23 | $this->createFromDBALColumn($column)->shouldBe($instance); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/Fogger/Schema/RelationGroupsFactory.php: -------------------------------------------------------------------------------- 1 | sourceSchema = $connection->getSchemaManager(); 15 | } 16 | 17 | public function createFromDBAL() 18 | { 19 | $groups = new RelationsGroups(); 20 | 21 | foreach ($this->sourceSchema->listTables() as $table) { 22 | foreach ($table->getForeignKeys() as $foreignKey) { 23 | $groups->addForeignKey($foreignKey); 24 | } 25 | } 26 | 27 | return $groups; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /behat.yml.dist: -------------------------------------------------------------------------------- 1 | default: 2 | suites: 3 | default: 4 | contexts: 5 | - ConfigFileContext: 6 | - CommandContext: 7 | kernel: '@kernel' 8 | configFactory: '@App\Config\ConfigFactory' 9 | configLoader: '@App\Config\ConfigLoader' 10 | - DatabaseContext: 11 | source: '@doctrine.dbal.source_connection' 12 | target: 
'@doctrine.dbal.target_connection' 13 | - ChunkCacheContext: 14 | chunkCache: '@App\Fogger\Data\ChunkCache' 15 | 16 | extensions: 17 | Behat\Symfony2Extension: 18 | kernel: 19 | bootstrap: features/bootstrap/bootstrap.php 20 | class: App\Kernel 21 | -------------------------------------------------------------------------------- /src/Config/TableConfigFactory.php: -------------------------------------------------------------------------------- 1 | columnConfigFactory = $columnConfigFactory; 15 | } 16 | 17 | public function createFromDBALTable(DBAL\Table $dbalTable) 18 | { 19 | $table = new TableConfig(); 20 | foreach ($dbalTable->getColumns() as $dbalColumn) { 21 | $table->addColumn( 22 | $dbalColumn->getName(), 23 | $this->columnConfigFactory->createFromDBALColumn($dbalColumn) 24 | ); 25 | } 26 | 27 | return $table; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/Config/Model/Config.php: -------------------------------------------------------------------------------- 1 | tables; 17 | } 18 | 19 | public function addTable(string $name, TableConfig $table) 20 | { 21 | $this->tables[$name] = $table; 22 | } 23 | 24 | public function getTable($name): ?TableConfig 25 | { 26 | return $this->tables[$name] ?? 
null; 27 | } 28 | 29 | public function setExcludes(array $excludes) 30 | { 31 | $this->excludes = $excludes; 32 | } 33 | 34 | public function getExcludes(): array 35 | { 36 | return $this->excludes; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkConsumer.php: -------------------------------------------------------------------------------- 1 | dataCopier = $dataCopier; 20 | $this->cache = $cache; 21 | $this->error = $error; 22 | } 23 | 24 | public function execute(ChunkMessage $message) 25 | { 26 | try { 27 | $this->dataCopier->copyDataChunk($message); 28 | } catch (\Exception $exception) { 29 | $this->error->addError($exception->getMessage()); 30 | } 31 | 32 | $this->cache->increaseProcessedCount(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/Fogger/Schema/RelationGroups/RelationColumn.php: -------------------------------------------------------------------------------- 1 | table = $table; 16 | $this->columns = $columns; 17 | } 18 | 19 | public function getTable(): string 20 | { 21 | return $this->table; 22 | } 23 | 24 | public function getColumns(): array 25 | { 26 | return $this->columns; 27 | } 28 | 29 | public function getDescriptor() 30 | { 31 | return sprintf( 32 | self::DESCRIPTOR_PATTERN, 33 | $this->table, 34 | implode('|', $this->columns) 35 | ); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Fogger/Schema/RelationGroups/GrouppedRelationColumns.php: -------------------------------------------------------------------------------- 1 | contains($column)) { 12 | return; 13 | } 14 | 15 | $this->columns[$column->getDescriptor()] = $column; 16 | } 17 | 18 | public function contains(RelationColumn $column) 19 | { 20 | return $this->containsByKey($column->getDescriptor()); 21 | } 22 | 23 | public function containsByKey(string $key) 24 | { 25 | return 
isset($this->columns[$key]); 26 | } 27 | 28 | /** 29 | * @return RelationColumn[] 30 | */ 31 | public function getColumns(): array 32 | { 33 | return $this->columns; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkError.php: -------------------------------------------------------------------------------- 1 | redis = $redis; 16 | } 17 | 18 | public function reset(): void 19 | { 20 | $this->redis->set(self::CHUNKS_ERROR, ''); 21 | } 22 | 23 | public function addError(string $errorMessage): void 24 | { 25 | $this->redis->append(self::CHUNKS_ERROR, trim($errorMessage) . "\n"); 26 | } 27 | 28 | public function getError(): string 29 | { 30 | return (string)trim($this->redis->get(self::CHUNKS_ERROR)); 31 | } 32 | 33 | public function hasError(): bool 34 | { 35 | return $this->redis->get(self::CHUNKS_ERROR) !== ''; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Config/ConfigFactory.php: -------------------------------------------------------------------------------- 1 | sourceSchemaManager = $connection->getSchemaManager(); 17 | $this->tableConfigFactory = $tableConfigFactory; 18 | } 19 | 20 | public function createFromDBAL() 21 | { 22 | $dbalTables = $this->sourceSchemaManager->listTables(); 23 | $config = new Config(); 24 | 25 | foreach ($dbalTables as $dbalTable) { 26 | $config->addTable($dbalTable->getName(), $this->tableConfigFactory->createFromDBALTable($dbalTable)); 27 | } 28 | 29 | return $config; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/Fogger/Mask/MaskStrategyProvider.php: -------------------------------------------------------------------------------- 1 | addMask($mask); 16 | } 17 | } 18 | 19 | private function addMask(MaskStrategyInterface $mask) 20 | { 21 | $this->masks[] = $mask; 22 | } 23 | 24 | /** 25 | * @param string $name 26 | * @return MaskStrategyInterface 27 
| * @throws UnknownMaskException 28 | */ 29 | public function getMask(string $name): MaskStrategyInterface 30 | { 31 | foreach ($this->masks as $mask) { 32 | if ($mask->supports($name)) { 33 | return $mask; 34 | } 35 | } 36 | 37 | throw new UnknownMaskException('Unknown mask "'.$name.'".'); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Fogger/Subset/TailSubset.php: -------------------------------------------------------------------------------- 1 | ensureOptionIsSet($table->getSubset()->getOptions(), 'length'); 20 | $this->ensureSortByColumn($table); 21 | 22 | return $queryBuilder 23 | ->andWhere(sprintf('%s >= ?', $table->getSortBy())) 24 | ->setParameter(0, $this->findOffsetId($table, true)); 25 | } 26 | 27 | public function getSubsetStrategyName(): string 28 | { 29 | return 'tail'; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /spec/Fogger/Mask/StarifyMaskSpec.php: -------------------------------------------------------------------------------- 1 | shouldHaveType(MaskStrategyInterface::class); 13 | } 14 | 15 | function it_should_supports_starify() 16 | { 17 | $this->supports('starify')->shouldBe(true); 18 | $this->supports('wrongName')->shouldBe(false); 19 | } 20 | 21 | function it_should_mask_given_value() 22 | { 23 | $this->apply('dummyValue')->shouldBe('**********'); 24 | } 25 | 26 | function it_should_allow_to_specify_how_many_character_should_be_returned() 27 | { 28 | $this->apply('dummyValue', ['length' => 2])->shouldBe('**'); 29 | } 30 | 31 | function it_should_ignore_null_value() 32 | { 33 | $this->apply(null)->shouldBe(null); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Fogger/Subset/AbstractSubset.php: -------------------------------------------------------------------------------- 1 | getSubsetStrategyName(); 14 | } 15 | 16 | /** 17 | * @param array $options 18 | * @param $option 
19 | * @throws RequiredOptionMissingException 20 | */ 21 | protected function ensureOptionIsSet(array $options, $option) 22 | { 23 | if (!isset($options[$option])) { 24 | throw new RequiredOptionMissingException( 25 | sprintf( 26 | 'Strategy %s requires option "%s" to be set', 27 | $this->getSubsetStrategyName(), 28 | $option 29 | ) 30 | ); 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /config/packages/prod/doctrine.yaml: -------------------------------------------------------------------------------- 1 | doctrine: 2 | orm: 3 | metadata_cache_driver: 4 | type: service 5 | id: doctrine.system_cache_provider 6 | query_cache_driver: 7 | type: service 8 | id: doctrine.system_cache_provider 9 | result_cache_driver: 10 | type: service 11 | id: doctrine.result_cache_provider 12 | 13 | services: 14 | doctrine.result_cache_provider: 15 | class: Symfony\Component\Cache\DoctrineProvider 16 | public: false 17 | arguments: 18 | - '@doctrine.result_cache_pool' 19 | doctrine.system_cache_provider: 20 | class: Symfony\Component\Cache\DoctrineProvider 21 | public: false 22 | arguments: 23 | - '@doctrine.system_cache_pool' 24 | 25 | framework: 26 | cache: 27 | pools: 28 | doctrine.result_cache_pool: 29 | adapter: cache.app 30 | doctrine.system_cache_pool: 31 | adapter: cache.system 32 | -------------------------------------------------------------------------------- /src/Config/Model/TableConfig.php: -------------------------------------------------------------------------------- 1 | columns; 16 | } 17 | 18 | public function getColumn(string $name): ?ColumnConfig 19 | { 20 | return $this->columns[$name] ?? 
null; 21 | } 22 | 23 | public function addColumn(string $name, ColumnConfig $column) 24 | { 25 | $this->columns[$name] = $column; 26 | } 27 | 28 | public function getSubsetStrategy(): ?string 29 | { 30 | return $this->subsetStrategy; 31 | } 32 | 33 | public function getSubsetOptions(): array 34 | { 35 | return $this->subsetOptions; 36 | } 37 | 38 | public function setSubsetStrategy(string $name, array $options) 39 | { 40 | $this->subsetStrategy = $name; 41 | $this->subsetOptions = $options; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/Fogger/Subset/HeadSubset.php: -------------------------------------------------------------------------------- 1 | ensureOptionIsSet($table->getSubset()->getOptions(), 'length'); 21 | $this->ensureSortByColumn($table); 22 | 23 | return $queryBuilder 24 | ->andWhere(sprintf('%s <= ?', $table->getSortBy())) 25 | ->setParameter(0, $this->findOffsetId($table, false)); 26 | } 27 | 28 | public function getSubsetStrategyName(): string 29 | { 30 | return 'head'; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /config/packages/framework.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | secret: '%env(APP_SECRET)%' 3 | #default_locale: en 4 | #csrf_protection: true 5 | #http_method_override: true 6 | 7 | # Enables session support. Note that the session will ONLY be started if you read or write from it. 8 | # Remove or comment this section to explicitly disable session support. 9 | session: 10 | handler_id: ~ 11 | 12 | #esi: true 13 | #fragments: true 14 | php_errors: 15 | log: true 16 | 17 | cache: 18 | # Put the unique name of your app here: the prefix seed 19 | # is used to compute stable namespaces for cache keys. 20 | #prefix_seed: your_vendor_name/app_name 21 | 22 | # The app cache caches to the filesystem by default. 
23 | # Other options include: 24 | 25 | # Redis 26 | app: cache.adapter.redis 27 | default_redis_provider: redis://redis 28 | 29 | # APCu (not recommended with heavy random-write workloads as memory fragmentation can cause perf issues) 30 | #app: cache.adapter.apcu 31 | -------------------------------------------------------------------------------- /src/Fogger/Data/Writer/ChunkWriterProvider.php: -------------------------------------------------------------------------------- 1 | addWriter($writer); 16 | } 17 | } 18 | 19 | private function addWriter(ChunkWriterInterface $chunkWriter) 20 | { 21 | $this->chunkWriters[] = $chunkWriter; 22 | } 23 | 24 | /** 25 | * @return ChunkWriterInterface 26 | * @throws ChunkWriterNotFound 27 | */ 28 | public function getWriter(): ChunkWriterInterface 29 | { 30 | foreach ($this->chunkWriters as $chunkWriter) { 31 | if ($chunkWriter->isApplicable()) { 32 | return $chunkWriter; 33 | } 34 | } 35 | 36 | throw new ChunkWriterNotFound('Adapter that could write data could not be found'); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spec/Config/StrategyExtractorSpec.php: -------------------------------------------------------------------------------- 1 | extract('')->shouldBeLike(new ColumnConfig('none')); 13 | } 14 | 15 | function it_returns_column_config_with_none_strategy_for_comment_not_maching_template() 16 | { 17 | $this->extract('some comment')->shouldBeLike(new ColumnConfig('none')); 18 | } 19 | 20 | function it_returns_column_config_with_proper_strategy_for_comment_maching_template() 21 | { 22 | $this->extract('fogger::strategy')->shouldBeLike(new ColumnConfig('strategy')); 23 | } 24 | 25 | function it_returns_column_config_with_proper_strategy_for_comment_maching_template_with_options() 26 | { 27 | $this->extract('fogger::strategy{"key": "value"}') 28 | ->shouldBeLike(new ColumnConfig('strategy', ['key' => 'value'])); 29 | } 30 | } 31 | 
-------------------------------------------------------------------------------- /src/Config/Serializer/ConfigDenormalizer.php: -------------------------------------------------------------------------------- 1 | $table) { 24 | /** @var TableConfig $tableConfig */ 25 | $tableConfig = $this->denormalizer->denormalize($table, TableConfig::class, $format, $context); 26 | $config->addTable($key, $tableConfig); 27 | } 28 | $config->setExcludes($data['excludes'] ?? []); 29 | 30 | return $config; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 The Software House 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/Fogger/Subset/SubsetStrategyProvider.php: -------------------------------------------------------------------------------- 1 | addSubsetStrategy($subsetStrategy); 15 | } 16 | } 17 | 18 | private function addSubsetStrategy(SubsetStrategyInterface $subsetStrategy) 19 | { 20 | $this->subsetStrategies[] = $subsetStrategy; 21 | } 22 | 23 | /** 24 | * @param string $name 25 | * @return SubsetStrategyInterface 26 | * @throws UnknownSubsetStrategyException 27 | */ 28 | public function getSubsetStrategy(?string $name = 'noSubset'): SubsetStrategyInterface 29 | { 30 | foreach ($this->subsetStrategies as $subsetStrategy) { 31 | if ($subsetStrategy->supports($name)) { 32 | return $subsetStrategy; 33 | } 34 | } 35 | 36 | throw new UnknownSubsetStrategyException('Unknown subset strategy "'.$name.'".'); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/Config/Serializer/TableConfigDenormalizer.php: -------------------------------------------------------------------------------- 1 | $column) { 23 | /** @var ColumnConfig $columnConfig */ 24 | $columnConfig = $this->denormalizer->denormalize($column, ColumnConfig::class, $format, $context); 25 | $table->addColumn($key, $columnConfig); 26 | } 27 | if (isset($data['subsetStrategy'])) { 28 | $table->setSubsetStrategy($data['subsetStrategy'], $data['subsetOptions'] ?? []); 29 | } 30 | 31 | return $table; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.0' 2 | services: 3 | app: 4 | depends_on: 5 | - redis 6 | build: . 
7 | volumes: 8 | - .:/app 9 | - ./var/:/fogger 10 | environment: 11 | SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source 12 | TARGET_DATABASE_URL: mysql://user:pass@target:3306/target 13 | RABBITMQ_URL: amqp://user:pass@rabbit:5672 14 | REDIS_URL: redis://redis 15 | worker: 16 | depends_on: 17 | - redis 18 | build: . 19 | volumes: 20 | - .:/app 21 | - ./var/:/fogger 22 | environment: 23 | SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source 24 | TARGET_DATABASE_URL: mysql://user:pass@target:3306/target 25 | RABBITMQ_URL: amqp://user:pass@rabbit:5672 26 | REDIS_URL: redis://redis 27 | restart: always 28 | command: fogger:consumer 29 | redis: 30 | image: redis:4 31 | source: 32 | environment: 33 | MYSQL_DATABASE: source 34 | MYSQL_PASSWORD: pass 35 | MYSQL_ROOT_PASSWORD: pass 36 | MYSQL_USER: user 37 | image: mysql:5.7 38 | target: 39 | environment: 40 | MYSQL_DATABASE: target 41 | MYSQL_PASSWORD: pass 42 | MYSQL_ROOT_PASSWORD: pass 43 | MYSQL_USER: user 44 | image: mysql:5.7 45 | -------------------------------------------------------------------------------- /spec/Fogger/Mask/HashifyMaskSpec.php: -------------------------------------------------------------------------------- 1 | shouldHaveType(MaskStrategyInterface::class); 15 | } 16 | 17 | function it_should_supports_hashify() 18 | { 19 | $this->supports('hashify')->shouldBe(true); 20 | $this->supports('wrongName')->shouldBe(false); 21 | } 22 | 23 | function it_should_mask_given_value() 24 | { 25 | $dummyValue = 'dummyValue'; 26 | $this->apply($dummyValue)->shouldBe(md5($dummyValue)); 27 | } 28 | 29 | function it_should_allow_to_specify_template_which_should_be_use_to_generate_response() 30 | { 31 | $template = 'template %s template'; 32 | $dummyValue = 'dummyValue'; 33 | $this->apply($dummyValue, ['template' => $template])->shouldBe(sprintf($template, md5($dummyValue))); 34 | } 35 | 36 | function it_should_ignore_null_value() 37 | { 38 | $this->apply(null)->shouldBe(null); 39 | } 40 | 41 | } 42 | 
-------------------------------------------------------------------------------- /features/Init/init.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to use the fogger tool 3 | As a user 4 | I want to read database schema and create boilerplate config file 5 | 6 | Scenario: It creates the boilerplate by running init command 7 | And there is a source database 8 | And there is a table table with following columns: 9 | | name | type | length | comment | 10 | | id | integer | | | 11 | | column | string | 64 | fogger::strategy | 12 | | other | string | 128 | fogger::strategy{"option": "value"} | 13 | And the file test.yaml doesn't exist 14 | When I run "init" command with input: 15 | | --file | test.yaml | 16 | Then I should see "Done!" in command's output 17 | And the command should exit with code 0 18 | And YAML file test.yaml should be like: 19 | """ 20 | tables: 21 | table: 22 | columns: 23 | id: { maskStrategy: none, options: { } } 24 | column: { maskStrategy: strategy, options: { } } 25 | other: { maskStrategy: strategy, options: { option: "value" } } 26 | subsetStrategy: null 27 | subsetOptions: { } 28 | excludes: { } 29 | 30 | """ 31 | -------------------------------------------------------------------------------- /src/Fogger/Data/DataCopier.php: -------------------------------------------------------------------------------- 1 | chunkReader = $chunkReader; 21 | $this->masker = $masker; 22 | $this->chunkWriterProvider = $chunkWriterchunkWriterProvider; 23 | } 24 | 25 | /** 26 | * @param ChunkMessage $chunkMessage 27 | * @throws \App\Fogger\Mask\Exception\UnknownMaskException 28 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 29 | * @throws Writer\Exception\ChunkWriterNotFound 30 | */ 31 | public function copyDataChunk(ChunkMessage $chunkMessage) 32 | { 33 | $data = $this->chunkReader->getDataChunk($chunkMessage); 34 | $table = $chunkMessage->getTable(); 35 | 
$this->chunkWriterProvider->getWriter()->insert( 36 | $table->getName(), 37 | $this->masker->applyMasks($data, $table->getMasks()) 38 | ); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/Fogger/Serializer/TableDenormalizer.php: -------------------------------------------------------------------------------- 1 | denormalizer 23 | ->denormalize($data['subset'], StrategyDefinition::class, $format, $context); 24 | 25 | $table = new Table( 26 | $data['name'], 27 | $data['chunkSize'], 28 | $data['sortBy'], 29 | $subset ?? new StrategyDefinition('noSubset') 30 | ); 31 | foreach ($data['masks'] ?? [] as $key => $mask) { 32 | $table->addMask($key, new StrategyDefinition($mask['name'], $mask['options'])); 33 | } 34 | 35 | return $table; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Config/ConfigLoader.php: -------------------------------------------------------------------------------- 1 | serializer = $serializer; 19 | } 20 | 21 | public static function forgePath(string $filename): string 22 | { 23 | return sprintf("%s/%s", self::DIRECTORY, $filename ?? 
self::DEFAULT_FILENAME); 24 | } 25 | 26 | public function save(Config $config, ?string $filename = null) 27 | { 28 | file_put_contents( 29 | self::forgePath($filename), 30 | $this->serializer->serialize($config, YamlEncoder::FORMAT, ['yaml_inline' => 4]) 31 | ); 32 | } 33 | 34 | public function load(string $filename): Config 35 | { 36 | /** @var Config $config */ 37 | $config = $this->serializer->deserialize( 38 | file_get_contents(self::forgePath($filename)), 39 | Config::class, 40 | YamlEncoder::FORMAT 41 | ); 42 | 43 | return $config; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/Fogger/Subset/RangeSubset.php: -------------------------------------------------------------------------------- 1 | ensureOptionIsSet($options, 'column'); 17 | $this->ensureOptionIsSet($options, 'from'); 18 | $this->ensureOptionIsSet($options, 'to'); 19 | } 20 | 21 | /** 22 | * @param QueryBuilder $queryBuilder 23 | * @param Table $table 24 | * @return QueryBuilder 25 | * @throws Exception\RequiredOptionMissingException 26 | */ 27 | public function subsetQuery(QueryBuilder $queryBuilder, Table $table): QueryBuilder 28 | { 29 | $this->ensureValidOptions($options = $table->getSubset()->getOptions()); 30 | 31 | return $queryBuilder 32 | ->where(sprintf('%s >= ?', $options['column'])) 33 | ->andWhere(sprintf('%s <= ?', $options['column'])) 34 | ->setParameter(0, $options['from']) 35 | ->setParameter(1, $options['to']); 36 | } 37 | 38 | protected function getSubsetStrategyName(): string 39 | { 40 | return 'range'; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /features/bootstrap/ConfigFileContext.php: -------------------------------------------------------------------------------- 1 | getRaw())) { 30 | throw new \Exception('File content does not match the template'); 31 | } 32 | } 33 | 34 | /** 35 | * @Given the config :filename contains: 36 | */ 37 | public function 
theConfigTestYamlContains(string $filename, PyStringNode $string) 38 | { 39 | file_put_contents(ConfigLoader::forgePath($filename), $string->getRaw()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | load(__DIR__.'/../.env'); 23 | } 24 | 25 | $input = new ArgvInput(); 26 | $env = $input->getParameterOption(['--env', '-e'], $_SERVER['APP_ENV'] ?? 'dev', true); 27 | $debug = (bool) ($_SERVER['APP_DEBUG'] ?? ('prod' !== $env)) && !$input->hasParameterOption('--no-debug', true); 28 | 29 | if ($debug) { 30 | umask(0000); 31 | 32 | if (class_exists(Debug::class)) { 33 | Debug::enable(); 34 | } 35 | } 36 | 37 | $kernel = new Kernel($env, $debug); 38 | $application = new Application($kernel); 39 | $application->run($input); 40 | -------------------------------------------------------------------------------- /public/index.php: -------------------------------------------------------------------------------- 1 | load(__DIR__.'/../.env'); 16 | } 17 | 18 | $env = $_SERVER['APP_ENV'] ?? 'dev'; 19 | $debug = (bool) ($_SERVER['APP_DEBUG'] ?? ('prod' !== $env)); 20 | 21 | if ($debug) { 22 | umask(0000); 23 | 24 | Debug::enable(); 25 | } 26 | 27 | if ($trustedProxies = $_SERVER['TRUSTED_PROXIES'] ?? false) { 28 | Request::setTrustedProxies(explode(',', $trustedProxies), Request::HEADER_X_FORWARDED_ALL ^ Request::HEADER_X_FORWARDED_HOST); 29 | } 30 | 31 | if ($trustedHosts = $_SERVER['TRUSTED_HOSTS'] ?? 
false) { 32 | Request::setTrustedHosts(explode(',', $trustedHosts)); 33 | } 34 | 35 | $kernel = new Kernel($env, $debug); 36 | $request = Request::createFromGlobals(); 37 | $response = $kernel->handle($request); 38 | $response->send(); 39 | $kernel->terminate($request, $response); 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | 3 | env: 4 | global: 5 | - APP_ENV=test 6 | 7 | matrix: 8 | include: 9 | - php: '7.2' 10 | services: 11 | - postgresql 12 | - redis-server 13 | - rabbitmq 14 | before_script: 15 | - psql -c 'create database suorce;' -U postgres 16 | - psql -c 'create database target;' -U postgres 17 | env: 18 | - SOURCE_DATABASE_URL=pgsql://postgres@localhost/suorce 19 | - TARGET_DATABASE_URL=pgsql://postgres@localhost/target 20 | - RABBITMQ_URL=amqp://guest:guest@localhost 21 | - REDIS_URL=redis://localhost 22 | - php: '7.2' 23 | services: 24 | - mysql 25 | - redis-server 26 | - rabbitmq 27 | before_script: 28 | - mysql -e 'CREATE DATABASE suorce;' 29 | - mysql -e 'CREATE DATABASE target;' 30 | env: 31 | - SOURCE_DATABASE_URL=mysql://root@localhost/suorce 32 | - TARGET_DATABASE_URL=mysql://root@localhost/target 33 | - RABBITMQ_URL=amqp://guest:guest@localhost 34 | - REDIS_URL=redis://localhost 35 | 36 | 37 | before_install: 38 | - echo "memory_limit=-1" >> ~/.phpenv/versions/$(phpenv version-name)/etc/conf.d/travis.ini 39 | 40 | install: 41 | - composer install --no-progress --no-suggest --ansi; 42 | 43 | script: 44 | - vendor/bin/phpspec run 45 | - bin/console cache:clear 46 | - vendor/bin/behat --format=progress; 47 | -------------------------------------------------------------------------------- /src/Fogger/Mask/FakerMask.php: -------------------------------------------------------------------------------- 1 | generator = $generator; 17 | 18 | parent::__construct($cache); 19 | } 20 | 21 | public 
function getSubstitution(array $options = []): ?string 22 | { 23 | $method = $options['method'] ?? self::DEFAULT_METHOD; 24 | $arguments = $options['arguments'] ?? []; 25 | $modifier = $options['modifier'] ?? null; 26 | $modifierArguments = $options['modifierArguments'] ?? []; 27 | 28 | $generator = $this->generator; 29 | 30 | if ('optional' === $modifier) { 31 | $generator = $generator->optional(...$modifierArguments); 32 | } 33 | 34 | $result = $generator->$method(...$arguments); 35 | 36 | if (is_array($result)) { 37 | $result = implode(' ', $result); 38 | } elseif ($result instanceof \DateTime) { 39 | $result = $result->format("Y-m-d H:i:s"); 40 | } 41 | 42 | return $result; 43 | } 44 | 45 | protected function getMaskName(): string 46 | { 47 | return 'faker'; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /spec/Config/TableConfigFactorySpec.php: -------------------------------------------------------------------------------- 1 | beConstructedWith($columnConfigFactory); 17 | } 18 | 19 | function it_creates_table_config_object_from_dbal_schema_table_instance( 20 | Table $table, 21 | Column $schemaColumn, 22 | ColumnConfigFactory $columnConfigFactory 23 | ) { 24 | $schemaColumn->getName()->willReturn('column'); 25 | $table->getColumns()->willReturn([$schemaColumn]); 26 | 27 | $columnConfig = new ColumnConfig('none'); 28 | 29 | $columnConfigFactory->createFromDBALColumn($schemaColumn)->willReturn($columnConfig); 30 | 31 | $instance = new TableConfig(); 32 | $instance->addColumn('column', $columnConfig); 33 | 34 | $this->createFromDBALTable($table)->shouldBeLike($instance); 35 | } 36 | 37 | function it_creates_table_config_object_from_dbal_schema_table_instance_no_columns(Table $table) 38 | { 39 | 40 | $table->getColumns()->willReturn([]); 41 | $instance = new TableConfig(); 42 | 43 | $this->createFromDBALTable($table)->shouldBeLike($instance); 44 | } 45 | } 46 | 
-------------------------------------------------------------------------------- /src/Fogger/Recipe/Table.php: -------------------------------------------------------------------------------- 1 | name = $name; 20 | $this->sortBy = $sortBy; 21 | $this->subset = $subset; 22 | $this->chunkSize = $chunkSize; 23 | } 24 | 25 | public function getName(): string 26 | { 27 | return $this->name; 28 | } 29 | 30 | public function getSortBy(): ?string 31 | { 32 | return $this->sortBy; 33 | } 34 | 35 | public function getSubset(): StrategyDefinition 36 | { 37 | return $this->subset; 38 | } 39 | 40 | /** 41 | * @return StrategyDefinition[] 42 | */ 43 | public function getMasks(): array 44 | { 45 | return $this->masks; 46 | } 47 | 48 | public function addMask(string $column, StrategyDefinition $mask) 49 | { 50 | $this->masks[$column] = $mask; 51 | } 52 | 53 | public function getSubsetName(): string 54 | { 55 | return $this->getSubset()->getName(); 56 | } 57 | 58 | public function getChunkSize(): int 59 | { 60 | return $this->chunkSize; 61 | } 62 | 63 | public function setChunkSize(int $chunkSize): void 64 | { 65 | $this->chunkSize = $chunkSize; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /features/Run/Data/copyUTF8.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to copy masked data properly 3 | As a user 4 | I want to get correctly encoded data in target database 5 | 6 | Scenario: We want to copy multi-byte (UTF-8) text to the target database without corrupting it 7 | Given there is a source database 8 | And there is a table test with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | text | string | 64 | | 12 | And the table test contains following data: 13 | | id | text | 14 | | 1 | zażółć gęślą jaźń | 15 | | 2 | المملكة العربية السعودية | 16 | | 3 | 中华人民共和国 | 17 | And there is an empty target database 18 | And the
task queue is empty 19 | And the config test.yaml contains: 20 | """ 21 | tables: 22 | test: ~ 23 | """ 24 | When I run "run" command with input: 25 | | --chunk-size | 1000 | 26 | | --file | test.yaml | 27 | | --dont-wait | true | 28 | And worker processes 1 task 29 | And I run "finish" command with input: 30 | | --file | test.yaml | 31 | Then the command should exit with code 0 32 | And the table test in target database should have 3 rows 33 | And the table test in target database should contain rows: 34 | | id | text | 35 | | 1 | zażółć gęślą jaźń | 36 | | 2 | المملكة العربية السعودية | 37 | | 3 | 中华人民共和国 | 38 | -------------------------------------------------------------------------------- /features/Run/Mask/starify.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to comply with the General Data Protection Regulation (EU GDPR) 3 | As a user 4 | I want to obfuscate (mask) data while moving them to the target database 5 | 6 | Scenario: We want to mask the product column with starify 7 | Given there is a source database 8 | And there is a table products with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | product | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table products contains following data: 14 | | id | product | desc | 15 | | 4 | product 4 | desc 4 | 16 | | 3 | product 3 | desc 3 | 17 | | 1 | product 1 | desc 1 | 18 | | 2 | product 2 | desc 2 | 19 | And there is an empty target database 20 | And the task queue is empty 21 | And the config test.yaml contains: 22 | """ 23 | tables: 24 | products: 25 | columns: 26 | product: { maskStrategy: "starify" } 27 | """ 28 | When I run "run" command with input: 29 | | --chunk-size | 1000 | 30 | | --file | test.yaml | 31 | | --dont-wait | true | 32 | And worker processes 1 task 33 | Then the table products in target database should have 4 rows 34 | And the table products in target database
should contain rows: 35 | | id | product | 36 | | 1 | ********** | 37 | | 2 | ********** | 38 | | 3 | ********** | 39 | | 4 | ********** | 40 | -------------------------------------------------------------------------------- /src/Fogger/Schema/ForeignKeysExtractor.php: -------------------------------------------------------------------------------- 1 | source = $source->getSchemaManager(); 15 | } 16 | 17 | public function findForeignKeysReferencingTable(string $tableName): array 18 | { 19 | $foreignKeys = []; 20 | /** @var Schema\Table $table */ 21 | foreach ($this->source->listTables() as $table) { 22 | /** @var Schema\ForeignKeyConstraint $foreignKeyConstraint */ 23 | foreach ($table->getForeignKeys() as $foreignKeyConstraint) { 24 | if ($foreignKeyConstraint->getForeignTableName() === $tableName) { 25 | $foreignKeys[] = $foreignKeyConstraint; 26 | } 27 | } 28 | } 29 | 30 | return $foreignKeys; 31 | } 32 | 33 | /** 34 | * @param Schema\ForeignKeyConstraint $foreignKey 35 | * @return bool 36 | * @throws Schema\SchemaException 37 | */ 38 | public function isLocalColumnNullable(Schema\ForeignKeyConstraint $foreignKey): bool 39 | { 40 | $localColumn = $foreignKey->getLocalColumns()[0]; 41 | $localTable = $foreignKey->getLocalTableName(); 42 | $table = $this->source->listTableDetails($localTable); 43 | $column = $table->getColumn($localColumn); 44 | 45 | return !$column->getNotnull(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /spec/Config/ConfigFactorySpec.php: -------------------------------------------------------------------------------- 1 | getSchemaManager()->willReturn($schemaManager); 18 | $this->beConstructedWith($connection, $tableConfigFactory); 19 | } 20 | 21 | function it_creates_config_with_tables_fetched_from_DBAL_schema( 22 | Table $table, 23 | TableConfigFactory $tableConfigFactory, 24 | AbstractSchemaManager $schemaManager 25 | ) { 26 | $config = new Config(); 27 | $tableConfig = new 
TableConfig(); 28 | $config->addTable('table', $tableConfig); 29 | 30 | $schemaManager->listTables()->willReturn([$table]); 31 | $table->getName()->willReturn('table'); 32 | $tableConfigFactory->createFromDBALTable($table)->willReturn($tableConfig); 33 | 34 | $this->createFromDBAL()->shouldBeLike($config); 35 | } 36 | 37 | function it_created_empty_config_when_there_are_no_tables_in_schema(AbstractSchemaManager $schemaManager) 38 | { 39 | $config = new Config(); 40 | 41 | $schemaManager->listTables()->willReturn([]); 42 | 43 | $this->createFromDBAL()->shouldBeLike($config); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/Fogger/Recipe/MaskReplicator.php: -------------------------------------------------------------------------------- 1 | relationGroups = $relationGroupsFactory->createFromDBAL(); 16 | } 17 | 18 | private function replicateMask( 19 | GrouppedRelationColumns $group, 20 | StrategyDefinition $mask, 21 | Recipe $recipe 22 | ) { 23 | /** @var RelationColumn $relationColumn */ 24 | foreach ($group->getColumns() as $relationColumn) { 25 | $table = $recipe->getTable($relationColumn->getTable()); 26 | $table->addMask(implode('|', $relationColumn->getColumns()), $mask); 27 | } 28 | } 29 | 30 | private function replicateMasksInTable(Table $table, Recipe $recipe) 31 | { 32 | foreach ($table->getMasks() as $column => $mask) { 33 | if (null === $group = $this->relationGroups 34 | ->getGroupByTableAndColumn($table->getName(), $column)) { 35 | continue; 36 | } 37 | $this->replicateMask($group, $mask, $recipe); 38 | } 39 | } 40 | 41 | public function replicateMasksToRelatedColumns(Recipe $recipe) 42 | { 43 | foreach ($recipe->getTables() as $table) { 44 | $this->replicateMasksInTable($table, $recipe); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/Fogger/Subset/AbstratctHeadOrTailSubset.php: 
-------------------------------------------------------------------------------- 1 | source = $source; 17 | } 18 | 19 | /** 20 | * @param Table $table 21 | * @throws SortByColumnRequired 22 | */ 23 | protected function ensureSortByColumn(Table $table) 24 | { 25 | if (null === $table->getSortBy()) { 26 | throw new SortByColumnRequired( 27 | sprintf( 28 | 'Error! Strategy require the table to have a unique sortBy column', 29 | $table->getName() 30 | ) 31 | ); 32 | } 33 | } 34 | 35 | protected function findOffsetId(Table $table, bool $reverse) 36 | { 37 | $options = $table->getSubset()->getOptions(); 38 | 39 | $findOffsetId = $this->source->createQueryBuilder(); 40 | $findOffsetId 41 | ->select($this->source->quoteIdentifier($table->getSortBy())) 42 | ->from($this->source->quoteIdentifier($table->getName())) 43 | ->addOrderBy($table->getSortBy(), $reverse ? Criteria::DESC : Criteria::ASC) 44 | ->setFirstResult($options['length'] - 1) 45 | ->setMaxResults(1); 46 | 47 | return $findOffsetId->execute()->fetchColumn(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /features/bootstrap/ChunkCacheContext.php: -------------------------------------------------------------------------------- 1 | chunkCache = $chunkCache; 13 | } 14 | 15 | /** 16 | * @param int $value 17 | * @param int $expected 18 | * @throws Exception 19 | */ 20 | private function assertCountEquals(int $value, int $expected) 21 | { 22 | if ($value === $expected) { 23 | return; 24 | } 25 | 26 | throw new \Exception( 27 | sprintf( 28 | 'Counter equals %d, %d expected', 29 | $value, 30 | $expected 31 | ) 32 | ); 33 | } 34 | 35 | /** 36 | * @Then published tasks counter should equal :expected 37 | * @param $expected 38 | * @throws Exception 39 | */ 40 | public function publishedTasksCounterShouldEqual(int $expected) 41 | { 42 | $this->assertCountEquals($this->chunkCache->getPublishedCount(), $expected); 43 | } 44 | 45 | /** 46 | * @Then processed tasks counter 
should equal :expected 47 | * @param $expected 48 | * @throws Exception 49 | */ 50 | public function processedTasksCounterShouldEqual(int $expected) 51 | { 52 | $this->assertCountEquals($this->chunkCache->getProcessedCount(), $expected); 53 | } 54 | 55 | /** 56 | * @Given the task queue is empty 57 | */ 58 | public function theTaskQueueIsEmpty() 59 | { 60 | $this->chunkCache->reset(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_COMPOSE = docker-compose -f docker-compose.yml -f docker-compose.override.yml 2 | DOCKER_COMPOSE_TEST_MYSQL = ${DOCKER_COMPOSE} -f docker-compose.test-mysql.yml 3 | DOCKER_COMPOSE_TEST_POSTGRES = ${DOCKER_COMPOSE} -f docker-compose.test-postgres.yml 4 | 5 | # --- docker 6 | .PHONY: pull 7 | pull: 8 | ${DOCKER_COMPOSE} pull 9 | 10 | .PHONY: install 11 | install: 12 | ${DOCKER_COMPOSE} run --rm --entrypoint="composer" app install 13 | 14 | .PHONY: start 15 | start: 16 | ${DOCKER_COMPOSE} up -d 17 | echo "waiting for services to start..." 
18 | sleep 16 19 | 20 | .PHONY: stop 21 | stop: 22 | ${DOCKER_COMPOSE} stop 23 | 24 | # --- test 25 | .PHONY: test 26 | test: 27 | make test-mysql 28 | make test-postgres 29 | 30 | .PHONY: test-mysql 31 | test-mysql: 32 | ${DOCKER_COMPOSE_TEST_MYSQL} up -d --scale=worker=0 33 | sleep 16 34 | ${DOCKER_COMPOSE_TEST_MYSQL} run --rm --entrypoint="php" app vendor/bin/behat --format=progress 35 | ${DOCKER_COMPOSE_TEST_MYSQL} run --rm --entrypoint="php" app vendor/bin/phpspec run 36 | ${DOCKER_COMPOSE} up -d --remove-orphans 37 | 38 | .PHONY: test-postgres 39 | test-postgres: 40 | ${DOCKER_COMPOSE_TEST_POSTGRES} up -d --scale=worker=0 41 | sleep 16 42 | ${DOCKER_COMPOSE_TEST_POSTGRES} run --rm --entrypoint="php" app vendor/bin/behat --format=progress 43 | ${DOCKER_COMPOSE_TEST_POSTGRES} run --rm --entrypoint="php" app vendor/bin/phpspec run 44 | ${DOCKER_COMPOSE} up -d --remove-orphans 45 | 46 | # --- fogger 47 | .PHONY: init 48 | init: 49 | ${DOCKER_COMPOSE} run --rm app fogger:init 50 | 51 | .PHONY: run 52 | run: 53 | ${DOCKER_COMPOSE} run --rm app fogger:run 54 | 55 | .PHONY: finish 56 | finish: 57 | ${DOCKER_COMPOSE} run --rm app fogger:finish 58 | -------------------------------------------------------------------------------- /spec/Config/Serializer/ConfigDenormalizerSpec.php: -------------------------------------------------------------------------------- 1 | setDenormalizer($denormalizer); 16 | } 17 | 18 | function it_supports_only_denormalization_of_config_class() 19 | { 20 | $this->supportsDenormalization(Argument::any(), Config::class)->shouldReturn(true); 21 | $this->supportsDenormalization(Argument::any(), 'WrongClassName')->shouldReturn(false); 22 | } 23 | 24 | function it_denormalizes_empty_config() 25 | { 26 | $data = ['tables' => []]; 27 | $config = new Config(); 28 | 29 | $this->denormalize($data, Config::class)->shouldBeLike($config); 30 | } 31 | 32 | function it_denormalizes_config_with_tables(DenormalizerInterface $denormalizer) 33 | { 34 | $data = [ 
35 | 'tables' => [ 36 | 'table' => [], 37 | ], 38 | ]; 39 | 40 | $config = new Config(); 41 | foreach ($data['tables'] as $key => $table) { 42 | $tableConfig = new TableConfig(); 43 | $config->addTable($key, $tableConfig); 44 | $denormalizer->denormalize($table, TableConfig::class, Argument::any(), Argument::any()) 45 | ->willReturn($tableConfig); 46 | } 47 | 48 | $this->denormalize($data, TableConfig::class)->shouldBeLike($config); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /features/Run/Mask/hashify.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to comply with the General Data Protection Regulation (EU GDPR) 3 | As a user 4 | I want to obfuscate (mask) data while moving them to the target database 5 | 6 | Scenario: We want to mask the email column with hashify strategy 7 | Given there is a source database 8 | And there is a table users with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | email | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table users contains following data: 14 | | id | email | desc | 15 | | 4 | ex4@tsh.io | desc 4 | 16 | | 3 | ex3@tsh.io | desc 3 | 17 | | 1 | ex1@tsh.io | desc 1 | 18 | | 2 | ex2@tsh.io | desc 2 | 19 | And there is an empty target database 20 | And the task queue is empty 21 | And the config test.yaml contains: 22 | """ 23 | tables: 24 | users: 25 | columns: 26 | email: { maskStrategy: "hashify", options: { template: "%s@example.com" } } 27 | """ 28 | When I run "run" command with input: 29 | | --chunk-size | 1000 | 30 | | --file | test.yaml | 31 | | --dont-wait | true | 32 | And worker processes 1 task 33 | Then the table users in target database should have 4 rows 34 | And the table users in target database should contain rows: 35 | | id | email | 36 | | 4 | 8f2be31e6c018f01a296b94152288d00@example.com | 37 | | 2 | 
8213ac5cd9213797788de22b486b1f27@example.com | 38 | | 1 | 0d49c52d05ad00c9f002d94ddbe635fb@example.com | 39 | | 3 | 89f1d876fb477e78c603aaf45e7427fc@example.com | 40 | -------------------------------------------------------------------------------- /features/Run/Subset/head.fails.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to make the resulting database smaller 3 | As a user 4 | I want to be able to subset selected tables 5 | 6 | Background: 7 | Given there is a source database 8 | And there is a table posts with following columns: 9 | | name | type | length | index | 10 | | id | integer | | | 11 | | title | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table posts contains following data: 14 | | id | title | desc | 15 | | 1 | title 1 | desc 1 | 16 | And there is an empty target database 17 | And the task queue is empty 18 | 19 | Scenario: We cannot apply head subset to table without primary key 20 | Given the config test.yaml contains: 21 | """ 22 | tables: 23 | posts: 24 | subsetStrategy: head 25 | subsetOptions: { length: 2 } 26 | """ 27 | When I run "run" command with input: 28 | | --chunk-size | 1000 | 29 | | --file | test.yaml | 30 | | --dont-wait | true | 31 | Then I should see "Error! 
Strategy require the table to have a unique sortBy column" in command's output 32 | And the command should exit with code "-1" 33 | 34 | Scenario: Strategy requires option length to be provided 35 | Given the config test.yaml contains: 36 | """ 37 | tables: 38 | posts: 39 | subsetStrategy: head 40 | subsetOptions: { } 41 | """ 42 | When I run "run" command with input: 43 | | --chunk-size | 1000 | 44 | | --file | test.yaml | 45 | | --dont-wait | true | 46 | Then I should see 'requires option "length" to be set' in command's output 47 | And the command should exit with code "-1" 48 | -------------------------------------------------------------------------------- /features/Run/Refine/selfReferenceNotNull.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to subset the table that has foreign key relations 3 | As a user 4 | I want to get obfuscated, subsetted but still consistent database. 5 | 6 | Scenario: We want to refine table that references itself - column is NotNull 7 | Given there is a source database 8 | And there is a table users with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | email | string | 64 | unique | | 12 | | supervisor | string | 64 | | | 13 | And the table users contains following data: 14 | | id | email | supervisor | 15 | | 1 | ex1@tsh.io | ex2@tsh.io | 16 | | 2 | ex2@tsh.io | ex3@tsh.io | 17 | | 3 | ex3@tsh.io | ex2@tsh.io | 18 | | 4 | ex4@tsh.io | ex1@tsh.io | 19 | And the users.supervisor references users.email 20 | And there is an empty target database 21 | And the task queue is empty 22 | And the config test.yaml contains: 23 | """ 24 | tables: 25 | users: 26 | subsetStrategy: tail 27 | subsetOptions: { length: 3 } 28 | """ 29 | When I run "run" command with input: 30 | | --chunk-size | 1000 | 31 | | --file | test.yaml | 32 | | --dont-wait | true | 33 | And worker processes 1 task 34 | And I run "finish" command
with input: 35 | | --file | test.yaml | 36 | Then the command should exit with code 0 37 | And the table users in target database should have 2 rows 38 | And the table users in target database should contain rows: 39 | | id | email | supervisor | 40 | | 2 | ex2@tsh.io | ex3@tsh.io | 41 | | 3 | ex3@tsh.io | ex2@tsh.io | 42 | -------------------------------------------------------------------------------- /src/Fogger/Recipe/RecipeFactory.php: -------------------------------------------------------------------------------- 1 | configLoader = $configLoader; 26 | $this->sourceSchema = $connection->getSchemaManager(); 27 | $this->recipeTableFactory = $recipeTableFactory; 28 | $this->maskReplicator = $maskReplicator; 29 | } 30 | 31 | /** 32 | * @param string $configFilename 33 | * @param int $chunkSize 34 | * @return Recipe 35 | * @throws \Doctrine\DBAL\DBALException 36 | */ 37 | public function createRecipe(string $configFilename, int $chunkSize = ChunkMessage::DEFAULT_CHUNK_SIZE) 38 | { 39 | $config = $this->configLoader->load($configFilename); 40 | $recipe = new Recipe($config->getExcludes()); 41 | 42 | foreach ($this->sourceSchema->listTables() as $dbalTable) { 43 | $tableName = $dbalTable->getName(); 44 | if (!in_array($tableName, $config->getExcludes())) { 45 | $recipe->addTable( 46 | $tableName, 47 | $this->recipeTableFactory->createRecipeTable($dbalTable, $chunkSize, $config->getTable($tableName)) 48 | ); 49 | } 50 | } 51 | $this->maskReplicator->replicateMasksToRelatedColumns($recipe); 52 | 53 | return $recipe; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /features/Run/Refine/selfReferenceNullable.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to subset the table that has foreign key relations 3 | As a user 4 | I want to get obfuscated, subsetted but still consistent database. 
5 | 6 | Scenario: We want to refine table that references itself - column is nullable 7 | Given there is a source database 8 | And there is a table users with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | email | string | 64 | unique | | 12 | | supervisor | string | 64 | | true | 13 | And the table users contains following data: 14 | | id | email | supervisor | 15 | | 1 | ex1@tsh.io | ex2@tsh.io | 16 | | 2 | ex2@tsh.io | ex3@tsh.io | 17 | | 3 | ex3@tsh.io | ex2@tsh.io | 18 | | 4 | ex4@tsh.io | ex1@tsh.io | 19 | And the users.supervisor references users.email 20 | And there is an empty target database 21 | And the task queue is empty 22 | And the config test.yaml contains: 23 | """ 24 | tables: 25 | users: 26 | subsetStrategy: tail 27 | subsetOptions: { length: 3 } 28 | """ 29 | When I run "run" command with input: 30 | | --chunk-size | 1000 | 31 | | --file | test.yaml | 32 | | --dont-wait | true | 33 | And worker processes 1 task 34 | And I run "finish" command with input: 35 | | --file | test.yaml | 36 | Then the command should exit with code 0 37 | And the table users in target database should have 3 rows 38 | And the table users in target database should contain rows: 39 | | id | email | supervisor | 40 | | 2 | ex2@tsh.io | ex3@tsh.io | 41 | | 3 | ex3@tsh.io | ex2@tsh.io | 42 | | 4 | ex4@tsh.io | | 43 | -------------------------------------------------------------------------------- /src/Command/InitCommand.php: -------------------------------------------------------------------------------- 1 | configFactory = $configFactory; 21 | $this->configLoader = $configLoader; 22 | 23 | parent::__construct(); 24 | } 25 | 26 | protected function configure() 27 | { 28 | $this 29 | ->setName('fogger:init') 30 | ->addOption( 31 | 'file', 32 | 'f', 33 | InputOption::VALUE_REQUIRED, 34 | 'Where should the command save the config file. 
Defaults to fogger.yaml in root folder.', 35 | ConfigLoader::DEFAULT_FILENAME 36 | ) 37 | ->setDescription('Creates configuration boilerplate base on the source DB schema'); 38 | } 39 | 40 | protected function execute(InputInterface $input, OutputInterface $output) 41 | { 42 | $output->writeln('Fogger init.'); 43 | $filename = $input->getOption('file'); 44 | 45 | try { 46 | $this->configLoader->save($this->configFactory->createFromDBAL(), $filename); 47 | } catch (\Exception $exception) { 48 | $output->writeln('There has been an error: '.$exception->getMessage()); 49 | 50 | return -1; 51 | } 52 | 53 | $output->writeln('Done! Config boilerplate saved to '.$filename); 54 | 55 | return 0; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/Fogger/Data/SourceQuery.php: -------------------------------------------------------------------------------- 1 | source = $source; 19 | $this->provider = $provider; 20 | } 21 | 22 | /** 23 | * @param Table $table 24 | * @return QueryBuilder 25 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 26 | */ 27 | public function getAllKeysQuery(Table $table) 28 | { 29 | $query = $this->getAllRowsQuery($table); 30 | $query 31 | ->resetQueryPart('select') 32 | ->select($this->source->quoteIdentifier($table->getSortBy())); 33 | 34 | return $query; 35 | } 36 | 37 | /** 38 | * @param Table $table 39 | * @param array $keys 40 | * @return QueryBuilder 41 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 42 | */ 43 | public function getAllRowsQuery(Table $table, array $keys = []): QueryBuilder 44 | { 45 | $query = $this->source->createQueryBuilder(); 46 | $query 47 | ->select('*') 48 | ->from($this->source->quoteIdentifier($table->getName())); 49 | // Lookup of one chunk by its keys: the sort column is quoted like in getAllKeysQuery(), so reserved-word column names stay valid SQL. 50 | if (count($keys)) { 51 | $query 52 | ->where($query->expr()->in($this->source->quoteIdentifier($table->getSortBy()), ':keys')) 53 | ->setParameter('keys', $keys, Connection::PARAM_STR_ARRAY); 54 | 55 | return $query; 56 | } 57 | 
$subset = $this->provider->getSubsetStrategy($table->getSubsetName()); 58 | 59 | return $subset->subsetQuery($query, $table); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/Fogger/Data/Masker.php: -------------------------------------------------------------------------------- 1 | maskStrategyProvider = $maskStrategyProvider; 15 | } 16 | 17 | /** 18 | * @param array $data 19 | * @param array $masks 20 | * @return array 21 | * @throws \App\Fogger\Mask\Exception\UnknownMaskException 22 | */ 23 | public function applyMasks(array $data, array $masks): array 24 | { 25 | foreach ($masks as $column => $definition) { 26 | $data = $this->maskData($data, $column, $definition); 27 | } 28 | 29 | return $data; 30 | } 31 | 32 | /** 33 | * @param array $data 34 | * @param string $column 35 | * @param StrategyDefinition $definition 36 | * @return array 37 | * @throws \App\Fogger\Mask\Exception\UnknownMaskException 38 | */ 39 | private function maskData(array $data, string $column, StrategyDefinition $definition): array 40 | { 41 | foreach ($data as $key => $row) { 42 | $data[$key] = $this->maskRow($row, $column, $definition); 43 | } 44 | 45 | return $data; 46 | } 47 | 48 | /** 49 | * @param array $row 50 | * @param string $column 51 | * @param StrategyDefinition $definition 52 | * @return array 53 | * @throws \App\Fogger\Mask\Exception\UnknownMaskException 54 | */ 55 | private function maskRow(array $row, string $column, StrategyDefinition $definition): array 56 | { 57 | $row[$column] = $this->maskStrategyProvider->getMask($definition->getName()) 58 | ->apply($row[$column], $definition->getOptions()); 59 | 60 | return $row; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkProducer.php: -------------------------------------------------------------------------------- 1 | sourceQuery = $sourceQuery; 22 | $this->chunkCache = $chunkCache; 23 | 
$this->chunkError = $chunkError; 24 | } 25 | 26 | /** 27 | * @param Table $table 28 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 29 | */ 30 | private function queueTableChunks(Table $table) 31 | { 32 | 33 | if (null === $table->getSortBy()) { 34 | $this->sourceQuery->getAllKeysQuery($table); 35 | $this->chunkCache->pushMessage($table); 36 | 37 | return; 38 | } 39 | 40 | $result = $this->sourceQuery->getAllKeysQuery($table)->execute(); 41 | 42 | $counter = 0; 43 | $keys = []; 44 | // fetchColumn() returns false only when the result set is exhausted; a plain truthiness test would also stop on keys such as 0, "0" or "". 45 | while (false !== ($key = $result->fetchColumn())) { 46 | $keys[] = $key; 47 | $counter++; 48 | if (0 === $counter % $table->getChunkSize()) { 49 | $this->chunkCache->pushMessage($table, $keys); 50 | $keys = []; 51 | } 52 | } 53 | if (0 !== $counter % $table->getChunkSize()) { 54 | $this->chunkCache->pushMessage($table, $keys); 55 | } 56 | } 57 | 58 | /** 59 | * @param Recipe $recipe 60 | * @throws \App\Fogger\Subset\Exception\UnknownSubsetStrategyException 61 | */ 62 | public function run(Recipe $recipe) 63 | { 64 | $this->chunkCache->reset(); 65 | $this->chunkError->reset(); 66 | foreach ($recipe->getTables() as $table) { 67 | $this->queueTableChunks($table); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/Fogger/Schema/RelationGroups/RelationsGroups.php: -------------------------------------------------------------------------------- 1 | groups as $group) { 15 | if ($group->contains($column)) { 16 | return $group; 17 | } 18 | } 19 | 20 | return null; 21 | } 22 | 23 | private function newGroup() 24 | { 25 | $group = new GrouppedRelationColumns(); 26 | $this->groups[] = $group; 27 | 28 | return $group; 29 | } 30 | 31 | public function addForeignKey(ForeignKeyConstraint $foreignKeyConstraint) 32 | { 33 | $local = new RelationColumn( 34 | $foreignKeyConstraint->getLocalTableName(), 35 | $foreignKeyConstraint->getLocalColumns() 36 | ); 37 | 38 | $foreign = new RelationColumn( 39 | 
$foreignKeyConstraint->getForeignTableName(), 40 | $foreignKeyConstraint->getForeignColumns() 41 | ); 42 | 43 | $g1 = $this->getGroupContainingColumn($local); 44 | $g2 = $this->getGroupContainingColumn($foreign); 45 | 46 | $group = $g1 ?? $g2 ?? $this->newGroup(); 47 | $group->addRelationColumn($local); 48 | $group->addRelationColumn($foreign); 49 | } 50 | 51 | public function getGroupByTableAndColumn(string $table, string $column): ?GrouppedRelationColumns 52 | { 53 | foreach ($this->groups as $group) { 54 | if ($group->containsByKey( 55 | sprintf 56 | ( 57 | RelationColumn::DESCRIPTOR_PATTERN, 58 | $table, 59 | $column 60 | ) 61 | )) { 62 | return $group; 63 | } 64 | } 65 | 66 | return null; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "thesoftwarehouse/fogger", 3 | "description": "GDPR friendly database masker", 4 | "type": "project", 5 | "license": "MIT", 6 | "require": { 7 | "php": "^7.1.3", 8 | "ext-ctype": "*", 9 | "ext-iconv": "*", 10 | "ext-json": "*", 11 | "fzaninotto/faker": "^1.8", 12 | "predis/predis": "^1.1", 13 | "snc/redis-bundle": "^2.1", 14 | "symfony/console": "*", 15 | "symfony/flex": "^1.1", 16 | "symfony/framework-bundle": "*", 17 | "symfony/orm-pack": "^1.0", 18 | "symfony/serializer-pack": "^1.0", 19 | "symfony/yaml": "*" 20 | }, 21 | "require-dev": { 22 | "behat/symfony2-extension": "^2.1", 23 | "phpspec/phpspec": "^5.0", 24 | "symfony/dotenv": "*", 25 | "symfony/var-dumper": "*" 26 | }, 27 | "config": { 28 | "preferred-install": { 29 | "*": "dist" 30 | }, 31 | "sort-packages": true 32 | }, 33 | "autoload": { 34 | "psr-4": { 35 | "App\\": "src/" 36 | } 37 | }, 38 | "autoload-dev": { 39 | "psr-4": { 40 | "App\\Tests\\": "tests/" 41 | } 42 | }, 43 | "replace": { 44 | "paragonie/random_compat": "*", 45 | "symfony/polyfill-ctype": "*", 46 | "symfony/polyfill-iconv": "*", 47 | 
"symfony/polyfill-php71": "*", 48 | "symfony/polyfill-php70": "*", 49 | "symfony/polyfill-php56": "*" 50 | }, 51 | "scripts": { 52 | "auto-scripts": { 53 | "cache:clear": "symfony-cmd", 54 | "assets:install %PUBLIC_DIR%": "symfony-cmd" 55 | }, 56 | "post-install-cmd": [ 57 | "@auto-scripts" 58 | ], 59 | "post-update-cmd": [ 60 | "@auto-scripts" 61 | ] 62 | }, 63 | "conflict": { 64 | "symfony/symfony": "*" 65 | }, 66 | "extra": { 67 | "symfony": { 68 | "allow-contrib": false, 69 | "require": "4.1.*" 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/Fogger/Data/ChunkCache.php: -------------------------------------------------------------------------------- 1 | redis = $redis; 22 | $this->serializer = $serializer; 23 | } 24 | 25 | public function reset() 26 | { 27 | $this->redis->del( 28 | [ 29 | self::CHUNKS_PUBLISHED, 30 | self::CHUNKS_PROCESSED, 31 | self::LIST_NAME, 32 | ] 33 | ); 34 | } 35 | 36 | public function pushMessage(Table $table, array $keys = []) 37 | { 38 | $message = $this->serializer->serialize(new ChunkMessage($table, $keys), 'json'); 39 | $this->redis->rpush(self::LIST_NAME, [$message]); 40 | $this->increasePublishedCount(); 41 | } 42 | 43 | public function popMessage() 44 | { 45 | if (null === $content = $this->redis->lpop(self::LIST_NAME)) { 46 | return null; 47 | } 48 | 49 | return $this->serializer->deserialize( 50 | $content, 51 | ChunkMessage::class, 52 | 'json' 53 | ); 54 | } 55 | 56 | public function increasePublishedCount() 57 | { 58 | $this->redis->incr(self::CHUNKS_PUBLISHED); 59 | } 60 | 61 | public function increaseProcessedCount() 62 | { 63 | $this->redis->incr(self::CHUNKS_PROCESSED); 64 | } 65 | 66 | public function getPublishedCount(): int 67 | { 68 | return $this->redis->get(self::CHUNKS_PUBLISHED) ?? 0; 69 | } 70 | 71 | public function getProcessedCount(): int 72 | { 73 | return $this->redis->get(self::CHUNKS_PROCESSED) ?? 
0; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/Command/ConsumerCommand.php: -------------------------------------------------------------------------------- 1 | chunkCache = $chunkCache; 25 | $this->chunkConsumer = $chunkConsumer; 26 | 27 | parent::__construct(); 28 | } 29 | 30 | protected function configure() 31 | { 32 | $this 33 | ->setName('fogger:consumer') 34 | ->addOption( 35 | 'file', 36 | 'f', 37 | InputOption::VALUE_REQUIRED, 38 | 'Where should the command look for a config file. Defaults to fogger.yaml in root folder.', 39 | ConfigLoader::DEFAULT_FILENAME 40 | ) 41 | ->addOption( 42 | 'messages', 43 | 'm', 44 | InputOption::VALUE_REQUIRED, 45 | 'How many messages to process.', 46 | 200 47 | ) 48 | ->setDescription('Consumes a message'); 49 | } 50 | 51 | protected function execute(InputInterface $input, OutputInterface $output) 52 | { 53 | for ($i = 0; $i < $input->getOption('messages'); $i++) { 54 | 55 | /** @var ChunkMessage $message */ 56 | $message = $this->chunkCache->popMessage(); 57 | 58 | if ($message instanceof ChunkMessage) { 59 | $this->chunkConsumer->execute($message); 60 | } else { 61 | echo('.'); 62 | usleep(500000); 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM php:7.2.3 2 | 3 | # Install PHP extensions deps 4 | RUN apt-get update \ 5 | && apt-get install --no-install-recommends -y \ 6 | libpq-dev \ 7 | libzip-dev \ 8 | libpng-dev \ 9 | openssh-server \ 10 | libxrender1 \ 11 | libfontconfig1 \ 12 | libxext6 \ 13 | libfreetype6-dev \ 14 | libjpeg62-turbo-dev \ 15 | zlib1g-dev \ 16 | libicu-dev \ 17 | g++ \ 18 | unixodbc-dev \ 19 | libxml2-dev \ 20 | libaio-dev \ 21 | libmemcached-dev \ 22 | freetds-dev \ 23 | libssl-dev \ 24 | openssl \ 25 | nano \ 26 | wget \ 27 | curl 28 | 29 | RUN apt-get install -y curl gnupg2 
apt-transport-https && \ 30 | curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ 31 | curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ 32 | apt-get -y update && \ 33 | export ACCEPT_EULA=Y && apt-get -y install msodbcsql17 mssql-tools 34 | 35 | RUN docker-php-ext-configure gd --with-freetype-dir=/usr/include/ --with-jpeg-dir=/usr/include/ \ 36 | && docker-php-ext-configure pdo_dblib --with-libdir=/lib/x86_64-linux-gnu \ 37 | && pecl install sqlsrv \ 38 | && pecl install pdo_sqlsrv \ 39 | && docker-php-ext-install \ 40 | iconv \ 41 | mbstring \ 42 | bcmath \ 43 | intl \ 44 | gd \ 45 | mysqli \ 46 | pdo_mysql \ 47 | pdo_pgsql \ 48 | pdo_dblib \ 49 | soap \ 50 | sockets \ 51 | zip \ 52 | pcntl \ 53 | ftp \ 54 | && docker-php-ext-enable \ 55 | sqlsrv \ 56 | pdo_sqlsrv 57 | 58 | # Composer 59 | RUN wget https://getcomposer.org/composer.phar && mv composer.phar /usr/bin/composer && chmod +x /usr/bin/composer 60 | 61 | 62 | RUN mkdir /fogger && chmod 777 /fogger 63 | COPY . 
/app 64 | WORKDIR /app 65 | 66 | #RUN composer install --no-dev 67 | RUN composer install 68 | 69 | ENTRYPOINT ["php", "bin/console"] 70 | CMD ["--help"] 71 | -------------------------------------------------------------------------------- /src/Fogger/Data/Writer/MysqlInfileWriter.php: -------------------------------------------------------------------------------- 1 | target = $target; 17 | $this->cacheDir = $cacheDir; 18 | } 19 | 20 | private function forgeTempFilename() 21 | { 22 | return sprintf('%s/%s.txt', $this->cacheDir, uniqid()); 23 | } 24 | 25 | /** 26 | * @param string $table 27 | * @param array $inserts 28 | * @throws \Doctrine\DBAL\DBALException 29 | */ 30 | private function loadInfile(string $table, array $inserts) 31 | { 32 | $filename = $this->forgeTempFilename(); 33 | file_put_contents($filename, implode("\n", $inserts)); 34 | $this->target->exec( 35 | sprintf( 36 | "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s", 37 | $filename, 38 | $table 39 | ). 40 | " FIELDS TERMINATED BY ',' ENCLOSED BY '\'' LINES TERMINATED BY '\n' STARTING BY ''" 41 | ); 42 | unlink($filename); 43 | } 44 | 45 | private function forgeRow(array $row): string 46 | { 47 | return implode( 48 | ',', 49 | array_map( 50 | function ($item) { 51 | return $item === null ? 
'\N' : $this->target->quote($item); 52 | }, 53 | $row 54 | ) 55 | ); 56 | } 57 | 58 | /** 59 | * @param string $table 60 | * @param array $data 61 | * @throws Exception 62 | */ 63 | public function insert(string $table, array $data) 64 | { 65 | $inserts = []; 66 | foreach ($data as $row) { 67 | $inserts[] = $this->forgeRow($row); 68 | } 69 | $this->loadInfile($table, $inserts); 70 | } 71 | 72 | public function isApplicable(): bool 73 | { 74 | return $this->target->getDriver()->getName() === 'pdo_mysql'; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /spec/Config/Serializer/TableConfigDenormalizerSpec.php: -------------------------------------------------------------------------------- 1 | setDenormalizer($denormalizer); 16 | } 17 | 18 | function it_supports_only_denormalization_of_table_config_class() 19 | { 20 | $this->supportsDenormalization(Argument::any(), TableConfig::class)->shouldReturn(true); 21 | $this->supportsDenormalization(Argument::any(), 'WrongClassName')->shouldReturn(false); 22 | } 23 | 24 | function it_denormalizes_data_with_empty_columns_array_and_no_subset_into_table_config_object() 25 | { 26 | $data = ['columns' => []]; 27 | $table = new TableConfig(); 28 | $this->denormalize($data, TableConfig::class)->shouldBeLike($table); 29 | } 30 | 31 | function it_denormalizes_data_with_empty_columns_array_and_subset_into_table_config_object() 32 | { 33 | $data = ['columns' => [], 'subsetStrategy' => 'none', 'subsetOptions' => []]; 34 | $table = new TableConfig(); 35 | $table->setSubsetStrategy($data['subsetStrategy'], $data['subsetOptions']); 36 | 37 | $this->denormalize($data, TableConfig::class)->shouldBeLike($table); 38 | } 39 | 40 | function it_denormalizes_data_with_columns_array(DenormalizerInterface $denormalizer) 41 | { 42 | $data = [ 43 | 'columns' => [ 44 | 'column' => [], 45 | ], 46 | ]; 47 | 48 | $table = new TableConfig(); 49 | 50 | foreach ($data['columns'] as $key => $col) { 51 | 
$columnConfig = new ColumnConfig(); 52 | $table->addColumn($key, $columnConfig); 53 | $denormalizer->denormalize($col, ColumnConfig::class, Argument::any(), Argument::any())->willReturn( 54 | $columnConfig 55 | ); 56 | } 57 | 58 | $this->denormalize($data, TableConfig::class)->shouldBeLike($table); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/Fogger/Mask/AbstractCachedMask.php: -------------------------------------------------------------------------------- 1 | cache = $cache; 19 | } 20 | 21 | abstract protected function getSubstitution(array $options = []): ?string; 22 | 23 | private function forgeCacheKey(string $value, array $options, bool $substitute = false) 24 | { 25 | return md5( 26 | sprintf("%s.%s.%s.%s", $value, $this->getMaskName(), json_encode($options), $substitute) 27 | ); 28 | } 29 | 30 | /** 31 | * @param null|string $value 32 | * @param array $options 33 | * @return null|string 34 | * @throws \Psr\Cache\InvalidArgumentException 35 | */ 36 | public function apply(?string $value, array $options = []): ?string 37 | { 38 | if (null === $value) { 39 | return $value; 40 | } 41 | 42 | do { 43 | $originalValueCacheItem = $this->cache->getItem($this->forgeCacheKey($value, $options)); 44 | } while ($originalValueCacheItem->get() === self::LOCK_VALUE); 45 | 46 | if ($originalValueCacheItem->isHit()) { 47 | return $originalValueCacheItem->get(); 48 | } else { 49 | $originalValueCacheItem->set(self::LOCK_VALUE); 50 | $this->cache->save($originalValueCacheItem); 51 | } 52 | $tries = 0; 53 | do { 54 | $tries++; 55 | $substitution = $this->getSubstitution($options); 56 | $substitutionCacheItem = $this->cache->getItem($this->forgeCacheKey($substitution, $options, true)); 57 | if ($tries == 100) { 58 | break; 59 | } 60 | } while ($substitutionCacheItem->isHit()); 61 | $this->cache->save($substitutionCacheItem); 62 | 63 | $originalValueCacheItem->set($substitution); 64 | 
$this->cache->save($originalValueCacheItem); 65 | 66 | return $substitution; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/Fogger/Refine/RefineExecutor.php: -------------------------------------------------------------------------------- 1 | target = $target; 15 | } 16 | 17 | private function innerSelectFromCopiedTable(Schema\ForeignKeyConstraint $foreignKey) 18 | { 19 | return $this->target->createQueryBuilder() 20 | ->select($this->target->quoteIdentifier('__tmp.'.$foreignKey->getForeignColumns()[0])) 21 | ->from( 22 | '('.$this->target->createQueryBuilder() 23 | ->select($this->target->quoteIdentifier($foreignKey->getForeignColumns()[0])) 24 | ->from($this->target->quoteIdentifier($foreignKey->getForeignTableName())) 25 | ->getSql().')', 26 | '__tmp' 27 | ) 28 | ->getSQL(); 29 | } 30 | 31 | public function delete(Schema\ForeignKeyConstraint $foreignKey): int 32 | { 33 | return $this->target->createQueryBuilder() 34 | ->delete($this->target->quoteIdentifier($foreignKey->getLocalTableName())) 35 | ->where( 36 | $this->target->createQueryBuilder()->expr()->notIn( 37 | $this->target->quoteIdentifier($foreignKey->getLocalColumns()[0]), 38 | $this->innerSelectFromCopiedTable($foreignKey) 39 | ) 40 | ) 41 | ->execute(); 42 | } 43 | 44 | public function setNulls(Schema\ForeignKeyConstraint $foreignKey) 45 | { 46 | $this->target->createQueryBuilder() 47 | ->update($this->target->quoteIdentifier($foreignKey->getLocalTableName())) 48 | ->set($this->target->quoteIdentifier($foreignKey->getLocalColumns()[0]), ':val') 49 | ->where( 50 | $this->target->createQueryBuilder()->expr()->notIn( 51 | $this->target->quoteIdentifier($foreignKey->getLocalColumns()[0]), 52 | $this->innerSelectFromCopiedTable($foreignKey) 53 | ) 54 | ) 55 | ->setParameter('val', null) 56 | ->execute(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- 
/src/Fogger/Data/Writer/GenericInsertWriter.php: -------------------------------------------------------------------------------- 1 | target = $target; 18 | } 19 | 20 | /** 21 | * @throws \Doctrine\DBAL\DBALException 22 | */ 23 | private function flush() 24 | { 25 | if (!count($this->inserts)) { 26 | return; 27 | } 28 | $this->target->beginTransaction(); 29 | foreach ($this->inserts as $insert) { 30 | $this->target->exec($insert); 31 | } 32 | $this->target->commit(); 33 | $this->inserts = []; 34 | } 35 | 36 | /** 37 | * @param string $table 38 | * @param array $data 39 | * @throws \Doctrine\DBAL\DBALException 40 | */ 41 | public function insert(string $table, array $data) 42 | { 43 | $counter = 0; 44 | $queryBuilder = $this->target->createQueryBuilder(); 45 | $this->inserts = []; 46 | foreach ($data as $row) { 47 | $this->inserts[] = $queryBuilder 48 | ->insert($this->target->quoteIdentifier($table)) 49 | ->values( 50 | array_combine( 51 | array_map( 52 | function ($key) { 53 | return $this->target->quoteIdentifier($key); 54 | }, 55 | array_keys($row) 56 | ), 57 | array_map( 58 | function ($value) { 59 | return $value === null ? 
'null' : $this->target->quote($value); 60 | }, 61 | $row 62 | ) 63 | ) 64 | )->getSQL(); 65 | if (!(++$counter % self::FLUSH_RATE)) { 66 | $this->flush(); 67 | } 68 | } 69 | $this->flush(); 70 | } 71 | 72 | public function isApplicable(): bool 73 | { 74 | return true; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /features/Run/Refine/notNullReference.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to subset the table that has foreign key relations 3 | As a user 4 | I want to get subsetted but refined result so the foreign constraints are still valid 5 | 6 | Scenario: We want to subset the table that is referenced by other table (Not Null column) 7 | Given there is a source database 8 | And there is a table invoices with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | no | string | 64 | | | 12 | And the table invoices contains following data: 13 | | id | no | 14 | | 1 | 01/2018 | 15 | | 2 | 02/2018 | 16 | | 3 | 03/2018 | 17 | And there is a table items with following columns: 18 | | name | type | length | index | nullable | 19 | | id | integer | | primary | | 20 | | invoice_id | integer | | | | 21 | | line | string | 64 | | | 22 | And the table items contains following data: 23 | | id | invoice_id | line | 24 | | 1 | 1 | product 1 | 25 | | 2 | 1 | product 2 | 26 | | 3 | 2 | product 3 | 27 | | 4 | 3 | product 4 | 28 | | 5 | 3 | product 5 | 29 | | 6 | 3 | product 6 | 30 | And the items.invoice_id references invoices.id 31 | And there is an empty target database 32 | And the task queue is empty 33 | And the config test.yaml contains: 34 | """ 35 | tables: 36 | invoices: 37 | subsetStrategy: tail 38 | subsetOptions: { length: 1 } 39 | """ 40 | When I run "run" command with input: 41 | | --chunk-size | 1000 | 42 | | --file | test.yaml | 43 | | --dont-wait | true | 44 | And worker processes 
2 task 45 | And I run "finish" command with input: 46 | | --file | test.yaml | 47 | Then the command should exit with code 0 48 | And the table invoices in target database should have 1 rows 49 | And the table items in target database should have 3 rows 50 | And the table items in target database should contain rows: 51 | | id | 52 | | 4 | 53 | | 5 | 54 | | 6 | 55 | -------------------------------------------------------------------------------- /src/Fogger/Recipe/RecipeTableFactory.php: -------------------------------------------------------------------------------- 1 | getMaskStrategy() != Config\ColumnConfig::NONE_STRATEGY) { 13 | $table->addMask( 14 | $columnName, 15 | new StrategyDefinition($column->getMaskStrategy(), $column->getOptions()) 16 | ); 17 | } 18 | } 19 | 20 | /** 21 | * @param DBAL\Table $table 22 | * @return null|string 23 | * @throws \Doctrine\DBAL\DBALException 24 | */ 25 | private function findSortBy(DBAL\Table $table): ?string 26 | { 27 | if ($table->getPrimaryKey() && 1 === count($table->getPrimaryKeyColumns())) { 28 | return $table->getPrimaryKeyColumns()[0]; 29 | } 30 | foreach ($table->getIndexes() as $index) { 31 | if ($index->isUnique() && 1 === count($index->getColumns())) { 32 | return $index->getColumns()[0]; 33 | } 34 | } 35 | 36 | return null; 37 | } 38 | 39 | /** 40 | * @param DBAL\Table $dbalTable 41 | * @param int $chunkSize 42 | * @param Config\TableConfig|null $tableConfig 43 | * @return Table 44 | * @throws \Doctrine\DBAL\DBALException 45 | */ 46 | public function createRecipeTable( 47 | DBAL\Table $dbalTable, 48 | int $chunkSize, 49 | Config\TableConfig $tableConfig = null 50 | ): Table { 51 | if ($tableConfig && $subsetStrategy = $tableConfig->getSubsetStrategy()) { 52 | $subset = new StrategyDefinition($subsetStrategy, $tableConfig->getSubsetOptions()); 53 | } 54 | 55 | $table = new Table( 56 | $dbalTable->getName(), 57 | $chunkSize, 58 | $this->findSortBy($dbalTable), 59 | $subset ?? 
new StrategyDefinition('noSubset') 60 | ); 61 | 62 | if (!$tableConfig) { 63 | return $table; 64 | } 65 | 66 | foreach ($tableConfig->getColumns() as $key => $column) { 67 | $this->addMask($table, $column, $key); 68 | } 69 | 70 | return $table; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /features/Run/Refine/nullableReference.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to subset the table that has foreign key relations 3 | As a user 4 | I want to get subsetted but refined result so the foreign constraints are still valid 5 | 6 | Scenario: We want to subset the table that is referenced by other table (nullable column) 7 | Given there is a source database 8 | And there is a table invoices with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | no | string | 64 | | | 12 | And the table invoices contains following data: 13 | | id | no | 14 | | 1 | 01/2018 | 15 | | 2 | 02/2018 | 16 | | 3 | 03/2018 | 17 | And there is a table items with following columns: 18 | | name | type | length | index | nullable | 19 | | id | integer | | primary | | 20 | | invoice_id | integer | | | true | 21 | | line | string | 64 | | | 22 | And the table items contains following data: 23 | | id | invoice_id | line | 24 | | 1 | 1 | product 1 | 25 | | 2 | 1 | product 2 | 26 | | 3 | 2 | product 3 | 27 | | 4 | 3 | product 4 | 28 | | 5 | 3 | product 5 | 29 | | 6 | 3 | product 6 | 30 | And the items.invoice_id references invoices.id 31 | And there is an empty target database 32 | And the task queue is empty 33 | And the config test.yaml contains: 34 | """ 35 | tables: 36 | invoices: 37 | subsetStrategy: tail 38 | subsetOptions: { length: 1 } 39 | """ 40 | When I run "run" command with input: 41 | | --chunk-size | 1000 | 42 | | --file | test.yaml | 43 | | --dont-wait | true | 44 | And worker processes 2 task 45 | And I 
run "finish" command with input: 46 | | --file | test.yaml | 47 | Then the command should exit with code 0 48 | And the table invoices in target database should have 1 rows 49 | And the table items in target database should have 6 rows 50 | And the table items in target database should contain rows: 51 | | id | invoice_id | 52 | | 1 | | 53 | | 2 | | 54 | | 3 | | 55 | -------------------------------------------------------------------------------- /features/Run/Refine/refineExcluded.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to exclude the table that has foreign key relations 3 | As a user 4 | I want the database refined so the foreign constraints are still valid 5 | 6 | Scenario: We want to exclude the table that is referenced by other table 7 | Given there is a source database 8 | And there is a table invoices with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | no | string | 64 | | | 12 | And the table invoices contains following data: 13 | | id | no | 14 | | 1 | 01/2018 | 15 | | 2 | 02/2018 | 16 | | 3 | 03/2018 | 17 | And there is a table items with following columns: 18 | | name | type | length | index | nullable | 19 | | id | integer | | primary | | 20 | | invoice_id | integer | | | true | 21 | | line | string | 64 | | | 22 | And the table items contains following data: 23 | | id | invoice_id | line | 24 | | 1 | 1 | product 1 | 25 | | 2 | 1 | product 2 | 26 | | 3 | 2 | product 3 | 27 | | 4 | 3 | product 4 | 28 | | 5 | 3 | product 5 | 29 | | 6 | 3 | product 6 | 30 | And the items.invoice_id references invoices.id 31 | And there is an empty target database 32 | And the task queue is empty 33 | And the config test.yaml contains: 34 | """ 35 | tables: 36 | excludes: 37 | - invoices 38 | """ 39 | When I run "run" command with input: 40 | | --chunk-size | 1000 | 41 | | --file | test.yaml | 42 | | --dont-wait | true | 43 | And worker 
processes 1 task 44 | And I run "finish" command with input: 45 | | --file | test.yaml | 46 | Then the command should exit with code 0 47 | And the table invoices in target database should have 0 rows 48 | And the table items in target database should have 6 rows 49 | And the table items in target database should contain rows: 50 | | id | invoice_id | 51 | | 1 | | 52 | | 2 | | 53 | | 3 | | 54 | | 4 | | 55 | | 5 | | 56 | | 6 | | 57 | -------------------------------------------------------------------------------- /src/Kernel.php: -------------------------------------------------------------------------------- 1 | getProjectDir().'/var/cache/'.$this->environment; 21 | } 22 | 23 | public function getLogDir() 24 | { 25 | return $this->getProjectDir().'/var/log'; 26 | } 27 | 28 | public function registerBundles() 29 | { 30 | $contents = require $this->getProjectDir().'/config/bundles.php'; 31 | foreach ($contents as $class => $envs) { 32 | if (isset($envs['all']) || isset($envs[$this->environment])) { 33 | yield new $class(); 34 | } 35 | } 36 | } 37 | 38 | protected function configureContainer(ContainerBuilder $container, LoaderInterface $loader) 39 | { 40 | $container->addResource(new FileResource($this->getProjectDir().'/config/bundles.php')); 41 | // Feel free to remove the "container.autowiring.strict_mode" parameter 42 | // if you are using symfony/dependency-injection 4.0+ as it's the default behavior 43 | $container->setParameter('container.autowiring.strict_mode', true); 44 | $container->setParameter('container.dumper.inline_class_loader', true); 45 | $confDir = $this->getProjectDir().'/config'; 46 | 47 | $loader->load($confDir.'/{packages}/*'.self::CONFIG_EXTS, 'glob'); 48 | $loader->load($confDir.'/{packages}/'.$this->environment.'/**/*'.self::CONFIG_EXTS, 'glob'); 49 | $loader->load($confDir.'/{services}'.self::CONFIG_EXTS, 'glob'); 50 | $loader->load($confDir.'/{services}_'.$this->environment.self::CONFIG_EXTS, 'glob'); 51 | } 52 | 53 | protected function 
configureRoutes(RouteCollectionBuilder $routes) 54 | { 55 | $confDir = $this->getProjectDir().'/config'; 56 | 57 | $routes->import($confDir.'/{routes}/*'.self::CONFIG_EXTS, '/', 'glob'); 58 | $routes->import($confDir.'/{routes}/'.$this->environment.'/**/*'.self::CONFIG_EXTS, '/', 'glob'); 59 | $routes->import($confDir.'/{routes}'.self::CONFIG_EXTS, '/', 'glob'); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /spec/Fogger/Serializer/TableDenormalizerSpec.php: -------------------------------------------------------------------------------- 1 | setDenormalizer($denormalizer); 16 | } 17 | 18 | function it_supports_only_denormalization_of_recipe_table_class_only_from_json() 19 | { 20 | $this->supportsDenormalization(Argument::any(), Table::class, 'json')->shouldReturn(true); 21 | $this->supportsDenormalization(Argument::any(), Table::class, Argument::any())->shouldReturn(false); 22 | $this->supportsDenormalization(Argument::any(), 'WrongClassName')->shouldReturn(false); 23 | } 24 | 25 | function it_denormalizes_table_without_masks_specified() 26 | { 27 | $data = ['name' => 'table', 'chunkSize' => 10, 'sortBy' => null, 'subset' => 'noSubset']; 28 | $table = new Table('table', 10, null, new StrategyDefinition('noSubset')); 29 | 30 | $this->denormalize($data, Table::class, 'json')->shouldBeLike($table); 31 | } 32 | 33 | function it_denormalizes_table_with_subset_strategy_specified(DenormalizerInterface $denormalizer) 34 | { 35 | $data = ['name' => 'table', 'chunkSize' => 10, 'sortBy' => null, 'subset' => 'subset']; 36 | $subsetStrategy = new StrategyDefinition('subset'); 37 | $denormalizer->denormalize($data['subset'], StrategyDefinition::class, 'json', [])->willReturn($subsetStrategy); 38 | $table = new Table('table', 10, null, $subsetStrategy); 39 | 40 | $this->denormalize($data, Table::class, 'json')->shouldBeLike($table); 41 | } 42 | 43 | function it_denormalizes_table_with_masks_specified() 44 | { 45 | $data = [ 46 
| 'name' => 'table', 47 | 'chunkSize' => 10, 48 | 'subset' => 'noSubset', 49 | 'sortBy' => null, 50 | 'masks' => [ 51 | 'column' => ['name' => 'mask', 'options' => []], 52 | 'other' => ['name' => 'otherMask', 'options' => ['option' => 'value']], 53 | ], 54 | ]; 55 | $table = new Table('table', 10, null, new StrategyDefinition('noSubset')); 56 | $table->addMask('column', new StrategyDefinition('mask')); 57 | $table->addMask('other', new StrategyDefinition('otherMask', ['option' => 'value'])); 58 | 59 | $this->denormalize($data, Table::class, 'json')->shouldBeLike($table); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/Fogger/Refine/Refiner.php: -------------------------------------------------------------------------------- 1 | extractor = $extractor; 22 | $this->refineExecutor = $refineExecutor; 23 | } 24 | 25 | /** 26 | * @param Table $table 27 | * @throws Schema\SchemaException 28 | */ 29 | private function refineIfSubsetted(Table $table): void 30 | { 31 | if ($table->getSubsetName() === NoSubset::STRATEGY_NAME) { 32 | return; 33 | } 34 | $this->refineTable($table->getName()); 35 | } 36 | 37 | /** 38 | * @param Schema\ForeignKeyConstraint $foreignKey 39 | * @throws Schema\SchemaException 40 | */ 41 | private function runQueryFor(Schema\ForeignKeyConstraint $foreignKey) 42 | { 43 | echo(sprintf( 44 | " - %s.%s => %s.%s\n", 45 | $foreignKey->getLocalTableName(), 46 | implode('_', $foreignKey->getLocalColumns()), 47 | $foreignKey->getForeignTableName(), 48 | implode('_', $foreignKey->getForeignColumns()) 49 | )); 50 | if ($this->extractor->isLocalColumnNullable($foreignKey)) { 51 | $this->refineExecutor->setNulls($foreignKey); 52 | 53 | return; 54 | } 55 | if ($this->refineExecutor->delete($foreignKey)) { 56 | $this->refineTable($foreignKey->getLocalTableName()); 57 | } 58 | } 59 | 60 | /** 61 | * @param string $tabletableName 62 | * @throws Schema\SchemaException 63 | */ 64 | private function 
refineTable(string $tabletableName) 65 | { 66 | echo(' - refining '.$tabletableName."\n"); 67 | /** @var Schema\ForeignKeyConstraint $foreignKey */ 68 | foreach ($this->extractor->findForeignKeysReferencingTable($tabletableName) as $foreignKey) { 69 | $this->runQueryFor($foreignKey); 70 | } 71 | 72 | } 73 | 74 | /** 75 | * @param Recipe $recipe 76 | * @throws Schema\SchemaException 77 | */ 78 | public function refine(Recipe $recipe) 79 | { 80 | /** @var Table $table */ 81 | foreach ($recipe->getTables() as $table) { 82 | $this->refineIfSubsetted($table); 83 | } 84 | foreach ($recipe->getExcludes() as $excluded) { 85 | $this->refineTable($excluded); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /features/Run/Subset/head.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to make the resulting database smaller 3 | As a user 4 | I want to be able to subset selected tables 5 | 6 | Background: 7 | Given there is a source database 8 | And there is a table posts with following columns: 9 | | name | type | length | index | 10 | | id | integer | | | 11 | | title | string | 64 | unique | 12 | | desc | string | 128 | | 13 | And the table posts contains following data: 14 | | id | title | desc | 15 | | 4 | title 4 | desc 4 | 16 | | 3 | title 3 | desc 3 | 17 | | 1 | title 1 | desc 1 | 18 | | 2 | title 2 | desc 2 | 19 | | 5 | title 5 | desc 5 | 20 | | 6 | title 6 | desc 6 | 21 | And there is an empty target database 22 | And the task queue is empty 23 | 24 | Scenario: We want only the first records (head strategy) from the table 25 | Given the config test.yaml contains: 26 | """ 27 | tables: 28 | posts: 29 | subsetStrategy: head 30 | subsetOptions: { length: 2 } 31 | """ 32 | When I run "run" command with input: 33 | | --chunk-size | 1000 | 34 | | --file | test.yaml | 35 | | --dont-wait | true | 36 | Then I should see "1 chunks have been added to queue" in 
command's output 37 | And the command should exit with code 0 38 | And published tasks counter should equal 1 39 | And processed tasks counter should equal 0 40 | When worker processes 1 task 41 | Then processed tasks counter should equal 1 42 | And the table posts in target database should have 2 row 43 | And the table posts in target database should contain rows: 44 | | id | title | 45 | | 1 | title 1 | 46 | | 2 | title 2 | 47 | 48 | Scenario: We want only the first records (head strategy) from the table (multiple chunks) 49 | Given the config test.yaml contains: 50 | """ 51 | tables: 52 | posts: 53 | subsetStrategy: head 54 | subsetOptions: { length: 3 } 55 | """ 56 | When I run "run" command with input: 57 | | --chunk-size | 2 | 58 | | --file | test.yaml | 59 | | --dont-wait | true | 60 | Then I should see "2 chunks have been added to queue" in command's output 61 | And the command should exit with code 0 62 | And published tasks counter should equal 2 63 | And processed tasks counter should equal 0 64 | When worker processes 2 tasks 65 | Then processed tasks counter should equal 2 66 | And the table posts in target database should have 3 row 67 | And the table posts in target database should contain rows: 68 | | id | title | 69 | | 1 | title 1 | 70 | | 2 | title 2 | 71 | | 3 | title 3 | 72 | -------------------------------------------------------------------------------- /features/Run/Subset/range.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to make the resulting database smaller 3 | As a user 4 | I want to be able to subset selected tables 5 | 6 | Background: 7 | Given there is a source database 8 | And there is a table scores with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | score | integer | | | 12 | | desc | string | 128 | | 13 | And the table scores contains following data: 14 | | id | score | desc | 15 | | 1 | 47 | desc 1 | 16 | | 2 
| 5 | desc 2 | 17 | | 3 | 43 | desc 3 | 18 | | 4 | 65 | desc 4 | 19 | | 5 | 31 | desc 5 | 20 | And there is an empty target database 21 | And the task queue is empty 22 | 23 | Scenario: We want only the records within configured range 24 | Given the config test.yaml contains: 25 | """ 26 | tables: 27 | scores: 28 | subsetStrategy: range 29 | subsetOptions: { column: "score", from: 10, to: 50 } 30 | """ 31 | When I run "run" command with input: 32 | | --chunk-size | 1000 | 33 | | --file | test.yaml | 34 | | --dont-wait | true | 35 | Then I should see "1 chunks have been added to queue" in command's output 36 | And the command should exit with code 0 37 | And published tasks counter should equal 1 38 | And processed tasks counter should equal 0 39 | When worker processes 1 task 40 | Then processed tasks counter should equal 1 41 | And the table scores in target database should have 3 row 42 | And the table scores in target database should contain rows: 43 | | id | score | 44 | | 1 | 47 | 45 | | 3 | 43 | 46 | | 5 | 31 | 47 | 48 | Scenario: We want only the records within configured range (multiple chunks) 49 | Given the config test.yaml contains: 50 | """ 51 | tables: 52 | scores: 53 | subsetStrategy: range 54 | subsetOptions: { column: "score", from: 10, to: 50 } 55 | """ 56 | When I run "run" command with input: 57 | | --chunk-size | 2 | 58 | | --file | test.yaml | 59 | | --dont-wait | true | 60 | Then I should see "2 chunks have been added to queue" in command's output 61 | And the command should exit with code 0 62 | And published tasks counter should equal 2 63 | And processed tasks counter should equal 0 64 | When worker processes 2 tasks 65 | Then processed tasks counter should equal 2 66 | And the table scores in target database should have 3 row 67 | And the table scores in target database should contain rows: 68 | | id | score | 69 | | 1 | 47 | 70 | | 3 | 43 | 71 | | 5 | 31 | 72 | 
-------------------------------------------------------------------------------- /features/Run/Subset/tail.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to make the resulting database smaller 3 | As a user 4 | I want to be able to subset selected tables 5 | 6 | Background: 7 | Given there is a source database 8 | And there is a table posts with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | title | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table posts contains following data: 14 | | id | title | desc | 15 | | 4 | title 4 | desc 4 | 16 | | 3 | title 3 | desc 3 | 17 | | 1 | title 1 | desc 1 | 18 | | 2 | title 2 | desc 2 | 19 | | 5 | title 5 | desc 5 | 20 | | 6 | title 6 | desc 6 | 21 | And there is an empty target database 22 | And the task queue is empty 23 | 24 | Scenario: We want only the last records (tail strategy) from the table 25 | Given the config test.yaml contains: 26 | """ 27 | tables: 28 | posts: 29 | subsetStrategy: tail 30 | subsetOptions: { length: 3 } 31 | """ 32 | When I run "run" command with input: 33 | | --chunk-size | 1000 | 34 | | --file | test.yaml | 35 | | --dont-wait | true | 36 | Then I should see "1 chunks have been added to queue" in command's output 37 | And the command should exit with code 0 38 | And published tasks counter should equal 1 39 | And processed tasks counter should equal 0 40 | When worker processes 1 task 41 | Then processed tasks counter should equal 1 42 | And the table posts in target database should have 3 row 43 | And the table posts in target database should contain rows: 44 | | id | title | 45 | | 4 | title 4 | 46 | | 5 | title 5 | 47 | | 6 | title 6 | 48 | 49 | Scenario: We want only the last records (tail strategy) from the table (multiple chunks) 50 | Given the config test.yaml contains: 51 | """ 52 | tables: 53 | posts: 54 | subsetStrategy: tail 55 | subsetOptions: { length: 
3 } 56 | """ 57 | When I run "run" command with input: 58 | | --chunk-size | 2 | 59 | | --file | test.yaml | 60 | | --dont-wait | true | 61 | Then I should see "2 chunks have been added to queue" in command's output 62 | And the command should exit with code 0 63 | And published tasks counter should equal 2 64 | And processed tasks counter should equal 0 65 | When worker processes 2 tasks 66 | Then processed tasks counter should equal 2 67 | And the table posts in target database should have 3 row 68 | And the table posts in target database should contain rows: 69 | | id | title | 70 | | 4 | title 4 | 71 | | 5 | title 5 | 72 | | 6 | title 6 | 73 | -------------------------------------------------------------------------------- /features/Run/Refine/cascadeRefineOnlyToForeignSide.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to subset the table that has foreign key relations 3 | As a user 4 | I want to get subsetted but refined result so the foreign constraints are still valid 5 | 6 | Scenario: We want to subset the table that both is referenced and references. Only comments are refined. 
7 | Given there is a source database 8 | And there is a table invoices with following columns: 9 | | name | type | length | index | nullable | 10 | | id | integer | | primary | | 11 | | no | string | 64 | | | 12 | And the table invoices contains following data: 13 | | id | no | 14 | | 1 | 01/2018 | 15 | | 2 | 02/2018 | 16 | And there is a table items with following columns: 17 | | name | type | length | index | nullable | 18 | | id | integer | | primary | | 19 | | invoice_id | integer | | | | 20 | | line | string | 64 | | | 21 | And the table items contains following data: 22 | | id | invoice_id | line | 23 | | 1 | 1 | product 1 | 24 | | 2 | 1 | product 2 | 25 | | 3 | 2 | product 3 | 26 | And there is a table comments with following columns: 27 | | name | type | length | index | nullable | 28 | | id | integer | | primary | | 29 | | item_id | integer | | | | 30 | | comment | string | 64 | | | 31 | And the table comments contains following data: 32 | | id | item_id | comment | 33 | | 1 | 1 | comment 1 | 34 | | 2 | 1 | comment 2 | 35 | | 3 | 2 | comment 3 | 36 | | 4 | 3 | comment 4 | 37 | | 5 | 3 | comment 5 | 38 | | 6 | 3 | comment 6 | 39 | And the items.invoice_id references invoices.id 40 | And the comments.item_id references items.id 41 | And there is an empty target database 42 | And the task queue is empty 43 | And the config test.yaml contains: 44 | """ 45 | tables: 46 | items: 47 | subsetStrategy: head 48 | subsetOptions: { length: 1 } 49 | """ 50 | When I run "run" command with input: 51 | | --chunk-size | 1000 | 52 | | --file | test.yaml | 53 | | --dont-wait | true | 54 | And worker processes 3 task 55 | And I run "finish" command with input: 56 | | --file | test.yaml | 57 | Then the command should exit with code 0 58 | And the table invoices in target database should have 2 rows 59 | And the table items in target database should have 1 rows 60 | And the table comments in target database should have 2 rows 61 | And the table comments in target database 
should contain rows: 62 | | id | 63 | | 1 | 64 | | 2 | 65 | -------------------------------------------------------------------------------- /features/Run/Mask/faker.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to comply with the General Data Protection Regulation (EU GDPR) 3 | As a user 4 | I want to obfuscate (mask) data while moving them to the target database 5 | 6 | Scenario: We want to mask the email column with faker strategy 7 | Given there is a source database 8 | And there is a table users with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | email | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table users contains following data: 14 | | id | email | desc | 15 | | 4 | ex4@tsh.io | desc 4 | 16 | | 3 | ex3@tsh.io | desc 3 | 17 | | 1 | ex1@tsh.io | desc 1 | 18 | | 2 | ex2@tsh.io | desc 2 | 19 | And there is an empty target database 20 | And the task queue is empty 21 | And the config test.yaml contains: 22 | """ 23 | tables: 24 | users: 25 | columns: 26 | email: { maskStrategy: "faker", options: { method: "email" } } 27 | """ 28 | When I run "run" command with input: 29 | | --chunk-size | 1000 | 30 | | --file | test.yaml | 31 | | --dont-wait | true | 32 | And worker processes 1 task 33 | Then the table users in target database should have 4 rows 34 | And the table users in target database should not contain rows: 35 | | email | 36 | | ex1@tsh.io | 37 | | ex2@tsh.io | 38 | | ex3@tsh.io | 39 | | ex4@tsh.io | 40 | 41 | Scenario: Support Faker's optional modifier 42 | Given there is a source database 43 | And there is a table users with following columns: 44 | | name | type | length | index | 45 | | id | integer | | primary | 46 | | email | string | 64 | | 47 | | desc | string | 128 | | 48 | And the table users contains following data: 49 | | id | email | desc | 50 | | 4 | ex4@tsh.io | desc 4 | 51 | | 3 | ex3@tsh.io | desc 3 | 52 
| | 1 | ex1@tsh.io | desc 1 | 53 | | 2 | ex2@tsh.io | desc 2 | 54 | And there is an empty target database 55 | And the task queue is empty 56 | And the config test.yaml contains: 57 | """ 58 | tables: 59 | users: 60 | columns: 61 | email: { maskStrategy: none, options: { } } 62 | desc: { maskStrategy: "faker", options: { method: "email", modifier: "optional", modifierArguments: [0, default] } } 63 | """ 64 | When I run "run" command with input: 65 | | --chunk-size | 1000 | 66 | | --file | test.yaml | 67 | | --dont-wait | true | 68 | And worker processes 1 task 69 | Then the table users in target database should have 4 rows 70 | And the table users in target database should contain rows: 71 | | email | desc | 72 | | ex1@tsh.io | default | 73 | | ex2@tsh.io | default | 74 | | ex3@tsh.io | default | 75 | | ex4@tsh.io | default | 76 | -------------------------------------------------------------------------------- /src/Fogger/Schema/SchemaManipulator.php: -------------------------------------------------------------------------------- 1 | sourceSchema = $source->getSchemaManager(); 17 | $this->targetSchema = $target->getSchemaManager(); 18 | } 19 | 20 | /** 21 | * @throws DBAL\SchemaException 22 | */ 23 | public function copySchemaDroppingIndexesAndForeignKeys() 24 | { 25 | $sourceTables = $this->sourceSchema->listTables(); 26 | /** @var DBAL\Table $table */ 27 | foreach ($sourceTables as $table) { 28 | foreach ($table->getColumns() as $column) { 29 | $column->setAutoincrement(false); 30 | } 31 | foreach ($table->getForeignKeys() as $fk) { 32 | $table->removeForeignKey($fk->getName()); 33 | } 34 | foreach ($table->getIndexes() as $index) { 35 | $table->dropIndex($index->getName()); 36 | } 37 | $this->targetSchema->createTable($table); 38 | } 39 | } 40 | 41 | private function recreateIndexesOnTable(DBAL\Table $table) 42 | { 43 | foreach ($table->getIndexes() as $index) { 44 | echo(sprintf( 45 | " - %s's index %s on %s\n", 46 | $table->getName(), 47 | $index->getName(), 
48 | implode(', ', $index->getColumns()) 49 | )); 50 | $this->targetSchema->createIndex($index, $table->getName()); 51 | } 52 | /** @var DBAL\Column $column */ 53 | foreach ($table->getColumns() as $column) { 54 | if ($column->getAutoincrement()) { 55 | $this->targetSchema->alterTable( 56 | new DBAL\TableDiff($table->getName(), [], [new DBAL\ColumnDiff($column->getName(), $column)]) 57 | ); 58 | } 59 | } 60 | } 61 | 62 | private function recreateForeignKeysOnTable(DBAL\Table $table) 63 | { 64 | foreach ($table->getForeignKeys() as $fk) { 65 | echo(sprintf( 66 | " - %s.%s => %s.%s\n", 67 | $fk->getLocalTableName(), 68 | implode('_', $fk->getLocalColumns()), 69 | $fk->getForeignTableName(), 70 | implode('_', $fk->getForeignColumns()) 71 | )); 72 | $this->targetSchema->createForeignKey($fk, $table->getName()); 73 | } 74 | } 75 | 76 | public function recreateIndexes() 77 | { 78 | $sourceTables = $this->sourceSchema->listTables(); 79 | foreach ($sourceTables as $table) { 80 | $this->recreateIndexesOnTable($table); 81 | } 82 | } 83 | 84 | public function recreateForeignKeys() 85 | { 86 | $sourceTables = $this->sourceSchema->listTables(); 87 | foreach ($sourceTables as $table) { 88 | $this->recreateForeignKeysOnTable($table); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /features/bootstrap/CommandContext.php: -------------------------------------------------------------------------------- 1 | application = new Application($kernel); 27 | $this->application->add(new InitCommand($configFactory, $configLoader)); 28 | } 29 | 30 | private function foggerName(string $name): string 31 | { 32 | return sprintf(self::FOGGER_COMMAND_TEMPLATE, $name); 33 | } 34 | 35 | private function runCommand(string $name, array $input = []) 36 | { 37 | $command = $this->application->find($name); 38 | $this->tester = new CommandTester($command); 39 | $this->tester->execute(array_merge(['command' => $command->getName()], $input)); 40 | } 
41 | 42 | /** 43 | * @When I run :name command 44 | * @param string $name 45 | */ 46 | public function iRunCommand(string $name) 47 | { 48 | $this->runCommand($this->foggerName($name)); 49 | } 50 | 51 | /** 52 | * @When I run :name command with input: 53 | * @param $name 54 | * @param TableNode $table 55 | */ 56 | public function iRunCommandWithInput($name, TableNode $table) 57 | { 58 | $this->runCommand( 59 | $this->foggerName($name), 60 | array_map( 61 | function ($item) { 62 | return $item === 'true' ? true : $item; 63 | }, 64 | $table->getRowsHash() 65 | ) 66 | ); 67 | } 68 | 69 | /** 70 | * @Then I should see :text in command's output 71 | * @param string $text 72 | * @throws Exception 73 | */ 74 | public function iShouldSeeInCommandsOutput($text) 75 | { 76 | if (false === strpos($this->tester->getDisplay(), $text)) { 77 | throw new \Exception('Text not present in command output'); 78 | } 79 | } 80 | 81 | /** 82 | * @When the command should exit with code :code 83 | * @param int $code 84 | * @throws Exception 85 | */ 86 | public function theCommandShouldExitCode(int $code) 87 | { 88 | if ($code !== $this->tester->getStatusCode()) { 89 | throw new \Exception( 90 | sprintf( 91 | 'Command exited with %d, %d expected', 92 | $this->tester->getStatusCode(), 93 | $code 94 | ) 95 | ); 96 | } 97 | } 98 | 99 | /** 100 | * @Then print commands output 101 | */ 102 | public function printCommandsOutput() 103 | { 104 | dump($this->tester->getDisplay()); 105 | } 106 | 107 | /** 108 | * @When worker processes :count task(s) 109 | * @param int $count 110 | */ 111 | public function workerProcessTask(int $count) 112 | { 113 | $this->runCommand( 114 | 'fogger:consumer', 115 | [ 116 | '--messages' => $count, 117 | ] 118 | ); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /config/services.yaml: -------------------------------------------------------------------------------- 1 | # This file is the entry point to configure your 
own services. 2 | # Files in the packages/ subdirectory configure your dependencies. 3 | 4 | # Put parameters here that don't need to change on each machine where the app is deployed 5 | # https://symfony.com/doc/current/best_practices/configuration.html#application-related-configuration 6 | parameters: 7 | locale: 'en' 8 | 9 | services: 10 | # default configuration for services in *this* file 11 | _defaults: 12 | autowire: true # Automatically injects dependencies in your services. 13 | autoconfigure: true # Automatically registers your services as commands, event subscribers, etc. 14 | public: false # Allows optimizing the container by removing unused services; this also means 15 | # fetching services directly from the container via $container->get() won't work. 16 | # The best practice is to be explicit about your dependencies anyway. 17 | 18 | _instanceof: 19 | App\Fogger\Mask\MaskStrategyInterface: 20 | tags: ['fogger.mask'] 21 | 22 | App\Fogger\Subset\SubsetStrategyInterface: 23 | tags: ['fogger.subset'] 24 | 25 | App\Fogger\Data\Writer\ChunkWriterInterface: 26 | tags: ['fogger.writer'] 27 | 28 | # makes classes in src/ available to be used as services 29 | # this creates a service per class whose id is the fully-qualified class name 30 | App\: 31 | resource: '../src/*' 32 | exclude: '../src/{DependencyInjection,Entity,Migrations,Tests,Kernel.php}' 33 | 34 | # controllers are imported separately to make sure services can be injected 35 | # as action arguments even if you don't extend any base controller class 36 | # App\Controller\: 37 | # resource: '../src/Controller' 38 | # tags: ['controller.service_arguments'] 39 | 40 | # add more service definitions when explicit configuration is needed 41 | # please note that last definitions always *replace* previous ones 42 | 43 | App\Config\Serializer\ConfigDenormalizer: 44 | calls: 45 | - [setDenormalizer, ['@serializer']] 46 | tags: 47 | - { name: 'serializer.normalizer'} 48 | 49 | 
App\Config\Serializer\TableConfigDenormalizer: 50 | calls: 51 | - [setDenormalizer, ['@serializer']] 52 | tags: 53 | - { name: 'serializer.normalizer'} 54 | 55 | App\Fogger\Data\ChunkCache: 56 | arguments: 57 | - '@snc_redis.default' 58 | 59 | App\Fogger\Data\ChunkError: 60 | arguments: 61 | - '@snc_redis.default' 62 | 63 | App\Fogger\Data\Writer\GenericInsertWriter: 64 | arguments: 65 | - '@doctrine.dbal.target_connection' 66 | tags: 67 | - { name: 'fogger.writer', priority: -128 } 68 | 69 | App\Fogger\Data\Writer\MysqlInfileWriter: 70 | arguments: 71 | - '@doctrine.dbal.target_connection' 72 | - '%kernel.cache_dir%' 73 | 74 | app.fogger.faker_generator: 75 | class: Faker\Factory 76 | factory: ['Faker\Factory', create] 77 | 78 | App\Fogger\Mask\FakerMask: 79 | arguments: 80 | - '@app.fogger.faker_generator' 81 | 82 | App\Fogger\Refine\RefineExecutor: 83 | arguments: 84 | - '@doctrine.dbal.target_connection' 85 | 86 | App\Fogger\Schema\SchemaManipulator: 87 | arguments: 88 | - "@doctrine.dbal.source_connection" 89 | - "@doctrine.dbal.target_connection" 90 | 91 | App\Fogger\Serializer\TableDenormalizer: 92 | calls: 93 | - [setDenormalizer, ['@serializer']] 94 | tags: 95 | - { name: 'serializer.normalizer'} 96 | 97 | App\Fogger\Mask\MaskStrategyProvider: 98 | arguments: 99 | - !tagged fogger.mask 100 | 101 | App\Fogger\Subset\SubsetStrategyProvider: 102 | arguments: 103 | - !tagged fogger.subset 104 | 105 | App\Fogger\Data\Writer\ChunkWriterProvider: 106 | arguments: 107 | - !tagged fogger.writer 108 | -------------------------------------------------------------------------------- /features/Run/finishCommand.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to have the same constraint and indexes like on the source database 3 | As a user 4 | I want to be able to restore constraint and indexes 5 | 6 | Scenario: finish command should restore constraints and indexes on target database 7 | Given there is 
a source database 8 | And there is a table users with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | email | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table users contains following data: 14 | | id | email | desc | 15 | | 1 | ex1@tsh.io | desc 1 | 16 | | 2 | ex2@tsh.io | desc 2 | 17 | | 3 | ex3@tsh.io | desc 3 | 18 | | 4 | ex4@tsh.io | desc 4 | 19 | And there is an empty target database 20 | And the task queue is empty 21 | And the config test.yaml contains: 22 | """ 23 | tables: 24 | users: 25 | columns: 26 | email: { maskStrategy: "faker"} 27 | """ 28 | And I run "run" command with input: 29 | | --chunk-size | 2 | 30 | | --file | test.yaml | 31 | | --dont-wait | true | 32 | And worker processes 2 task 33 | When I run "finish" command with input: 34 | | --file | test.yaml | 35 | Then I should see 'Data moved, constraints and indexes recreated.' in command's output 36 | 37 | Scenario: finish command should inform user that processing data is still going on 38 | Given there is a source database 39 | And there is a table users with following columns: 40 | | name | type | length | index | 41 | | id | integer | | primary | 42 | | email | string | 64 | | 43 | | desc | string | 128 | | 44 | And the table users contains following data: 45 | | id | email | desc | 46 | | 1 | ex1@tsh.io | desc 1 | 47 | | 2 | ex2@tsh.io | desc 2 | 48 | | 3 | ex3@tsh.io | desc 3 | 49 | | 4 | ex4@tsh.io | desc 4 | 50 | And there is an empty target database 51 | And the task queue is empty 52 | And the config test.yaml contains: 53 | """ 54 | tables: 55 | users: 56 | columns: 57 | email: { maskStrategy: "faker"} 58 | """ 59 | And I run "run" command with input: 60 | | --chunk-size | 2 | 61 | | --file | test.yaml | 62 | | --dont-wait | true | 63 | And worker processes 1 task 64 | When I run "finish" command 65 | Then I should see 'We are still working on it, please try again later (1/2)' in command's output 66 | 67 | Scenario: 
finish command should inform user about errors which occurs when workers processed data 68 | Given there is a source database 69 | And there is a table users with following columns: 70 | | name | type | length | index | 71 | | id | integer | | primary | 72 | | email | string | 64 | | 73 | | desc | string | 128 | | 74 | And the table users contains following data: 75 | | id | email | desc | 76 | | 1 | ex1@tsh.io | desc 1 | 77 | | 2 | ex2@tsh.io | desc 2 | 78 | | 3 | ex3@tsh.io | desc 3 | 79 | | 4 | ex4@tsh.io | desc 4 | 80 | And there is an empty target database 81 | And the task queue is empty 82 | And the config test.yaml contains: 83 | """ 84 | tables: 85 | users: 86 | columns: 87 | email: { maskStrategy: "wrongMask"} 88 | """ 89 | And I run "run" command with input: 90 | | --chunk-size | 2 | 91 | | --file | test.yaml | 92 | | --dont-wait | true | 93 | And worker processes 2 task 94 | When I run "finish" command 95 | Then I should see 'There has been an error' in command's output 96 | Then I should see 'Unknown mask "wrongMask".' in command's output -------------------------------------------------------------------------------- /src/Command/FinishCommand.php: -------------------------------------------------------------------------------- 1 | schemaManipulator = $schemaManipulator; 39 | $this->refiner = $refiner; 40 | $this->chunkCache = $chunkCache; 41 | $this->chunkError = $chunkError; 42 | $this->recipeFactory = $recipeFactory; 43 | 44 | parent::__construct(); 45 | } 46 | 47 | protected function configure() 48 | { 49 | $this 50 | ->setName('fogger:finish') 51 | ->addOption( 52 | 'file', 53 | 'f', 54 | InputOption::VALUE_REQUIRED, 55 | 'Where should the command look for a config file. 
Defaults to fogger.yaml in root folder.', 56 | ConfigLoader::DEFAULT_FILENAME 57 | ) 58 | ->setDescription('Recreates all the indexes and foreign keys in the target'); 59 | } 60 | 61 | protected function execute(InputInterface $input, OutputInterface $output) 62 | { 63 | $output->writeln('Fogger finish procedure'); 64 | 65 | $io = new SymfonyStyle($input, $output); 66 | if ($this->chunkCache->getProcessedCount() < $this->chunkCache->getPublishedCount()) { 67 | $this->outputMessage( 68 | sprintf( 69 | "We are still working on it, please try again later (%d/%d)", 70 | $this->chunkCache->getProcessedCount(), 71 | $this->chunkCache->getPublishedCount() 72 | ), 73 | $io, 74 | 'fg=black;bg=yellow' 75 | ); 76 | 77 | return -1; 78 | } 79 | 80 | if ($this->chunkError->hasError()) { 81 | $this->outputMessage(sprintf("There has been an error:\n\n%s", $this->chunkError->getError()), $io); 82 | 83 | return -1; 84 | } 85 | 86 | try { 87 | $output->writeln(' - refining database...'); 88 | $this->refiner->refine( 89 | $this->recipe ?? 
$this->recipeFactory->createRecipe($input->getOption('file')) 90 | ); 91 | $output->writeln(' - recreating indexes...'); 92 | $this->schemaManipulator->recreateIndexes(); 93 | $output->writeln(' - recreating foreign keys...'); 94 | $this->schemaManipulator->recreateForeignKeys(); 95 | } catch (\Exception $exception) { 96 | $this->outputMessage(sprintf("There has been an error:\n\n%s", $exception->getMessage()), $io); 97 | 98 | return -1; 99 | } 100 | 101 | $this->outputMessage('Data moved, constraints and indexes recreated.', $io, 'fg=black;bg=green'); 102 | 103 | return 0; 104 | } 105 | 106 | protected function outputMessage(string $message, SymfonyStyle $io, string $style = 'fg=white;bg=red') 107 | { 108 | $io->block($message, null, $style, ' ', true); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /features/Run/Chunk/queueChunks.feature: -------------------------------------------------------------------------------- 1 | Feature: 2 | In order to move data from source to target database efficiently 3 | As a user 4 | I want to be able to do it in chunks 5 | 6 | Scenario Outline: One table - divide into chunks 7 | Given there is a source database 8 | And there is a table posts with following columns: 9 | | name | type | length | index | 10 | | id | integer | | primary | 11 | | title | string | 64 | | 12 | | desc | string | 128 | | 13 | And the table posts contains following data: 14 | | id | title | desc | 15 | | 1 | title 1 | desc 1 | 16 | | 2 | title 2 | desc 2 | 17 | | 3 | title 3 | desc 3 | 18 | And there is an empty target database 19 | And the task queue is empty 20 | And the config test.yaml contains: 21 | """ 22 | tables: 23 | posts: 24 | """ 25 | When I run "run" command with input: 26 | | --chunk-size | <chunkSize> | 27 | | --file | test.yaml | 28 | | --dont-wait | true | 29 | Then I should see "<chunks> chunks have been added to queue" in command's output 30 | And the command should exit with code 0 31 | And published 
tasks counter should equal "<chunks>" 32 | And processed tasks counter should equal 0 33 | Examples: 34 | | chunkSize | chunks | 35 | | 100 | 1 | 36 | | 3 | 1 | 37 | | 2 | 2 | 38 | | 1 | 3 | 39 | 40 | Scenario Outline: Two tables divide into chunks 41 | Given there is a source database 42 | And there is a table posts with following columns: 43 | | name | type | length | index | 44 | | id | integer | | primary | 45 | | title | string | 64 | | 46 | | desc | string | 128 | | 47 | And the table posts contains following data: 48 | | id | title | desc | 49 | | 1 | title 1 | desc 1 | 50 | | 2 | title 2 | desc 2 | 51 | | 3 | title 3 | desc 3 | 52 | | 4 | title 4 | desc 4 | 53 | And there is a table other with following columns: 54 | | name | type | length | index | 55 | | id | integer | | primary | 56 | | other | string | 64 | | 57 | And the table other contains following data: 58 | | id | other | 59 | | 1 | other 1 | 60 | | 2 | other 2 | 61 | And there is an empty target database 62 | And the task queue is empty 63 | And the config test.yaml contains: 64 | """ 65 | tables: 66 | posts: 67 | other: 68 | """ 69 | When I run "run" command with input: 70 | | --chunk-size | <chunkSize> | 71 | | --file | test.yaml | 72 | | --dont-wait | true | 73 | Then I should see "<chunks> chunks have been added to queue" in command's output 74 | And published tasks counter should equal "<chunks>" 75 | And processed tasks counter should equal 0 76 | Examples: 77 | | chunkSize | chunks | 78 | | 100 | 2 | 79 | | 4 | 2 | 80 | | 3 | 3 | 81 | | 2 | 3 | 82 | | 1 | 6 | 83 | 84 | Scenario: Table without primary and unique key should not be divided to chunks even if it is larger than chunk size 85 | Given there is a source database 86 | And there is a table posts with following columns: 87 | | name | type | length | 88 | | id | integer | | 89 | | title | string | 64 | 90 | | desc | string | 128 | 91 | And the table posts contains following data: 92 | | id | title | desc | 93 | | 1 | title 1 | desc 1 | 94 | | 2 | title 2 | desc 2 | 95 |
| 3 | title 3 | desc 3 | 96 | | 4 | title 4 | desc 4 | 97 | | 5 | title 5 | desc 5 | 98 | And there is an empty target database 99 | And the task queue is empty 100 | And the config test.yaml contains: 101 | """ 102 | tables: 103 | posts: 104 | """ 105 | When I run "run" command with input: 106 | | --chunk-size | 2 | 107 | | --file | test.yaml | 108 | | --dont-wait | true | 109 | Then I should see "1 chunks have been added to queue" in command's output 110 | And published tasks counter should equal 1 111 | And processed tasks counter should equal 0 -------------------------------------------------------------------------------- /src/Command/RunCommand.php: -------------------------------------------------------------------------------- 1 | chunkProducer = $chunkProducer; 33 | 34 | parent::__construct($schemaManipulator, $refiner, $chunkCache, $chunkError, $recipeFactory); 35 | } 36 | 37 | protected function configure() 38 | { 39 | $this 40 | ->setName('fogger:run') 41 | ->addOption( 42 | 'file', 43 | 'f', 44 | InputOption::VALUE_REQUIRED, 45 | 'Where should the command look for a config file. Defaults to fogger.yaml in root folder.', 46 | ConfigLoader::DEFAULT_FILENAME 47 | ) 48 | ->addOption( 49 | 'chunk-size', 50 | 'c', 51 | InputOption::VALUE_REQUIRED, 52 | sprintf( 53 | 'Data is moved in chunks. What should be the size of a chunk. Defaults to %d', 54 | ChunkMessage::DEFAULT_CHUNK_SIZE 55 | ), 56 | ChunkMessage::DEFAULT_CHUNK_SIZE 57 | ) 58 | ->addOption( 59 | 'dont-wait', 60 | '', 61 | InputOption::VALUE_NONE, 62 | 'With this option command will not wait for the workers to finish.' 63 | ) 64 | ->setDescription('Starts the process of moving data from source to target database. 
'); 65 | } 66 | 67 | private function showProgressBar(OutputInterface $output) 68 | { 69 | $published = $this->chunkCache->getPublishedCount(); 70 | 71 | $output->writeln(''); 72 | $output->writeln('If you are masking big database, you can stop this process with Cmd/Ctrl + C'); 73 | $output->writeln('Moving data will continue in the background - but in that case, you must manually'); 74 | $output->writeln('invoke the fogger:finish command to recreate indexes and foreign keys'); 75 | $output->writeln(''); 76 | 77 | $output->writeln('Moving data in chunks:'); 78 | 79 | $progressBar = new ProgressBar($output, $published); 80 | $progressBar->start(); 81 | 82 | do { 83 | $processed = $this->chunkCache->getProcessedCount(); 84 | $progressBar->setProgress($processed); 85 | usleep(100000); 86 | } while ($processed < $published); 87 | 88 | $progressBar->finish(); 89 | } 90 | 91 | protected function execute(InputInterface $input, OutputInterface $output) 92 | { 93 | $io = new SymfonyStyle($input, $output); 94 | $output->writeln('Fogger run.'); 95 | 96 | $chunkSize = (int)$input->getOption('chunk-size'); 97 | if ($chunkSize < 1) { 98 | $this->outputMessage("There has been an error:\n\nChunk size should be greater than 0", $io); 99 | 100 | return -1; 101 | } 102 | 103 | try { 104 | $this->schemaManipulator->copySchemaDroppingIndexesAndForeignKeys(); 105 | $this->recipe = $this->recipeFactory 106 | ->createRecipe($input->getOption('file'), $chunkSize); 107 | $this->chunkProducer->run($this->recipe); 108 | } catch (\Exception $exception) { 109 | $this->outputMessage("There has been an error:\n\n".$exception->getMessage(), $io); 110 | 111 | return -1; 112 | } 113 | 114 | if ($input->getOption('dont-wait')) { 115 | 116 | $output->writeln(''); 117 | $output->writeln( 118 | <<With dont-wait option the command will only queue data chunks to be processed by the rabbit 120 | worker command. Worker runs in background unless you started docker-composer with --scale=worker=0. 
121 | In order to recreate indexes and foreign keys you will need to manually execute the fogger:finish 122 | command after the workers 123 | EOT 124 | ); 125 | $output->writeln(''); 126 | $output->writeln( 127 | sprintf('%d chunks have been added to queue', $this->chunkCache->getPublishedCount()) 128 | ); 129 | 130 | return 0; 131 | } 132 | 133 | $this->showProgressBar($output); 134 | $output->writeln(''); 135 | $output->writeln(''); 136 | 137 | parent::execute($input, $output); 138 | 139 | return 0; 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # *Fogger* - GDPR friendly database masker 2 | 3 | ## Purpose 4 | 5 | *Fogger* is a tool that solves the problem of data privacy. When developers need to work with production data but are obliged to comply with GDPR regulations they need a way to get the database copy with all the sensitive data masked. And while you can always write your own, custom solution to the problem - **you don't have to anymore** - with *fogger* you are covered. 6 | 7 | Apart from masking data you can also subset or even exclude some tables. Don't worry for the relations with foreign keys, *fogger* will refine database so everything is clean and shiny. 8 | 9 | You can configure various masking and subsetting strategies, and when what *fogger* has to offer is not enough - you can easily extend it with your own strategies. 10 | 11 | ## How to use the docker image 12 | 13 | *Fogger* requires docker environment, redis for caching and two databases: source and target. 
You can set up this stack using for example this docker-compose file: 14 | ``` 15 | version: '2.0' 16 | services: 17 | fogger: 18 | image: tshio/fogger:latest 19 | volumes: 20 | - .:/fogger 21 | environment: 22 | SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source 23 | TARGET_DATABASE_URL: mysql://user:pass@target:3306/target 24 | REDIS_URL: redis://redis 25 | worker: 26 | image: tshio/fogger:latest 27 | environment: 28 | SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source 29 | TARGET_DATABASE_URL: mysql://user:pass@target:3306/target 30 | REDIS_URL: redis://redis 31 | restart: always 32 | command: fogger:consumer --messages=200 33 | redis: 34 | image: redis:4 35 | source: 36 | volumes: 37 | - ./dump.sql:/docker-entrypoint-initdb.d/dump.sql 38 | environment: 39 | MYSQL_DATABASE: source 40 | MYSQL_PASSWORD: pass 41 | MYSQL_ROOT_PASSWORD: pass 42 | MYSQL_USER: user 43 | image: mysql:5.7 44 | target: 45 | environment: 46 | MYSQL_DATABASE: target 47 | MYSQL_PASSWORD: pass 48 | MYSQL_ROOT_PASSWORD: pass 49 | MYSQL_USER: user 50 | image: mysql:5.7 51 | ``` 52 | Note: 53 | - we are mapping volume to fogger's and worker's `/fogger` directory - so the config file would be accessible both in container and in our host filesystem 54 | - we are importing database content from `dump.sql` 55 | 56 | Of course you can modify and adjust the settings to your needs - for example - instead of importing database from dump file you can pass the existing database url to `fogger` and `worker` containers in the env variables. 57 | 58 | Now we can spin up the set-up by `docker-compose up -d`. If the database is huge and you want to speed up the process you can spawn additional workers executing `docker-compose up -d --scale=worker=4` instead. 
Give it a few seconds for the services to spin up, then you can start with *Fogger*: 59 | 60 | *Fogger* gives you three CLI commands: 61 | 62 | * `docker-compose run --rm fogger fogger:init` will connect to your source database and prepare a boilerplate configuration file with the information on tables and columns in your database. This configuration file is the place where you define which columns should be masked (and how) and which tables should be subsetted. See the [example config file](#example-config-file). 63 | * `docker-compose run --rm fogger fogger:run` is the core command that will orchestrate the copying, masking and subsetting of data. The actual copying will be done by background workers that can scale horizontally. Before `run` is executed, make sure that the config file has been modified to your needs. The available subset and mask strategies are described below. 64 | * `docker-compose run --rm fogger fogger:finish` will recreate indexes, refine the database so that all the foreign key constraints are still valid, and then recreate them as well. This command runs automatically after `run`, so you need to execute it manually only when you have stopped the `run` command with `ctrl-c` or started it with the `--dont-wait` option. 65 | * it's done - the masked and subsetted data are in the target database. You can do whatever you please with it. For example: `docker-compose exec target /usr/bin/mysqldump -u user --password=pass target > target.sql` will save the dump of the masked database in your filesystem.
66 | 67 | ### Example config file 68 | 69 | ``` 70 | tables: 71 | posts: 72 | columns: 73 | title: { maskStrategy: starify, options: { length: 12 } } 74 | body: { maskStrategy: faker, options: { method: "sentences" } } 75 | subsetStrategy: tail 76 | subsetOptions: { length: 1000 } 77 | comments: 78 | columns: 79 | comment: { maskStrategy: faker, options: { method: "sentences" } } 80 | users: 81 | columns: 82 | email: { maskStrategy: faker, options: { method: "safeEmail" } } 83 | excludes: 84 | - logs 85 | ``` 86 | This is an example of a config file. The boilerplate based on your database schema will be generated for you by `fogger:init`; all you have to do is fill in the mask strategies on the columns that you want masked and subset strategies on the tables for which you only want a fraction of the rows. 87 | 88 | For the clarity and readability of the config files, all the tables that will not be changed can be omitted. They will be copied as they are. Similarly, you can omit columns that are not to be masked. Tables from the `excludes` section will exist in the target database, but will be empty. 89 | 90 | ### List of available strategies 91 | 92 | #### Masking data 93 | 94 | * hashify - will save the MD5 hash instead of data - you can pass an optional argument: `template` 95 | 96 | `email: { maskStrategy: "hashify", options: { template: "%s@example.com" } }` 97 | 98 | * starify - will save 10 stars instead of data - you can pass an optional argument: `length` to override the default of 10 99 | 100 | `email: { maskStrategy: "starify", options: { } }` 101 | 102 | * faker - will use the marvelous [faker](https://github.com/fzaninotto/Faker) library. Pass the `method` of faker that you want to use here as an option.
103 | 104 | `email: { maskStrategy: "faker", options: { method: "safeEmail" } }` 105 | `date: { maskStrategy: "faker", options: { method: "date", arguments: ["Y::m::d", "2017-12-31 23:59:59"] } }` 106 | 107 | #### Subsetting data 108 | 109 | * range - only copy those rows where `column` is between `min` and `max` 110 | ``` 111 | subsetStrategy: range 112 | subsetOptions: { column: "createdAt", min: "2018-01-01 00:00", max: "2018-01-31 23:59:59" } 113 | ``` 114 | 115 | * head and tail - only copy the first / last `length` rows 116 | ``` 117 | subsetStrategy: head 118 | subsetOptions: { length: 1000 } 119 | ``` 120 | or 121 | ``` 122 | subsetStrategy: tail 123 | subsetOptions: { length: 1000 } 124 | ``` 125 | 126 | ### Under the hood 127 | 128 | If you are interested in what really happens: 129 | 130 | * source database schema without indices and foreign keys is copied to target 131 | * data is divided into chunks (this includes query modification for subsetting). Chunks are processed by background workers (using RabbitMQ) 132 | * during copying, sensitive data is replaced with its masked version - in order to keep the substituted values consistent, redis is used as a cache 133 | * when all data is copied, *fogger* will recreate indices 134 | * refining cleans up the database by removing (or setting to null) relations that point to excluded or subsetted table rows 135 | * the last step is to recreate foreign keys 136 | 137 | ## Contributing 138 | 139 | Feel free to contribute to this project! Just fork the code, make any updates and let us know!
140 | -------------------------------------------------------------------------------- /symfony.lock: -------------------------------------------------------------------------------- 1 | { 2 | "behat/behat": { 3 | "version": "v3.5.0" 4 | }, 5 | "behat/gherkin": { 6 | "version": "v4.5.1" 7 | }, 8 | "behat/symfony2-extension": { 9 | "version": "2.1", 10 | "recipe": { 11 | "repo": "github.com/symfony/recipes", 12 | "branch": "master", 13 | "version": "2.1", 14 | "ref": "6b63fb76d3bb2dd16098f12adf6d6ca844e21862" 15 | } 16 | }, 17 | "behat/transliterator": { 18 | "version": "v1.2.0" 19 | }, 20 | "container-interop/container-interop": { 21 | "version": "1.2.0" 22 | }, 23 | "doctrine/annotations": { 24 | "version": "1.0", 25 | "recipe": { 26 | "repo": "github.com/symfony/recipes", 27 | "branch": "master", 28 | "version": "1.0", 29 | "ref": "cb4152ebcadbe620ea2261da1a1c5a9b8cea7672" 30 | } 31 | }, 32 | "doctrine/cache": { 33 | "version": "v1.8.0" 34 | }, 35 | "doctrine/collections": { 36 | "version": "v1.5.0" 37 | }, 38 | "doctrine/common": { 39 | "version": "v2.9.0" 40 | }, 41 | "doctrine/dbal": { 42 | "version": "v2.8.0" 43 | }, 44 | "doctrine/doctrine-bundle": { 45 | "version": "1.6", 46 | "recipe": { 47 | "repo": "github.com/symfony/recipes", 48 | "branch": "master", 49 | "version": "1.6", 50 | "ref": "ae205d5114e719deb64d2110f56ef910787d1e04" 51 | } 52 | }, 53 | "doctrine/doctrine-cache-bundle": { 54 | "version": "1.3.3" 55 | }, 56 | "doctrine/doctrine-migrations-bundle": { 57 | "version": "1.2", 58 | "recipe": { 59 | "repo": "github.com/symfony/recipes", 60 | "branch": "master", 61 | "version": "1.2", 62 | "ref": "c1431086fec31f17fbcfe6d6d7e92059458facc1" 63 | } 64 | }, 65 | "doctrine/event-manager": { 66 | "version": "v1.0.0" 67 | }, 68 | "doctrine/inflector": { 69 | "version": "v1.3.0" 70 | }, 71 | "doctrine/instantiator": { 72 | "version": "1.1.0" 73 | }, 74 | "doctrine/lexer": { 75 | "version": "v1.0.1" 76 | }, 77 | "doctrine/migrations": { 78 | "version": 
"v1.8.1" 79 | }, 80 | "doctrine/orm": { 81 | "version": "v2.6.2" 82 | }, 83 | "doctrine/persistence": { 84 | "version": "v1.0.1" 85 | }, 86 | "doctrine/reflection": { 87 | "version": "v1.0.0" 88 | }, 89 | "fzaninotto/faker": { 90 | "version": "v1.8.0" 91 | }, 92 | "jdorn/sql-formatter": { 93 | "version": "v1.2.17" 94 | }, 95 | "ocramius/package-versions": { 96 | "version": "1.3.0" 97 | }, 98 | "ocramius/proxy-manager": { 99 | "version": "2.1.1" 100 | }, 101 | "phpdocumentor/reflection-common": { 102 | "version": "1.0.1" 103 | }, 104 | "phpdocumentor/reflection-docblock": { 105 | "version": "4.3.0" 106 | }, 107 | "phpdocumentor/type-resolver": { 108 | "version": "0.4.0" 109 | }, 110 | "phpspec/php-diff": { 111 | "version": "v1.1.0" 112 | }, 113 | "phpspec/phpspec": { 114 | "version": "5.0.3" 115 | }, 116 | "phpspec/prophecy": { 117 | "version": "1.8.0" 118 | }, 119 | "predis/predis": { 120 | "version": "v1.1.1" 121 | }, 122 | "psr/cache": { 123 | "version": "1.0.1" 124 | }, 125 | "psr/container": { 126 | "version": "1.0.0" 127 | }, 128 | "psr/log": { 129 | "version": "1.0.2" 130 | }, 131 | "psr/simple-cache": { 132 | "version": "1.0.1" 133 | }, 134 | "sebastian/comparator": { 135 | "version": "3.0.2" 136 | }, 137 | "sebastian/diff": { 138 | "version": "3.0.1" 139 | }, 140 | "sebastian/exporter": { 141 | "version": "3.1.0" 142 | }, 143 | "sebastian/recursion-context": { 144 | "version": "3.0.0" 145 | }, 146 | "snc/redis-bundle": { 147 | "version": "2.0", 148 | "recipe": { 149 | "repo": "github.com/symfony/recipes-contrib", 150 | "branch": "master", 151 | "version": "2.0", 152 | "ref": "e9c58bfc414cfb7f06e8e5ae9f589868498f5d6a" 153 | } 154 | }, 155 | "symfony/cache": { 156 | "version": "v4.1.4" 157 | }, 158 | "symfony/class-loader": { 159 | "version": "v3.4.15" 160 | }, 161 | "symfony/config": { 162 | "version": "v4.1.4" 163 | }, 164 | "symfony/console": { 165 | "version": "3.3", 166 | "recipe": { 167 | "repo": "github.com/symfony/recipes", 168 | "branch": "master", 
169 | "version": "3.3", 170 | "ref": "e3868d2f4a5104f19f844fe551099a00c6562527" 171 | } 172 | }, 173 | "symfony/debug": { 174 | "version": "v4.1.4" 175 | }, 176 | "symfony/dependency-injection": { 177 | "version": "v4.1.4" 178 | }, 179 | "symfony/doctrine-bridge": { 180 | "version": "v4.1.4" 181 | }, 182 | "symfony/dotenv": { 183 | "version": "v4.1.4" 184 | }, 185 | "symfony/event-dispatcher": { 186 | "version": "v4.1.4" 187 | }, 188 | "symfony/filesystem": { 189 | "version": "v4.1.4" 190 | }, 191 | "symfony/finder": { 192 | "version": "v4.1.4" 193 | }, 194 | "symfony/flex": { 195 | "version": "1.0", 196 | "recipe": { 197 | "repo": "github.com/symfony/recipes", 198 | "branch": "master", 199 | "version": "1.0", 200 | "ref": "e921bdbfe20cdefa3b82f379d1cd36df1bc8d115" 201 | } 202 | }, 203 | "symfony/framework-bundle": { 204 | "version": "3.3", 205 | "recipe": { 206 | "repo": "github.com/symfony/recipes", 207 | "branch": "master", 208 | "version": "3.3", 209 | "ref": "87c585d24de9f43bca80ebcfd5cf5cb39445d95f" 210 | } 211 | }, 212 | "symfony/http-foundation": { 213 | "version": "v4.1.4" 214 | }, 215 | "symfony/http-kernel": { 216 | "version": "v4.1.4" 217 | }, 218 | "symfony/inflector": { 219 | "version": "v4.1.4" 220 | }, 221 | "symfony/orm-pack": { 222 | "version": "v1.0.5" 223 | }, 224 | "symfony/polyfill-mbstring": { 225 | "version": "v1.9.0" 226 | }, 227 | "symfony/polyfill-php72": { 228 | "version": "v1.9.0" 229 | }, 230 | "symfony/process": { 231 | "version": "v4.1.6" 232 | }, 233 | "symfony/property-access": { 234 | "version": "v4.1.4" 235 | }, 236 | "symfony/property-info": { 237 | "version": "v4.1.4" 238 | }, 239 | "symfony/routing": { 240 | "version": "4.0", 241 | "recipe": { 242 | "repo": "github.com/symfony/recipes", 243 | "branch": "master", 244 | "version": "4.0", 245 | "ref": "5f514d9d3b8a8aac3d62ae6a86b18b90ed0c7826" 246 | } 247 | }, 248 | "symfony/serializer": { 249 | "version": "v4.1.4" 250 | }, 251 | "symfony/serializer-pack": { 252 | "version": 
"v1.0.1" 253 | }, 254 | "symfony/translation": { 255 | "version": "3.3", 256 | "recipe": { 257 | "repo": "github.com/symfony/recipes", 258 | "branch": "master", 259 | "version": "3.3", 260 | "ref": "1fb02a6e1c8f3d4232cce485c9afa868d63b115a" 261 | } 262 | }, 263 | "symfony/var-dumper": { 264 | "version": "v4.1.4" 265 | }, 266 | "symfony/yaml": { 267 | "version": "v4.1.4" 268 | }, 269 | "webmozart/assert": { 270 | "version": "1.3.0" 271 | }, 272 | "zendframework/zend-code": { 273 | "version": "3.3.1" 274 | }, 275 | "zendframework/zend-eventmanager": { 276 | "version": "3.2.1" 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /features/bootstrap/DatabaseContext.php: -------------------------------------------------------------------------------- 1 | source = $source; 19 | $this->target = $target; 20 | } 21 | 22 | /** 23 | * @param Connection $connection 24 | * @throws \Doctrine\DBAL\DBALException 25 | */ 26 | private function dropSchema(Connection $connection) 27 | { 28 | if ($connection->getDriver()->getName() === 'pdo_mysql') { 29 | $connection->exec('SET FOREIGN_KEY_CHECKS = 0;'); 30 | } 31 | foreach ($connection->getSchemaManager()->listTables() as $table) { 32 | $connection->exec( 33 | sprintf( 34 | 'DROP TABLE %s CASCADE', 35 | $connection->quoteIdentifier($table->getName()) 36 | ) 37 | ); 38 | } 39 | if ($connection->getDriver()->getName() === 'pdo_mysql') { 40 | $connection->exec('SET FOREIGN_KEY_CHECKS = 1;'); 41 | } 42 | } 43 | 44 | /** 45 | * @Given there is a source database 46 | * @throws \Doctrine\DBAL\DBALException 47 | */ 48 | public function thereIsASourceDatabase() 49 | { 50 | $this->dropSchema($this->source); 51 | } 52 | 53 | /** 54 | * @Given there is an empty target database 55 | * @throws \Doctrine\DBAL\DBALException 56 | */ 57 | public function thereIsAnEmptyTargetDatabase() 58 | { 59 | $this->dropSchema($this->target); 60 | } 61 | 62 | /** 63 | * @Given there is a table :tableName with 
following columns: 64 | * @param $tableName 65 | * @param TableNode $tableNode 66 | * @throws \Doctrine\DBAL\DBALException 67 | */ 68 | public function thereIsATableWithFollowingColumns(string $tableName, TableNode $tableNode) 69 | { 70 | $columns = $pkColumns = $indexes = $uniqueIndexes = []; 71 | foreach ($tableNode->getHash() as $row) { 72 | $column = $this->createColumn($row); 73 | $columns[] = $column; 74 | switch ($row['index'] ?? false) { 75 | case 'primary': 76 | $pkColumns[] = $column->getName(); 77 | break; 78 | case 'index' : 79 | $indexes[] = $column->getName(); 80 | break; 81 | case 'unique' : 82 | $uniqueIndexes[] = $column->getName(); 83 | break; 84 | } 85 | } 86 | $table = new Schema\Table($tableName, $columns); 87 | if ($pkColumns) { 88 | $table->setPrimaryKey($pkColumns); 89 | } 90 | foreach ($indexes as $index) { 91 | $table->addIndex([$index]); 92 | } 93 | foreach ($uniqueIndexes as $index) { 94 | $table->addUniqueIndex([$index]); 95 | } 96 | $this->source->getSchemaManager()->dropAndCreateTable($table); 97 | } 98 | 99 | /** 100 | * @param $row 101 | * @return Schema\Column 102 | * @throws \Doctrine\DBAL\DBALException 103 | */ 104 | private function createColumn(array $row): Schema\Column 105 | { 106 | $column = new Schema\Column($row['name'], Type::getType($row['type'])); 107 | $column->setLength($row['length'] ?? null); 108 | $column->setComment($row['comment'] ?? ''); 109 | $column->setNotnull(($row['nullable'] ?? 
false) !== 'true'); 110 | 111 | return $column; 112 | } 113 | 114 | /** 115 | * @Given the table :tableName contains following data: 116 | * @param string $tableName 117 | * @param TableNode $table 118 | */ 119 | public function theTableTableContainsFollowingData(string $tableName, TableNode $table) 120 | { 121 | foreach ($table->getHash() as $hash) { 122 | $queryBuilder = $this->source->createQueryBuilder(); 123 | $queryBuilder 124 | ->insert($this->source->quoteIdentifier($tableName)) 125 | ->values( 126 | array_combine( 127 | array_map( 128 | function ($key) { 129 | return $this->source->quoteIdentifier($key); 130 | }, 131 | array_keys($hash) 132 | ), 133 | array_map( 134 | function ($value) { 135 | return $value === '' ? 'null' : $this->source->quote($value); 136 | }, 137 | $hash 138 | ) 139 | ) 140 | ) 141 | ->execute(); 142 | } 143 | } 144 | 145 | /** 146 | * @Then the table :tablename in target database should have :expected row(s) 147 | * @param string $tablename 148 | * @param int $expected 149 | * @throws Exception 150 | */ 151 | public function theTableInTargetDatabaseShouldHaveRows(string $tablename, int $expected) 152 | { 153 | $queryBuilder = $this->target->createQueryBuilder(); 154 | $count = (int)$queryBuilder 155 | ->select('count(*)') 156 | ->from($this->target->quoteIdentifier($tablename)) 157 | ->execute() 158 | ->fetchColumn(); 159 | if ($count === $expected) { 160 | return; 161 | } 162 | 163 | throw new \Exception( 164 | sprintf( 165 | 'Table contains %d rows, %d expected', 166 | $count, 167 | $expected 168 | ) 169 | ); 170 | } 171 | 172 | private function rowInTableExists(string $tablename, array $columns) 173 | { 174 | $queryBuilder = $this->target->createQueryBuilder(); 175 | $queryBuilder 176 | ->resetQueryParts() 177 | ->select('count(*)') 178 | ->from($this->target->quoteIdentifier($tablename)); 179 | $counter = 0; 180 | foreach ($columns as $key => $value) { 181 | if ($value === '') { 182 | 
$queryBuilder->andWhere($queryBuilder->expr()->isNull($this->target->quoteIdentifier($key))); 183 | continue; 184 | } 185 | $queryBuilder 186 | ->andWhere(sprintf('%s = ?', $this->target->quoteIdentifier($key))) 187 | ->setParameter($counter++, $value); 188 | } 189 | 190 | return 0 !== (int)$queryBuilder->execute()->fetchColumn(); 191 | } 192 | 193 | /** 194 | * @Then the table :tablename in target database should contain rows: 195 | * @param string $tablename 196 | * @param TableNode $table 197 | * @throws Exception 198 | */ 199 | public function theTableInTargetDatabaseShouldContainRows(string $tablename, TableNode $table) 200 | { 201 | foreach ($table->getColumnsHash() as $hash) { 202 | if ($this->rowInTableExists($tablename, $hash)) { 203 | continue; 204 | } 205 | throw new \Exception(sprintf('Row %s not found', json_encode($hash))); 206 | } 207 | } 208 | 209 | /** 210 | * @Then the table :tablename in target database should not contain rows: 211 | * @param string $tablename name of the target-database table to inspect 212 | * @param TableNode $table rows that must be absent; an empty cell is matched as NULL 213 | * @throws Exception if any of the listed rows is present in the target table 214 | */ 215 | public function theTableInTargetDatabaseShouldNotContainRows(string $tablename, TableNode $table) 216 | { 217 | foreach ($table->getColumnsHash() as $hash) { 218 | if (!$this->rowInTableExists($tablename, $hash)) { 219 | continue; 220 | } 221 | throw new \Exception(sprintf('Row %s found, but it should not be in the table', json_encode($hash))); 222 | } 223 | } 224 | 225 | /** 226 | * @Given the :local references :foreign 227 | * @param string $local 228 | * @param string $foreign 229 | */ 230 | public function theUsersSupervisorReferencesUsersEmail(string $local, string $foreign) 231 | { 232 | $local = explode('.', $local); 233 | $foreign = explode('.', $foreign); 234 | 235 | $schemaManager = $this->source->getSchemaManager(); 236 | $schemaManager->createForeignKey( 237 | new Schema\ForeignKeyConstraint([$local[1]], $foreign[0], [$foreign[1]]), 238 | $local[0] 239 | ); 240 | } 241 | } 242 | --------------------------------------------------------------------------------