├── .github ├── FUNDING.yml └── workflows │ ├── release-on-milestone-closed.yml │ └── continuous-integration.yml ├── .gitignore ├── renovate.json ├── infection.json.dist ├── src └── DoctrineBatchUtils │ └── BatchProcessing │ ├── Exception │ ├── ExceptionInterface.php │ └── MissingBatchItemException.php │ ├── SelectBatchIteratorAggregate.php │ └── SimpleBatchIteratorAggregate.php ├── examples ├── entity │ └── MyEntity.php ├── persisting-new-objects-in-batch.php ├── bootstrap-orm.php └── working-with-query-resultsets-in-batch.php ├── .laminas-ci.json ├── phpunit.xml.dist ├── phpcs.xml.dist ├── LICENSE ├── UPGRADE.md ├── test └── DoctrineBatchUtilsTest │ ├── BatchProcessing │ ├── Exception │ │ └── MissingBatchItemExceptionTest.php │ ├── SimpleBatchIteratorAggregateTest.php │ └── SelectBatchIteratorAggregateTest.php │ └── MockEntityManager.php ├── psalm.xml ├── composer.json └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [Ocramius] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | composer.phar 3 | clover.xml 4 | .phpunit.result.cache 5 | infectionlog.txt 6 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "local>Ocramius/.github:renovate-config" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /infection.json.dist: -------------------------------------------------------------------------------- 1 | { 2 | "source": { 3 | "directories": [ 4 | "src" 5 | ] 6 | }, 7 | "timeout": 15, 8 | "logs": { 9 | "text": "infectionlog.txt" 10 | }, 11 | "minMsi": 93.0, 12 | "minCoveredMsi": 94.0 
13 | } 14 | -------------------------------------------------------------------------------- /src/DoctrineBatchUtils/BatchProcessing/Exception/ExceptionInterface.php: -------------------------------------------------------------------------------- 1 | id = $id; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /.laminas-ci.json: -------------------------------------------------------------------------------- 1 | { 2 | "extensions": [ 3 | "bcmath", 4 | "pcov" 5 | ], 6 | "exclude": [ 7 | {"name": "Infection"} 8 | ], 9 | "additional_checks": [ 10 | { 11 | "name": "Infection (PCOV)", 12 | "job": { 13 | "php": "@lowest", 14 | "dependencies": "locked", 15 | "command": "./vendor/bin/roave-infection-static-analysis-plugin" 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | src 18 | 19 | 20 | 21 | 22 | ./test/DoctrineBatchUtilsTest 23 | 24 | 25 | -------------------------------------------------------------------------------- /phpcs.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | This project follows doctrine/coding-standard 6 | 7 | src 8 | test 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/persisting-new-objects-in-batch.php: -------------------------------------------------------------------------------- 1 | persist(new MyEntity($i)); 17 | 18 | yield $i; 19 | } 20 | }), 21 | $entityManager, 22 | 100, // flush/clear after 100 iterations 23 | ); 24 | 25 | foreach ($iterable as $record) { 26 | // operate on records here 27 | 28 | var_dump([MyEntity::class => $record]); 29 | var_dump(['memory_get_peak_usage()' => (memory_get_peak_usage(true) / 1024 / 1024) . 
' MiB']); 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Marco Pivetta 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /src/DoctrineBatchUtils/BatchProcessing/Exception/MissingBatchItemException.php: -------------------------------------------------------------------------------- 1 | getIdentifierValues($object)); 23 | if ($identifier === false) { 24 | $identifier = 'unknown'; 25 | } 26 | 27 | return new self(sprintf( 28 | 'Requested batch item %s#%s (of type %s) with identifier "%s" could not be found', 29 | get_class($object), 30 | spl_object_hash($object), 31 | $metadata->getName(), 32 | $identifier, 33 | )); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /examples/bootstrap-orm.php: -------------------------------------------------------------------------------- 1 | setMetadataDriverImpl(new AttributeDriver([__DIR__ . '/entity'])); 20 | $configuration->setAutoGenerateProxyClasses(ProxyFactory::AUTOGENERATE_EVAL); 21 | $configuration->setProxyNamespace('ORMProxies'); 22 | $configuration->setProxyDir(sys_get_temp_dir()); 23 | 24 | $connection = DriverManager::getConnection(['driver' => 'pdo_sqlite', 'memory' => true], $configuration); 25 | $entityManager = new EntityManager($connection, $configuration); 26 | 27 | (new SchemaTool($entityManager)) 28 | ->createSchema( 29 | $entityManager 30 | ->getMetadataFactory() 31 | ->getAllMetadata(), 32 | ); 33 | 34 | return $entityManager; 35 | }; 36 | -------------------------------------------------------------------------------- /examples/working-with-query-resultsets-in-batch.php: -------------------------------------------------------------------------------- 1 | persist(new MyEntity($i)); 16 | 17 | yield $i; 18 | } 19 | }), 20 | $entityManager, 21 | 100, // flush/clear after 100 iterations 22 | ); 23 | 24 | iterator_to_array($persistAllEntries); // quickly consume the previous iterator 25 | 26 | /** @var MyEntity[] $savedEntries */ 27 | $savedEntries = SimpleBatchIteratorAggregate::fromQuery( 28 | 
$entityManager->createQuery(sprintf('SELECT e FROM %s e', MyEntity::class)), 29 | 100, // flush/clear after 100 iterations 30 | ); 31 | 32 | foreach ($savedEntries as $savedEntry) { 33 | // operate on records here 34 | 35 | var_dump([MyEntity::class => $savedEntry->id]); 36 | var_dump(['memory_get_peak_usage()' => (memory_get_peak_usage(true) / 1024 / 1024) . ' MiB']); 37 | } 38 | -------------------------------------------------------------------------------- /UPGRADE.md: -------------------------------------------------------------------------------- 1 | # Upgrade 2 | 3 | ## 2.0.0 4 | 5 | **BC Breaks** 6 | 7 | Access to the actual entity has changed. In previous versions the entity could be accessed via `[0]` on the result item, like this: 8 | 9 | ```php 10 | $iterable = SimpleBatchIteratorAggregate::fromArrayResult(...); 11 | foreach ($iterable as $record) { 12 | $entity = $record[0]; 13 | ... 14 | } 15 | ``` 16 | 17 | That was rather confusing and unexpected, so it is no longer wrapped in an array and the `[0]` accessor must be dropped: 18 | 19 | ```php 20 | foreach ($iterable as $record) { 21 | $entity = $record; 22 | ... 
23 | } 24 | ``` 25 | 26 | - The parameter `$batchSize` of `DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate::fromQuery()` changed from no type to a non-contravariant int 27 | - The parameter `$batchSize` of `DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate::fromArrayResult()` changed from no type to a non-contravariant int 28 | - The parameter `$batchSize` of `DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate::fromTraversableResult()` changed from no type to a non-contravariant int 29 | - The return type of `DoctrineBatchUtils\BatchProcessing\Exception\MissingBatchItemException::fromInvalidReference()` changed from no type to `DoctrineBatchUtils\BatchProcessing\Exception\MissingBatchItemException` 30 | - The parameter `$object` of `DoctrineBatchUtils\BatchProcessing\Exception\MissingBatchItemException::fromInvalidReference()` changed from no type to a non-contravariant object 31 | - The parameter `$object` of `DoctrineBatchUtils\BatchProcessing\Exception\MissingBatchItemException::fromInvalidReference()` changed from no type to object 32 | -------------------------------------------------------------------------------- /test/DoctrineBatchUtilsTest/BatchProcessing/Exception/MissingBatchItemExceptionTest.php: -------------------------------------------------------------------------------- 1 | createMock(ClassMetadata::class); 24 | 25 | $metadata->expects(self::any())->method('getName')->willReturn('Foo'); 26 | $metadata->expects(self::any())->method('getIdentifierValues')->with($object)->willReturn(['abc' => 'def']); 27 | 28 | $exception = MissingBatchItemException::fromInvalidReference($metadata, $object); 29 | 30 | $this->assertInstanceOf(MissingBatchItemException::class, $exception); 31 | $this->assertInstanceOf(UnexpectedValueException::class, $exception); 32 | $this->assertInstanceOf(ExceptionInterface::class, $exception); 33 | 34 | self::assertSame( 35 | 'Requested batch item stdClass#' 36 | . 
spl_object_hash($object) 37 | . ' (of type Foo) with identifier "{"abc":"def"}" could not be found', 38 | $exception->getMessage(), 39 | ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /psalm.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ocramius/doctrine-batch-utils", 3 | "description": "A set of utilities to operate with Doctrine ORM's batch processing functionality", 4 | "type": "library", 5 | "license": "MIT", 6 | "homepage": "https://github.com/Ocramius/DoctrineBatchUtils", 7 | "keywords": [ 8 | "doctrine", 9 | "doctrine2", 10 | "orm", 11 | "batch processing" 12 | ], 13 | "authors": [ 14 | { 15 | "name": "Marco Pivetta", 16 | "email": "ocramius@gmail.com", 17 | "homepage": "http://ocramius.github.io/" 18 | } 19 | ], 20 | "require": { 21 | "php": "~8.3.0 || ~8.4.0 || ~8.5.0", 22 | "doctrine/orm": "^3.6.0", 23 | "doctrine/persistence": "^4.1.1" 24 | }, 25 | "require-dev": { 26 | "doctrine/coding-standard": "^14.0.0", 27 | "roave/infection-static-analysis-plugin": "^1.42.0", 28 | "phpunit/phpunit": "^12.5.4", 29 | "vimeo/psalm": "^6.14.3", 30 | "doctrine/dbal" : "^4.4.1", 31 | "psalm/plugin-phpunit": "^0.19.5" 32 | }, 33 | "autoload": { 34 | "psr-4": { 35 | "DoctrineBatchUtils\\": "src/DoctrineBatchUtils" 36 | } 37 | }, 38 | "autoload-dev": { 39 | "psr-4": { 40 | "DoctrineBatchUtilsTest\\": "test/DoctrineBatchUtilsTest" 41 | } 42 | }, 43 | "config": { 44 | "allow-plugins": { 45 | "dealerdirect/phpcodesniffer-composer-installer": true, 46 | "infection/extension-installer": true 
47 | }, 48 | "platform": { 49 | "php": "8.3.99" 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /.github/workflows/release-on-milestone-closed.yml: -------------------------------------------------------------------------------- 1 | # https://help.github.com/en/categories/automating-your-workflow-with-github-actions 2 | 3 | name: "Automatic Releases" 4 | 5 | on: 6 | milestone: 7 | types: 8 | - "closed" 9 | 10 | jobs: 11 | release: 12 | name: "GIT tag, release & create merge-up PR" 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: "Checkout" 17 | uses: "actions/checkout@v6" 18 | 19 | - name: "Release" 20 | uses: "laminas/automatic-releases@v1" 21 | with: 22 | command-name: "laminas:automatic-releases:release" 23 | env: 24 | "GITHUB_TOKEN": ${{ secrets.GITHUB_TOKEN }} 25 | "SIGNING_SECRET_KEY": ${{ secrets.SIGNING_SECRET_KEY }} 26 | "GIT_AUTHOR_NAME": ${{ secrets.GIT_AUTHOR_NAME }} 27 | "GIT_AUTHOR_EMAIL": ${{ secrets.GIT_AUTHOR_EMAIL }} 28 | 29 | - name: "Create Merge-Up Pull Request" 30 | uses: "laminas/automatic-releases@v1" 31 | with: 32 | command-name: "laminas:automatic-releases:create-merge-up-pull-request" 33 | env: 34 | "GITHUB_TOKEN": ${{ secrets.GITHUB_TOKEN }} 35 | "SIGNING_SECRET_KEY": ${{ secrets.SIGNING_SECRET_KEY }} 36 | "GIT_AUTHOR_NAME": ${{ secrets.GIT_AUTHOR_NAME }} 37 | "GIT_AUTHOR_EMAIL": ${{ secrets.GIT_AUTHOR_EMAIL }} 38 | 39 | - name: "Create and/or Switch to new Release Branch" 40 | uses: "laminas/automatic-releases@v1" 41 | with: 42 | command-name: "laminas:automatic-releases:switch-default-branch-to-next-minor" 43 | env: 44 | "GITHUB_TOKEN": ${{ secrets.ORGANIZATION_ADMIN_TOKEN }} 45 | "SIGNING_SECRET_KEY": ${{ secrets.SIGNING_SECRET_KEY }} 46 | "GIT_AUTHOR_NAME": ${{ secrets.GIT_AUTHOR_NAME }} 47 | "GIT_AUTHOR_EMAIL": ${{ secrets.GIT_AUTHOR_EMAIL }} 48 | 49 | - name: "Bump Changelog Version On Originating Release Branch" 50 | uses: "laminas/automatic-releases@v1" 51 | 
with: 52 | command-name: "laminas:automatic-releases:bump-changelog" 53 | env: 54 | "GITHUB_TOKEN": ${{ secrets.GITHUB_TOKEN }} 55 | "SIGNING_SECRET_KEY": ${{ secrets.SIGNING_SECRET_KEY }} 56 | "GIT_AUTHOR_NAME": ${{ secrets.GIT_AUTHOR_NAME }} 57 | "GIT_AUTHOR_EMAIL": ${{ secrets.GIT_AUTHOR_EMAIL }} 58 | 59 | - name: "Create new milestones" 60 | uses: "laminas/automatic-releases@v1" 61 | with: 62 | command-name: "laminas:automatic-releases:create-milestones" 63 | env: 64 | "GITHUB_TOKEN": ${{ secrets.GITHUB_TOKEN }} 65 | "SIGNING_SECRET_KEY": ${{ secrets.SIGNING_SECRET_KEY }} 66 | "GIT_AUTHOR_NAME": ${{ secrets.GIT_AUTHOR_NAME }} 67 | "GIT_AUTHOR_EMAIL": ${{ secrets.GIT_AUTHOR_EMAIL }} 68 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | # See https://github.com/laminas/laminas-continuous-integration-action 2 | # Generates a job matrix based on current dependencies and supported version 3 | # ranges, then runs all those jobs 4 | name: "Continuous Integration" 5 | 6 | on: 7 | pull_request: 8 | push: 9 | 10 | jobs: 11 | matrix: 12 | name: Generate job matrix 13 | runs-on: ubuntu-latest 14 | outputs: 15 | matrix: ${{ steps.matrix.outputs.matrix }} 16 | steps: 17 | - name: Gather CI configuration 18 | id: matrix 19 | uses: laminas/laminas-ci-matrix-action@1.32.0 20 | 21 | qa: 22 | name: QA Checks 23 | needs: [ matrix ] 24 | runs-on: ${{ matrix.operatingSystem }} 25 | strategy: 26 | fail-fast: false 27 | matrix: ${{ fromJSON(needs.matrix.outputs.matrix) }} 28 | steps: 29 | - name: ${{ matrix.name }} 30 | uses: laminas/laminas-continuous-integration-action@1.43.0 31 | env: 32 | "GITHUB_TOKEN": ${{ secrets.GITHUB_TOKEN }} 33 | with: 34 | job: ${{ matrix.job }} 35 | 36 | demo-scripts: 37 | name: "Check Demo Scripts" 38 | 39 | runs-on: ${{ matrix.operating-system }} 40 | 41 | strategy: 42 | matrix: 43 | 
dependencies: 44 | - "locked" 45 | php-version: 46 | - "8.3" 47 | - "8.4" 48 | - "8.5" 49 | operating-system: 50 | - "ubuntu-latest" 51 | 52 | steps: 53 | - name: "Checkout" 54 | uses: "actions/checkout@v6" 55 | 56 | - name: "Install PHP" 57 | uses: "shivammathur/setup-php@2.36.0" 58 | with: 59 | coverage: "pcov" 60 | php-version: "${{ matrix.php-version }}" 61 | tools: composer:v2, cs2pr 62 | 63 | - name: "Cache dependencies" 64 | uses: "actions/cache@v5" 65 | with: 66 | path: | 67 | ~/.composer/cache 68 | vendor 69 | key: "php-${{ matrix.php-version }}-${{ matrix.dependencies }}" 70 | restore-keys: "php-${{ matrix.php-version }}-${{ matrix.dependencies }}" 71 | 72 | - name: "Install lowest dependencies" 73 | if: ${{ matrix.dependencies == 'lowest' }} 74 | run: "composer update --prefer-lowest --no-interaction --no-progress --no-suggest" 75 | 76 | - name: "Install highest dependencies" 77 | if: ${{ matrix.dependencies == 'highest' }} 78 | run: "composer update --no-interaction --no-progress --no-suggest" 79 | 80 | - name: "Install locked dependencies" 81 | if: ${{ matrix.dependencies == 'locked' }} 82 | run: "composer install --no-interaction --no-progress --no-suggest" 83 | 84 | - name: "Check Demo Scripts" 85 | run: | 86 | php examples/persisting-new-objects-in-batch.php 87 | php examples/working-with-query-resultsets-in-batch.php 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DoctrineBatchUtils 2 | 3 | This repository attempts to ease the pain of dealing with 4 | [batch-processing](http://docs.doctrine-project.org/projects/doctrine-orm/en/latest/reference/batch-processing.html) 5 | in the context of [Doctrine ORM](http://docs.doctrine-project.org/projects/doctrine-orm/en/latest/) 6 | transactions. 7 | 8 | This repository is maintained by [Patrick Reimers (PReimers)](https://github.com/PReimers). 
9 | 10 | [![License](https://img.shields.io/packagist/l/ocramius/doctrine-batch-utils.svg)](https://github.com/Ocramius/DoctrineBatchUtils/blob/master/LICENSE) 11 | [![Current release](https://img.shields.io/packagist/v/ocramius/doctrine-batch-utils.svg)](https://packagist.org/packages/ocramius/doctrine-batch-utils) 12 | [![Build Status](https://github.com/Ocramius/DoctrineBatchUtils/actions/workflows/continuous-integration.yml/badge.svg)](https://github.com/Ocramius/DoctrineBatchUtils/actions/workflows/continuous-integration.yml) 13 | 14 | ## Installation 15 | 16 | Supported installation method is via [Composer](http://getcomposer.org/): 17 | 18 | ```sh 19 | composer require ocramius/doctrine-batch-utils 20 | ``` 21 | 22 | ## Current features 23 | 24 | As it stands, the only implemented utility in this repository is an 25 | [`IteratorAggregate`](http://php.net/manual/en/class.iteratoraggregate.php) that 26 | wraps around a DB transaction and calls 27 | [`ObjectManager#flush()`](https://github.com/doctrine/common/blob/v2.5.1/lib/Doctrine/Common/Persistence/ObjectManager.php#L120) 28 | and [`ObjectManager#clear()`](https://github.com/doctrine/common/blob/v2.5.1/lib/Doctrine/Common/Persistence/ObjectManager.php#L88) 29 | on the given [`EntityManager`](https://github.com/doctrine/doctrine2/blob/v2.5.1/lib/Doctrine/ORM/EntityManagerInterface.php). 
30 | 31 | ### Example (array iteration) 32 | 33 | It can be used as follows: 34 | 35 | ```php 36 | use DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate; 37 | 38 | $object1 = $entityManager->find('Foo', 1); 39 | $object2 = $entityManager->find('Bar', 2); 40 | 41 | $iterable = SimpleBatchIteratorAggregate::fromArrayResult( 42 | [$object1, $object2], // items to iterate 43 | $entityManager, // the entity manager to operate on 44 | 100 // items to traverse before flushing/clearing 45 | ); 46 | 47 | foreach ($iterable as $record) { 48 | // operate on records here 49 | } 50 | ``` 51 | 52 | #### `$record` freshness 53 | 54 | Please note that the `$record` inside the loop will always be "fresh" 55 | ([`managed`](http://doctrine-orm.readthedocs.org/projects/doctrine-orm/en/latest/reference/working-with-objects.html#persisting-entities) state), 56 | as the iterator re-fetches it on its own: this prevents you from having to 57 | manually call [`ObjectManager#find()`](https://github.com/doctrine/common/blob/v2.5.1/lib/Doctrine/Common/Persistence/ObjectManager.php#L42) 58 | on your own for every iteration. 59 | 60 | #### Automatic flushing/clearing 61 | 62 | In this example, the `EntityManager` will be flushed and cleared only once, 63 | but if there were more than 100 records, then it would flush (and clear) twice 64 | or more. 65 | 66 | ### Example (query/iterators) 67 | 68 | The previous example is still not memory efficient, as we are operating on a 69 | pre-loaded array of objects loaded by the ORM. 
70 | 71 | We can use queries instead: 72 | 73 | ```php 74 | use DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate; 75 | 76 | $iterable = SimpleBatchIteratorAggregate::fromQuery( 77 | $entityManager->createQuery('SELECT f FROM Files f'), 78 | 100 // flush/clear after 100 iterations 79 | ); 80 | 81 | foreach ($iterable as $record) { 82 | // operate on records here 83 | } 84 | ``` 85 | 86 | Or our own iterator/generator: 87 | 88 | ```php 89 | use DoctrineBatchUtils\BatchProcessing\SimpleBatchIteratorAggregate; 90 | 91 | // This is where you'd persist/create/load your entities (a lot of them!) 92 | $results = function () { 93 | for ($i = 0; $i < 100000000; $i += 1) { 94 | yield new MyEntity($i); // note: identifier must exist in the DB 95 | } 96 | }; 97 | 98 | $iterable = SimpleBatchIteratorAggregate::fromTraversableResult( 99 | $results(), 100 | $entityManager, 101 | 100 // flush/clear after 100 iterations 102 | ); 103 | 104 | foreach ($iterable as $record) { 105 | // operate on records here 106 | } 107 | 108 | // eventually after all records have been processed, the assembled transaction will be committed to the database 109 | ``` 110 | 111 | Both of these approaches are much more memory efficient. 
112 | -------------------------------------------------------------------------------- /src/DoctrineBatchUtils/BatchProcessing/SelectBatchIteratorAggregate.php: -------------------------------------------------------------------------------- 1 | 26 | */ 27 | final class SelectBatchIteratorAggregate implements IteratorAggregate 28 | { 29 | /** @var iterable */ 30 | private iterable $resultSet; 31 | private EntityManagerInterface $entityManager; 32 | /** @psalm-var positive-int */ 33 | private int $batchSize; 34 | 35 | /** @psalm-param positive-int $batchSize */ 36 | public static function fromQuery(AbstractQuery $query, int $batchSize): self 37 | { 38 | return new self($query->toIterable(), $query->getEntityManager(), $batchSize); 39 | } 40 | 41 | /** 42 | * @param array $results 43 | * @psalm-param positive-int $batchSize 44 | * 45 | * @return self 46 | * 47 | * @template C 48 | * @template D 49 | */ 50 | public static function fromArrayResult( 51 | array $results, 52 | EntityManagerInterface $entityManager, 53 | int $batchSize 54 | ): self { 55 | return new self($results, $entityManager, $batchSize); 56 | } 57 | 58 | /** 59 | * @param Traversable $results 60 | * @psalm-param positive-int $batchSize 61 | * 62 | * @return self 63 | * 64 | * @template E 65 | * @template F 66 | */ 67 | public static function fromTraversableResult( 68 | Traversable $results, 69 | EntityManagerInterface $entityManager, 70 | int $batchSize 71 | ): self { 72 | return new self($results, $entityManager, $batchSize); 73 | } 74 | 75 | /** 76 | * @return Traversable 77 | * 78 | * @psalm-suppress InvalidReturnType psalm can't infer the correct key/value pairs here, but we've carefully 79 | * tested this signature. 
80 | */ 81 | public function getIterator(): Traversable 82 | { 83 | $iteration = 0; 84 | 85 | foreach ($this->resultSet as $key => $value) { 86 | $iteration += 1; 87 | 88 | if (is_array($value)) { 89 | $firstKey = key($value); 90 | if ($firstKey !== null && is_object($value[$firstKey]) && $value === [$firstKey => $value[$firstKey]]) { 91 | yield $key => $this->reFetchObject($value[$firstKey]); 92 | 93 | $this->clearBatch($iteration); 94 | continue; 95 | } 96 | } 97 | 98 | if (! is_object($value)) { 99 | yield $key => $value; 100 | 101 | $this->clearBatch($iteration); 102 | continue; 103 | } 104 | 105 | yield $key => $this->reFetchObject($value); 106 | 107 | $this->clearBatch($iteration); 108 | } 109 | 110 | $this->entityManager->clear(); 111 | } 112 | 113 | /** 114 | * BatchIteratorAggregate constructor (private by design: use a named constructor instead). 115 | * 116 | * @param iterable $resultSet 117 | * @psalm-param positive-int $batchSize 118 | */ 119 | private function __construct(iterable $resultSet, EntityManagerInterface $entityManager, int $batchSize) 120 | { 121 | $this->resultSet = $resultSet; 122 | $this->entityManager = $entityManager; 123 | $this->batchSize = $batchSize; 124 | } 125 | 126 | /** 127 | * @psalm-param TReFetched $object 128 | * 129 | * @psalm-return TReFetched 130 | * 131 | * @template TReFetched of object 132 | */ 133 | private function reFetchObject(object $object): object 134 | { 135 | $className = get_class($object); 136 | $metadata = $this->entityManager->getClassMetadata($className); 137 | $freshValue = $this->entityManager->find($className, $metadata->getIdentifierValues($object)); 138 | 139 | if (! 
$freshValue) { 140 | throw MissingBatchItemException::fromInvalidReference($metadata, $object); 141 | } 142 | 143 | return $freshValue; 144 | } 145 | 146 | /** @param int<0, max> $iteration */ 147 | private function clearBatch(int $iteration): void 148 | { 149 | if ($iteration % $this->batchSize) { 150 | return; 151 | } 152 | 153 | $this->entityManager->clear(); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/DoctrineBatchUtils/BatchProcessing/SimpleBatchIteratorAggregate.php: -------------------------------------------------------------------------------- 1 | 23 | * @psalm-suppress MissingOverrideAttribute Psalm is confused by the positioning of the {@see \Override} attribute, 24 | * so we skipped adding it here. 25 | */ 26 | final class SimpleBatchIteratorAggregate implements IteratorAggregate 27 | { 28 | /** @var iterable */ 29 | private iterable $resultSet; 30 | private EntityManagerInterface $entityManager; 31 | /** @psalm-var positive-int */ 32 | private int $batchSize; 33 | 34 | /** @psalm-param positive-int $batchSize */ 35 | public static function fromQuery(AbstractQuery $query, int $batchSize): self 36 | { 37 | return new self($query->toIterable(), $query->getEntityManager(), $batchSize); 38 | } 39 | 40 | /** 41 | * @param array $results 42 | * @psalm-param positive-int $batchSize 43 | * 44 | * @return self 45 | * 46 | * @template C 47 | * @template D 48 | */ 49 | public static function fromArrayResult(array $results, EntityManagerInterface $entityManager, int $batchSize): self 50 | { 51 | return new self($results, $entityManager, $batchSize); 52 | } 53 | 54 | /** 55 | * @param Traversable $results 56 | * @psalm-param positive-int $batchSize 57 | * 58 | * @return self 59 | * 60 | * @template E 61 | * @template F 62 | */ 63 | public static function fromTraversableResult( 64 | Traversable $results, 65 | EntityManagerInterface $entityManager, 66 | int $batchSize 67 | ): self { 68 | return new 
self($results, $entityManager, $batchSize); 69 | } 70 | 71 | /** 72 | * @return Traversable 73 | * 74 | * @psalm-suppress InvalidReturnType psalm can't infer the correct key/value pairs here, but we've carefully 75 | * tested this signature. 76 | */ 77 | public function getIterator(): Traversable 78 | { 79 | $iteration = 0; 80 | 81 | $this->entityManager->beginTransaction(); 82 | 83 | try { 84 | foreach ($this->resultSet as $key => $value) { 85 | $iteration += 1; 86 | 87 | if (is_array($value)) { 88 | $firstKey = key($value); 89 | if ($firstKey !== null && is_object($value[$firstKey]) && $value === [$firstKey => $value[$firstKey]]) { 90 | yield $key => $this->reFetchObject($value[$firstKey]); 91 | 92 | $this->flushAndClearBatch($iteration); 93 | continue; 94 | } 95 | } 96 | 97 | if (! is_object($value)) { 98 | yield $key => $value; 99 | 100 | $this->flushAndClearBatch($iteration); 101 | continue; 102 | } 103 | 104 | yield $key => $this->reFetchObject($value); 105 | 106 | $this->flushAndClearBatch($iteration); 107 | } 108 | } catch (Throwable $exception) { 109 | $this->entityManager->rollback(); 110 | 111 | throw $exception; 112 | } 113 | 114 | $this->flushAndClearEntityManager(); 115 | $this->entityManager->commit(); 116 | } 117 | 118 | /** 119 | * BatchIteratorAggregate constructor (private by design: use a named constructor instead). 
120 | * 121 | * @param iterable $resultSet 122 | * @psalm-param positive-int $batchSize 123 | */ 124 | private function __construct(iterable $resultSet, EntityManagerInterface $entityManager, int $batchSize) 125 | { 126 | $this->resultSet = $resultSet; 127 | $this->entityManager = $entityManager; 128 | $this->batchSize = $batchSize; 129 | } 130 | 131 | /** Looks the given object up again through EntityManager::find(), using the identifier values reported by its class metadata; throws MissingBatchItemException when find() returns a falsy value. 132 | * @psalm-param TReFetched $object 133 | * 134 | * @psalm-return TReFetched 135 | * 136 | * @template TReFetched of object 137 | */ 138 | private function reFetchObject(object $object): object 139 | { 140 | $className = get_class($object); 141 | $metadata = $this->entityManager->getClassMetadata($className); 142 | $freshValue = $this->entityManager->find($className, $metadata->getIdentifierValues($object)); 143 | 144 | if (! $freshValue) { 145 | throw MissingBatchItemException::fromInvalidReference($metadata, $object); 146 | } 147 | 148 | return $freshValue; 149 | } 150 | 151 | /** @param int<0, max> $iteration */ 152 | private function flushAndClearBatch(int $iteration): void 153 | { 154 | if ($iteration % $this->batchSize) { 155 | return; 156 | } 157 | /* remainder is zero: $iteration is a multiple of the batch size (0 included) */ 158 | $this->flushAndClearEntityManager(); 159 | } 160 | /** Calls flush() and then clear() on the entity manager. */ 161 | private function flushAndClearEntityManager(): void 162 | { 163 | $this->entityManager->flush(); 164 | $this->entityManager->clear(); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /test/DoctrineBatchUtilsTest/MockEntityManager.php: -------------------------------------------------------------------------------- 1 | realEntityManager = $realEntityManager; 36 | } 37 | /** Echoes the method name followed by a newline, then returns false. */ 38 | #[Override] 39 | public function isUninitializedObject(mixed $value): bool 40 | { 41 | echo __FUNCTION__ . 
"\n"; 42 | 43 | return false; 44 | } 45 | /** Forwards to the wrapped real entity manager. */ 46 | #[Override] 47 | public function getProxyFactory(): ProxyFactory 48 | { 49 | return $this->realEntityManager->getProxyFactory(); 50 | } 51 | 52 | #[Override] 53 | public function getMetadataFactory(): ClassMetadataFactory 54 | { 55 | return $this->realEntityManager->getMetadataFactory(); 56 | } 57 | 58 | #[Override] 59 | public function getClassMetadata(string $className): ClassMetadata 60 | { 61 | return $this->realEntityManager->getClassMetadata($className); 62 | } 63 | 64 | #[Override] 65 | public function getUnitOfWork(): UnitOfWork 66 | { 67 | return $this->realEntityManager->getUnitOfWork(); 68 | } 69 | 70 | #[Override] 71 | public function getCache(): Cache|null 72 | { 73 | return $this->realEntityManager->getCache(); 74 | } 75 | 76 | #[Override] 77 | public function getConnection(): Connection 78 | { 79 | return $this->realEntityManager->getConnection(); 80 | } 81 | 82 | #[Override] 83 | public function getExpressionBuilder(): Expr 84 | { 85 | return $this->realEntityManager->getExpressionBuilder(); 86 | } 87 | /** Does not call the real entity manager: only echoes the method name, which the batch iterator tests match through expectOutputString(). */ 88 | #[Override] 89 | public function beginTransaction(): void 90 | { 91 | echo __FUNCTION__ . "\n"; 92 | } 93 | 94 | #[Override] 95 | public function wrapInTransaction(callable $func): mixed 96 | { 97 | return $this->realEntityManager->wrapInTransaction($func); 98 | } 99 | 100 | #[Override] 101 | public function commit(): void 102 | { 103 | echo __FUNCTION__ . "\n"; 104 | } 105 | 106 | #[Override] 107 | public function rollback(): void 108 | { 109 | echo __FUNCTION__ . 
"\n"; 110 | } 111 | 112 | #[Override] 113 | public function createQuery(string $dql = ''): Query 114 | { 115 | return $this->realEntityManager->createQuery($dql); 116 | } 117 | 118 | #[Override] 119 | public function createNativeQuery(string $sql, ResultSetMapping $rsm): NativeQuery 120 | { 121 | return $this->realEntityManager->createNativeQuery($sql, $rsm); 122 | } 123 | 124 | #[Override] 125 | public function createQueryBuilder(): QueryBuilder 126 | { 127 | return $this->realEntityManager->createQueryBuilder(); 128 | } 129 | 130 | #[Override] 131 | public function getReference(string $entityName, mixed $id): object|null 132 | { 133 | return $this->realEntityManager->getReference($entityName, $id); 134 | } 135 | 136 | #[Override] 137 | public function close(): void 138 | { 139 | echo __FUNCTION__ . "\n"; 140 | } 141 | /** Ignores all three arguments and only echoes the method name. */ 142 | #[Override] 143 | public function lock(object $entity, LockMode|int $lockMode, DateTimeInterface|int|null $lockVersion = null): void 144 | { 145 | echo __FUNCTION__ . 
"\n"; 146 | } 147 | 148 | #[Override] 149 | public function getEventManager(): EventManager 150 | { 151 | return $this->realEntityManager->getEventManager(); 152 | } 153 | 154 | #[Override] 155 | public function getConfiguration(): Configuration 156 | { 157 | return $this->realEntityManager->getConfiguration(); 158 | } 159 | 160 | #[Override] 161 | public function isOpen(): bool 162 | { 163 | return $this->realEntityManager->isOpen(); 164 | } 165 | 166 | /** {@inheritDoc} */ 167 | #[Override] 168 | public function newHydrator($hydrationMode): AbstractHydrator 169 | { 170 | return $this->realEntityManager->newHydrator($hydrationMode); 171 | } 172 | 173 | #[Override] 174 | public function getFilters(): FilterCollection 175 | { 176 | return $this->realEntityManager->getFilters(); 177 | } 178 | 179 | #[Override] 180 | public function isFiltersStateClean(): bool 181 | { 182 | return $this->realEntityManager->isFiltersStateClean(); 183 | } 184 | 185 | #[Override] 186 | public function hasFilters(): bool 187 | { 188 | return $this->realEntityManager->hasFilters(); 189 | } 190 | /** Forwards all four arguments to find() on the real entity manager and returns its result. */ 191 | #[Override] 192 | public function find(string $className, mixed $id, LockMode|int|null $lockMode = null, int|null $lockVersion = null): object|null 193 | { 194 | return $this->realEntityManager->find($className, $id, $lockMode, $lockVersion); 195 | } 196 | /** Echoes the method name only; the object is not passed on to the real entity manager. */ 197 | #[Override] 198 | public function persist(object $object): void 199 | { 200 | echo __FUNCTION__ . "\n"; 201 | } 202 | 203 | #[Override] 204 | public function remove(object $object): void 205 | { 206 | echo __FUNCTION__ . "\n"; 207 | } 208 | 209 | #[Override] 210 | public function clear(): void 211 | { 212 | echo __FUNCTION__ . 
"\n"; 213 | } 214 | 215 | #[Override] 216 | public function detach(object $object): void 217 | { 218 | echo __FUNCTION__ . "\n"; 219 | } 220 | 221 | #[Override] 222 | public function refresh(object $object, LockMode|int|null $lockMode = null): void 223 | { 224 | echo __FUNCTION__ . 
"\n"; 225 | } 226 | /** Echoes the method name only; flush() is not called on the real entity manager. */ 227 | #[Override] 228 | public function flush(): void 229 | { 230 | echo __FUNCTION__ . "\n"; 231 | } 232 | 233 | #[Override] 234 | public function getRepository(string $className): EntityRepository 235 | { 236 | return $this->realEntityManager->getRepository($className); 237 | } 238 | 239 | #[Override] 240 | public function initializeObject(object $obj): void 241 | { 242 | echo __FUNCTION__ . "\n"; 243 | } 244 | 245 | #[Override] 246 | public function contains(object $object): bool 247 | { 248 | return $this->realEntityManager->contains($object); 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /test/DoctrineBatchUtilsTest/BatchProcessing/SimpleBatchIteratorAggregateTest.php: -------------------------------------------------------------------------------- 1 | metadata = $this->createMock(ClassMetadata::class); 41 | $this->query = $this->createMock(AbstractQuery::class); 42 | $this->entityManager = $this->getMockBuilder(MockEntityManager::class) 43 | ->disableOriginalConstructor() 44 | ->disableOriginalClone() 45 | ->onlyMethods(['getClassMetadata', 'find']) 46 | ->getMock(); 47 | /* only getClassMetadata() and find() are replaced by the mock builder; the remaining MockEntityManager methods keep their own bodies */ 48 | $this->query->method('getEntityManager')->willReturn($this->entityManager); 49 | $this->metadata->method('getName')->willReturn('Yadda'); 50 | 51 | $classMetadataCall = $this->entityManager->method('getClassMetadata'); 52 | 53 | $classMetadataCall->willReturn($this->metadata); 54 | 55 | parent::setUp(); 56 | } 57 | /** fromQuery() must return a SimpleBatchIteratorAggregate for a query whose toIterable() yields an empty ArrayIterator. */ 58 | public function testFromQuery(): void 59 | { 60 | $this->query->method('toIterable')->willReturn(new ArrayIterator()); 61 | 62 | self::assertInstanceOf( 63 | SimpleBatchIteratorAggregate::class, 64 | SimpleBatchIteratorAggregate::fromQuery($this->query, 100), 65 | ); 66 | } 67 | 68 | public function testFromArray(): void 69 | { 70 | self::assertInstanceOf( 71 | SimpleBatchIteratorAggregate::class, 72 | SimpleBatchIteratorAggregate::fromArrayResult([], $this->entityManager, 100), 73 | ); 74 | } 
75 | 76 | public function testFromTraversableResult(): void 77 | { 78 | self::assertInstanceOf( 79 | SimpleBatchIteratorAggregate::class, 80 | SimpleBatchIteratorAggregate::fromTraversableResult(new ArrayIterator([]), $this->entityManager, 100), 81 | ); 82 | } 83 | /** An empty result set must still print beginTransaction, flush, clear and commit once each, and yield nothing. */ 84 | public function testIterationWithEmptySet(): void 85 | { 86 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult([], $this->entityManager, 100); 87 | 88 | $this->expectOutputString("beginTransaction\nflush\nclear\ncommit\n"); 89 | 90 | foreach ($iterator as $key => $value) { 91 | throw new UnexpectedValueException('Iterator should have been empty!'); 92 | } 93 | } 94 | /** Iterating a single stdClass item must throw MissingBatchItemException after printing beginTransaction and rollback. */ 95 | public function testIterationRollsBackOnMissingItems(): void 96 | { 97 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult([new stdClass()], $this->entityManager, 100); 98 | 99 | $this->expectOutputString("beginTransaction\nrollback\n"); 100 | 101 | $this->expectException(MissingBatchItemException::class); 102 | 103 | foreach ($iterator as $key => $value) { 104 | $dummy = $key; 105 | } 106 | } 107 | /** Scalar items must come back unchanged under their original keys, and find() must never be called. */ 108 | public function testIterationWithNonObjects(): void 109 | { 110 | $items = ['foo' => 'bar', 'bar' => 'baz']; 111 | 112 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult($items, $this->entityManager, 100); 113 | 114 | $this->entityManager->expects(self::never())->method('find'); 115 | 116 | $this->expectOutputString("beginTransaction\nflush\nclear\ncommit\n"); 117 | 118 | $iteratedObjects = []; 119 | 120 | foreach ($iterator as $key => $value) { 121 | $iteratedObjects[$key] = $value; 122 | } 123 | 124 | $this->assertSame($items, $iteratedObjects); 125 | } 126 | 127 | public function testIterationWithSuccessfulReFetches(): void 128 | { 129 | $originalObjects = ['foo' => new stdClass(), 'bar' => new stdClass()]; 130 | $freshObjects = ['foo' => new stdClass(), 'bar' => new stdClass()]; 131 | 132 | $this->metadata->method('getIdentifierValues')->willReturnMap([ 133 | [$originalObjects['foo'], ['id' => 123]], 
134 | [$originalObjects['bar'], ['id' => 456]], 135 | ]); 136 | $this->entityManager->expects(self::exactly(count($originalObjects)))->method('find')->willReturnMap([ 137 | [stdClass::class, ['id' => 123], null, null, $freshObjects['foo']], 138 | [stdClass::class, ['id' => 456], null, null, $freshObjects['bar']], 139 | ]); 140 | 141 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult($originalObjects, $this->entityManager, 100); 142 | 143 | $this->expectOutputString("beginTransaction\nflush\nclear\ncommit\n"); 144 | 145 | $iteratedObjects = []; 146 | 147 | foreach ($iterator as $key => $value) { 148 | $iteratedObjects[$key] = $value; 149 | } 150 | /* assertSame: the yielded values must be the very instances returned by find(), under the original keys */ 151 | $this->assertSame($freshObjects, $iteratedObjects); 152 | } 153 | 154 | /** 155 | * \Doctrine\ORM\AbstractQuery#iterate() produces nested results like [[entity],[entity],[entity]] instead 156 | * of a flat [entity,entity,entity], so we have to unwrap the results to refresh them. 157 | */ 158 | public function testIterationWithSuccessfulReFetchesInNestedIterableResult(): void 159 | { 160 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 161 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 162 | 163 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult($originalObjects, $this->entityManager, 100); 164 | 165 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, $freshObjects); 166 | } 167 | /** Same nested-result scenario, with the iterator built by fromQuery() from a stubbed toIterable(). */ 168 | public function testIterationWithSuccessfulReFetchesInNestedIterableResultFromQuery(): void 169 | { 170 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 171 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 172 | 173 | $this->query->method('toIterable')->willReturn(new ArrayIterator($originalObjects)); 174 | $iterator = SimpleBatchIteratorAggregate::fromQuery($this->query, 100); 175 | 176 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, 
$freshObjects); 177 | } 178 | /** Same nested-result scenario through fromTraversableResult(). NOTE(review): the toIterable() stub below looks unused on this path - confirm. */ 179 | public function testIterationWithSuccessfulReFetchesInNestedIterableResultFromTraversableResult(): void 180 | { 181 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 182 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 183 | 184 | $this->query->method('toIterable')->willReturn(new ArrayIterator($originalObjects)); 185 | $iterator = SimpleBatchIteratorAggregate::fromTraversableResult(new ArrayIterator($originalObjects), $this->entityManager, 100); 186 | 187 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, $freshObjects); 188 | } 189 | 190 | /** Stubs getIdentifierValues() and find() for the two nested items, iterates, and asserts that the fresh objects come back under the original keys. 191 | * @param array{aaa: list{stdClass}, bbb: list{stdClass}} $originalObjects 192 | * @param array{aaa: stdClass, bbb: stdClass} $freshObjects 193 | */ 194 | private function assertSuccessfulReFetchesInNestedIterableResult(SimpleBatchIteratorAggregate $iterator, array $originalObjects, array $freshObjects): void 195 | { 196 | $this->metadata->method('getIdentifierValues')->willReturnMap( 197 | [ 198 | [$originalObjects['aaa'][0], ['id' => 123]], 199 | [$originalObjects['bbb'][0], ['id' => 456]], 200 | ], 201 | ); 202 | $this->entityManager->expects(self::exactly(count($originalObjects)))->method('find')->willReturnMap( 203 | [ 204 | [stdClass::class, ['id' => 123], null, null, $freshObjects['aaa']], 205 | [stdClass::class, ['id' => 456], null, null, $freshObjects['bbb']], 206 | ], 207 | ); 208 | 209 | $iteratedObjects = []; 210 | 211 | $this->expectOutputString("beginTransaction\nflush\nclear\ncommit\n"); 212 | 213 | foreach ($iterator as $key => $value) { 214 | $iteratedObjects[$key] = $value; 215 | } 216 | 217 | $this->assertSame( 218 | [ 219 | 'aaa' => $freshObjects['aaa'], 220 | 'bbb' => $freshObjects['bbb'], 221 | ], 222 | $iteratedObjects, 223 | ); 224 | } 225 | 226 | /** 227 | * \Doctrine\ORM\AbstractQuery#iterate() produces nested results like [[entity],[entity],[entity]] instead 228 | * of a flat 
[entity,entity,entity], so we have to skip any entries that do not look like those. 229 | */ 230 | public function testWillNotReFetchEntitiesInNonIterableAlikeResult(): void 231 | { 232 | $originalObjects = [ 233 | [new stdClass(), new stdClass()], 234 | ['123'], 235 | [], 236 | ]; 237 | /* none of the three rows is a one-object list: two objects, a single string, and an empty array */ 238 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult($originalObjects, $this->entityManager, 100); 239 | 240 | $this->entityManager->expects(self::never())->method('find'); 241 | $this->expectOutputString("beginTransaction\nflush\nclear\ncommit\n"); 242 | 243 | $iteratedObjects = []; 244 | 245 | foreach ($iterator as $key => $value) { 246 | $iteratedObjects[$key] = $value; 247 | } 248 | 249 | $this->assertSame($originalObjects, $iteratedObjects); 250 | } 251 | 252 | /** 253 | * @psalm-param positive-int $batchSize 254 | * 255 | * @dataProvider iterationFlushesProvider 256 | */ 257 | #[DataProvider('iterationFlushesProvider')] 258 | public function testIterationFlushesAtGivenBatchSizes(int $resultItemsCount, int $batchSize, string $expectOutputString): void 259 | { 260 | $object = new stdClass(); 261 | 262 | $iterator = SimpleBatchIteratorAggregate::fromArrayResult( 263 | array_fill(0, $resultItemsCount, $object), 264 | $this->entityManager, 265 | $batchSize, 266 | ); 267 | 268 | $this->metadata->method('getIdentifierValues')->willReturn(['id' => 123]); 269 | $this->entityManager->expects(self::exactly($resultItemsCount))->method('find')->willReturn($object); 270 | 271 | $this->expectOutputString($expectOutputString); 272 | 273 | $iteratedObjects = []; 274 | 275 | foreach ($iterator as $key => $value) { 276 | $iteratedObjects[$key] = $value; 277 | } 278 | 279 | $this->assertCount($resultItemsCount, $iteratedObjects); 280 | } 281 | /* each row: [result item count, batch size, expected printed call sequence] */ 282 | /** @return non-empty-list<array{int<1, max>, int<1, max>, non-empty-string}> */ 283 | public static function iterationFlushesProvider(): array 284 | { 285 | return [ 286 | [10, 5, "beginTransaction\nflush\nclear\nflush\nclear\nflush\nclear\ncommit\n"], 287 
| [2, 1, "beginTransaction\nflush\nclear\nflush\nclear\nflush\nclear\ncommit\n"], 288 | [15, 5, "beginTransaction\nflush\nclear\nflush\nclear\nflush\nclear\nflush\nclear\ncommit\n"], 289 | [10, 2, "beginTransaction\nflush\nclear\nflush\nclear\nflush\nclear\nflush\nclear\nflush\nclear\nflush\nclear\ncommit\n"], 290 | ]; 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /test/DoctrineBatchUtilsTest/BatchProcessing/SelectBatchIteratorAggregateTest.php: -------------------------------------------------------------------------------- 1 | query = $this->createMock(AbstractQuery::class); 42 | $this->entityManager = $this->createMock(EntityManagerInterface::class); 43 | $this->metadata = $this->createMock(ClassMetadata::class); 44 | /* flush() must never be called on this entity manager mock */ 45 | $this->entityManager->expects(self::never())->method('flush'); 46 | $this->query->method('getEntityManager')->willReturn($this->entityManager); 47 | $this->entityManager->method('getClassMetadata')->willReturn($this->metadata); 48 | $this->metadata->method('getName')->willReturn('Yadda'); 49 | 50 | parent::setUp(); 51 | } 52 | 53 | public function testFromQuery(): void 54 | { 55 | $this->query->method('toIterable')->willReturn(new ArrayIterator()); 56 | 57 | self::assertInstanceOf( 58 | SelectBatchIteratorAggregate::class, 59 | SelectBatchIteratorAggregate::fromQuery($this->query, 100), 60 | ); 61 | } 62 | 63 | public function testFromArray(): void 64 | { 65 | self::assertInstanceOf( 66 | SelectBatchIteratorAggregate::class, 67 | SelectBatchIteratorAggregate::fromArrayResult([], $this->entityManager, 100), 68 | ); 69 | } 70 | 71 | public function testFromTraversableResult(): void 72 | { 73 | self::assertInstanceOf( 74 | SelectBatchIteratorAggregate::class, 75 | SelectBatchIteratorAggregate::fromTraversableResult(new ArrayIterator([]), $this->entityManager, 100), 76 | ); 77 | } 78 | /** Even an empty result set must lead to exactly one clear() call. */ 79 | public function testIterationWithEmptySet(): void 80 | { 81 | $iterator = 
SelectBatchIteratorAggregate::fromArrayResult([], $this->entityManager, 100); 82 | 83 | $this->entityManager->expects(self::exactly(1))->method('clear'); 84 | 85 | foreach ($iterator as $key => $value) { 86 | throw new UnexpectedValueException('Iterator should have been empty!'); 87 | } 88 | } 89 | 90 | public function testIterationWithNonObjects(): void 91 | { 92 | $items = ['foo' => 'bar', 'bar' => 'baz']; 93 | 94 | $iterator = SelectBatchIteratorAggregate::fromArrayResult($items, $this->entityManager, 100); 95 | 96 | $this->entityManager->expects(self::never())->method('find'); 97 | $this->entityManager->expects(self::exactly(1))->method('clear'); 98 | 99 | $iteratedObjects = []; 100 | 101 | foreach ($iterator as $key => $value) { 102 | $iteratedObjects[$key] = $value; 103 | } 104 | 105 | $this->assertSame($items, $iteratedObjects); 106 | } 107 | 108 | public function testIterationWithSuccessfulReFetches(): void 109 | { 110 | $originalObjects = ['foo' => new stdClass(), 'bar' => new stdClass()]; 111 | $freshObjects = ['foo' => new stdClass(), 'bar' => new stdClass()]; 112 | /* hand-written subclass: its getClassMetadata() and find() echo their names so the call order can be matched with expectOutputString() further down */ 113 | $query = $this->createMock(AbstractQuery::class); 114 | $metadata = $this->createMock(ClassMetadata::class); 115 | $entityManager = new class ($metadata, $freshObjects) extends MockEntityManager { 116 | private ClassMetadata $classMetadata; 117 | /** @var array<string, object> */ 118 | private array $freshObjects; 119 | private int $atFind; 120 | 121 | /** @param array<string, object> $freshObjects */ 122 | public function __construct(ClassMetadata $classMetadata, array $freshObjects) 123 | { 124 | $this->classMetadata = $classMetadata; 125 | $this->freshObjects = $freshObjects; 126 | $this->atFind = 0; 127 | } 128 | 129 | /** 130 | * @param string|class-string<TRequested> $className 131 | * 132 | * @return ClassMetadata<TRequested> 133 | * 134 | * @inheritDoc 135 | * @template TRequested of object 136 | */ 137 | #[Override] 138 | public function getClassMetadata($className): ClassMetadata 139 | { 140 | echo __FUNCTION__ . 
"\n"; 141 | 142 | /** @psalm-var ClassMetadata<TRequested> $metadata inference not really possible here - all stubs */ 143 | $metadata = $this->classMetadata; 144 | 145 | return $metadata; 146 | } 147 | /** Call-counting stub: the first call must ask for id 123 and gets the 'foo' object, the second must ask for id 456 and gets 'bar'; any further call throws. */ 148 | #[Override] 149 | public function find(string $className, mixed $id, LockMode|int|null $lockMode = null, int|null $lockVersion = null): object|null 150 | { 151 | echo __FUNCTION__ . "\n"; 152 | $this->atFind++; 153 | 154 | if ($this->atFind === 1) { 155 | TestCase::assertSame(['id' => 123], $id); 156 | 157 | $freshObject = $this->freshObjects['foo']; 158 | 159 | TestCase::assertInstanceOf($className, $freshObject); 160 | 161 | return $freshObject; 162 | } 163 | 164 | if ($this->atFind === 2) { 165 | TestCase::assertSame(['id' => 456], $id); 166 | 167 | $freshObject = $this->freshObjects['bar']; 168 | 169 | TestCase::assertInstanceOf($className, $freshObject); 170 | 171 | return $freshObject; 172 | } 173 | 174 | throw new RuntimeException('should not be call more than twice'); 175 | } 176 | }; 177 | 178 | $query->method('getEntityManager')->willReturn($entityManager); 179 | $metadata->method('getName')->willReturn('Yadda'); 180 | $metadata->method('getIdentifierValues')->willReturnMap([ 181 | [$originalObjects['foo'], ['id' => 123]], 182 | [$originalObjects['bar'], ['id' => 456]], 183 | ]); 184 | $iterator = SelectBatchIteratorAggregate::fromArrayResult($originalObjects, $entityManager, 100); 185 | /* the trailing clear is printed by MockEntityManager::clear(), which the anonymous class does not override */ 186 | $this->expectOutputString("getClassMetadata\nfind\ngetClassMetadata\nfind\nclear\n"); 187 | 188 | $iteratedObjects = []; 189 | 190 | foreach ($iterator as $key => $value) { 191 | $iteratedObjects[$key] = $value; 192 | } 193 | 194 | $this->assertSame($freshObjects, $iteratedObjects); 195 | } 196 | 197 | /** 198 | * \Doctrine\ORM\AbstractQuery#iterate() produces nested results like [[entity],[entity],[entity]] instead 199 | * of a flat [entity,entity,entity], so we have to unwrap the results to refresh them. 
200 | */ 201 | public function testIterationWithSuccessfulReFetchesInNestedIterableResult(): void 202 | { 203 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 204 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 205 | 206 | $iterator = SelectBatchIteratorAggregate::fromArrayResult($originalObjects, $this->entityManager, 100); 207 | 208 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, $freshObjects); 209 | } 210 | /** Same nested-result scenario, with the iterator built by fromQuery() from a stubbed toIterable(). */ 211 | public function testIterationWithSuccessfulReFetchesInNestedIterableResultFromQuery(): void 212 | { 213 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 214 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 215 | 216 | $this->query->method('toIterable')->willReturn(new ArrayIterator($originalObjects)); 217 | $iterator = SelectBatchIteratorAggregate::fromQuery($this->query, 100); 218 | 219 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, $freshObjects); 220 | } 221 | /** Same nested-result scenario through fromTraversableResult(). NOTE(review): the toIterable() stub below looks unused on this path - confirm. */ 222 | public function testIterationWithSuccessfulReFetchesInNestedIterableResultFromTraversableResult(): void 223 | { 224 | $originalObjects = ['aaa' => [new stdClass()], 'bbb' => [new stdClass()]]; 225 | $freshObjects = ['aaa' => new stdClass(), 'bbb' => new stdClass()]; 226 | 227 | $this->query->method('toIterable')->willReturn(new ArrayIterator($originalObjects)); 228 | $iterator = SelectBatchIteratorAggregate::fromTraversableResult(new ArrayIterator($originalObjects), $this->entityManager, 100); 229 | 230 | $this->assertSuccessfulReFetchesInNestedIterableResult($iterator, $originalObjects, $freshObjects); 231 | } 232 | 233 | /** Stubs getIdentifierValues() and find() for the two nested items, expects a single clear() call, iterates, and asserts that the fresh objects come back under the original keys. 234 | * @param array{aaa: list{stdClass}, bbb: list{stdClass}} $originalObjects 235 | * @param array{aaa: stdClass, bbb: stdClass} $freshObjects 236 | */ 237 | private function assertSuccessfulReFetchesInNestedIterableResult(SelectBatchIteratorAggregate $iterator, array $originalObjects, 
array $freshObjects): void 238 | { 239 | $this->metadata->method('getIdentifierValues')->willReturnMap( 240 | [ 241 | [$originalObjects['aaa'][0], ['id' => 123]], 242 | [$originalObjects['bbb'][0], ['id' => 456]], 243 | ], 244 | ); 245 | $this->entityManager->expects(self::exactly(count($originalObjects)))->method('find')->willReturnMap( 246 | [ 247 | [stdClass::class, ['id' => 123], null, null, $freshObjects['aaa']], 248 | [stdClass::class, ['id' => 456], null, null, $freshObjects['bbb']], 249 | ], 250 | ); 251 | $this->entityManager->expects(self::once())->method('clear'); 252 | 253 | $iteratedObjects = []; 254 | 255 | foreach ($iterator as $key => $value) { 256 | $iteratedObjects[$key] = $value; 257 | } 258 | /* each one-element list must have been replaced by the fresh object that find() returned for it */ 259 | $this->assertSame( 260 | [ 261 | 'aaa' => $freshObjects['aaa'], 262 | 'bbb' => $freshObjects['bbb'], 263 | ], 264 | $iteratedObjects, 265 | ); 266 | } 267 | 268 | /** 269 | * \Doctrine\ORM\AbstractQuery#iterate() produces nested results like [[entity],[entity],[entity]] instead 270 | * of a flat [entity,entity,entity], so we have to skip any entries that do not look like those. 
271 | */ 272 | public function testWillNotReFetchEntitiesInNonIterableAlikeResult(): void 273 | { 274 | $originalObjects = [ 275 | [new stdClass(), new stdClass()], 276 | ['123'], 277 | [], 278 | ]; 279 | /* none of the three rows is a one-object list: two objects, a single string, and an empty array */ 280 | $iterator = SelectBatchIteratorAggregate::fromArrayResult($originalObjects, $this->entityManager, 100); 281 | 282 | $this->entityManager->expects(self::never())->method('find'); 283 | $this->entityManager->expects(self::exactly(1))->method('clear'); 284 | 285 | $iteratedObjects = []; 286 | 287 | foreach ($iterator as $key => $value) { 288 | $iteratedObjects[$key] = $value; 289 | } 290 | 291 | $this->assertSame($originalObjects, $iteratedObjects); 292 | } 293 | 294 | /** 295 | * @psalm-param positive-int $batchSize 296 | * 297 | * @dataProvider iterationClearsProvider 298 | */ 299 | #[DataProvider('iterationClearsProvider')] 300 | public function testIterationClearsAtGivenBatchSizes(int $resultItemsCount, int $batchSize, int $expectedClearsCount): void 301 | { 302 | $object = new stdClass(); 303 | 304 | $iterator = SelectBatchIteratorAggregate::fromArrayResult( 305 | array_fill(0, $resultItemsCount, $object), 306 | $this->entityManager, 307 | $batchSize, 308 | ); 309 | 310 | $this->metadata->method('getIdentifierValues')->willReturn(['id' => 123]); 311 | $this->entityManager->expects(self::exactly($resultItemsCount))->method('find')->willReturn($object); 312 | $this->entityManager->expects(self::exactly($expectedClearsCount))->method('clear'); 313 | 314 | $iteratedObjects = []; 315 | 316 | foreach ($iterator as $key => $value) { 317 | $iteratedObjects[$key] = $value; 318 | } 319 | 320 | $this->assertCount($resultItemsCount, $iteratedObjects); 321 | } 322 | /* each row: [result item count, batch size, expected number of clear() calls] */ 323 | /** @return non-empty-list<array{int<1, max>, int<1, max>, int<1, max>}> */ 324 | public static function iterationClearsProvider(): array 325 | { 326 | return [ 327 | [10, 5, 3], 328 | [2, 1, 3], 329 | [15, 5, 4], 330 | [10, 2, 6], 331 | ]; 332 | } 333 | } 334 | 
--------------------------------------------------------------------------------