├── .formatter.exs ├── .github └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── config ├── .credo.exs └── config.exs ├── docker-compose.yml ├── lib ├── ecto_cursor_based_stream.ex └── ecto_cursor_based_stream │ └── task_synchronous.ex ├── mix.exs ├── mix.lock ├── priv └── repo │ └── migrations │ ├── 20211021163137_create_users.exs │ ├── 20240801134526_add_fields_to_users.exs │ └── 20240802124348_create_multi_cursor_table.exs └── test ├── ecto_cursor_based_stream_test.exs ├── support └── test_app │ ├── multi_cursor.ex │ ├── repo.ex │ ├── repo_case.ex │ └── user.ex └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | test: 10 | uses: ./.github/workflows/test.yml 11 | 12 | release: 13 | needs: test 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | elixir: ["1.16.2"] 19 | otp: ["26.1.2"] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - name: Setup Elixir 25 | uses: erlef/setup-beam@v1 26 | with: 27 | elixir-version: ${{ matrix.elixir }} 28 | otp-version: ${{ matrix.otp }} 29 | 30 | - name: Retrieve Cached Dependencies 31 | uses: actions/cache@v2 32 | id: mix-cache 33 | with: 34 | path: | 35 | deps 36 | _build 37 | priv/plts 38 | key: ${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('mix.lock') }} 39 | 40 | - name: Install Dependencies 41 | if: steps.mix-cache.outputs.cache-hit != 'true' 42 | run: | 43 | mkdir -p priv/plts 44 | mix local.rebar --force 45 | mix local.hex --force 46 | mix 
deps.get 47 | mix deps.compile 48 | 49 | - name: Publish to Hex 50 | env: 51 | HEX_API_KEY: ${{ secrets.HEX_API_KEY }} 52 | run: mix hex.publish --yes --replace 53 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | workflow_call: 5 | workflow_dispatch: 6 | pull_request: 7 | types: [synchronize, opened, reopened] 8 | push: 9 | branches: ["main"] 10 | tags-ignore: ["**"] 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | elixir: ["1.14.5", "1.15.7", "1.16.2"] 19 | otp: ["24.3.4.14", "25.3.2.7", "26.1.2"] 20 | 21 | services: 22 | db: 23 | image: postgres:12-alpine 24 | ports: ["54323:5432"] 25 | env: 26 | POSTGRES_USER: postgres 27 | POSTGRES_PASSWORD: postgres 28 | POSTGRES_DB: postgres 29 | options: >- 30 | --health-cmd pg_isready 31 | --health-interval 10s 32 | --health-timeout 5s 33 | --health-retries 5 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - name: Setup Elixir 39 | uses: erlef/setup-beam@v1 40 | with: 41 | elixir-version: ${{ matrix.elixir }} 42 | otp-version: ${{ matrix.otp }} 43 | 44 | - name: Retrieve Cached Dependencies 45 | uses: actions/cache@v2 46 | id: mix-cache 47 | with: 48 | path: | 49 | deps 50 | _build 51 | priv/plts 52 | key: ${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('mix.lock') }} 53 | 54 | - name: Install Dependencies 55 | if: steps.mix-cache.outputs.cache-hit != 'true' 56 | run: | 57 | mkdir -p priv/plts 58 | mix local.rebar --force 59 | mix local.hex --force 60 | mix deps.get 61 | mix deps.compile 62 | MIX_ENV=test mix deps.compile 63 | mix dialyzer --plt 64 | 65 | - name: Audit dependencies 66 | run: mix hex.audit 67 | 68 | - name: Check Formatting 69 | run: mix format --check-formatted 70 | 71 | - name: Check Unused deps 72 | run: mix deps.unlock --check-unused 73 | 74 | - name: 
Compile project without warnings 75 | run: | 76 | mix compile --warnings-as-errors 77 | MIX_ENV=test mix compile --warnings-as-errors 78 | 79 | - name: Run Credo 80 | run: mix credo --strict 81 | 82 | - name: Run Dialyzer 83 | run: mix dialyzer --no-check 84 | 85 | - name: Run tests 86 | run: mix test 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | ecto_cursor_based_stream-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [Unreleased] 2 | ### Changed 3 | 4 | ## [1.2.0] - 2024-08-14 5 | - allow to iterate over multiple fields in cursor, e.g. `cursor_field: [:id_1, :id_2]` 6 | - allow multiple fields in starting cursor, e.g. `after_cursor: %{id_1: id1, id_2: id2}` 7 | - allow ordering of results, e.g. 
`order: :desc` 8 | - pass Ecto options to `Ecto.Repo.all/2` 9 | - raise errors with friendly message on invalid cursor_field, invalid after_cursor and invalid custom select in Ecto query 10 | - add option to fetch records and process them in parallel, `parallel: true` 11 | 12 | ## [1.1.0] - 2024-05-16 13 | ### Changed 14 | - update dependencies 15 | - improve typespecs 16 | 17 | ## [1.0.2] - 2023-03-16 18 | ### Changed 19 | - Fix link to examples in hex docs 20 | 21 | ## [1.0.1] - 2023-02-16 22 | ### Changed 23 | - fix: Use correct option param names in docs and type specs 24 | 25 | ## [1.0.0] - 2023-02-01 26 | ### Changed 27 | - initial release 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2023] [Jack Tomaszewski] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EctoCursorBasedStream 2 | 3 | ![Build Status](https://img.shields.io/github/actions/workflow/status/allegro/ecto-cursor-based-stream/test.yml) [![Hex.pm](https://img.shields.io/hexpm/v/ecto_cursor_based_stream.svg)](https://hex.pm/packages/ecto_cursor_based_stream) [![Documentation](https://img.shields.io/badge/documentation-gray)](https://hexdocs.pm/ecto_cursor_based_stream/) 4 | 5 | Cursor-based streaming of Ecto records, that does not require database transaction. 6 | 7 | Gives you a [`cursor_based_stream/2`](https://hexdocs.pm/ecto_cursor_based_stream/EctoCursorBasedStream.html#c:cursor_based_stream/2) function that mimics [`Ecto.Repo.stream/2`](https://hexdocs.pm/ecto/Ecto.Repo.html#c:stream/2) interface. 
8 | 9 | Advantages in comparison to the standard `Ecto.Repo.stream/2`: 10 | 11 | - streaming can be stopped and continued at any point (by passing option `after_cursor: ...`), 12 | - works with tables that have millions of records. 13 | 14 | The only limitation is that you have to supply a _cursor column or columns_ (by passing option `cursor_field: ...`, defaults to `:id`). Such a column(s): 15 | 16 | - must have unique values, 17 | - should have a database index. (So that sorting by it, and returning a number of rows larger than `x` is a performant operation.) 18 | 19 | ## Usage 20 | 21 | 1. Add `ecto_cursor_based_stream` to your list of dependencies in `mix.exs`: 22 | 23 | ```elixir 24 | def deps do 25 | [ 26 | {:ecto_cursor_based_stream, "~> 1.2.0"} 27 | ] 28 | end 29 | ``` 30 | 31 | 2. Add `use EctoCursorBasedStream` to the module that uses `Ecto.Repo`: 32 | 33 | ```elixir 34 | defmodule MyRepo do 35 | use Ecto.Repo 36 | use EctoCursorBasedStream 37 | end 38 | ``` 39 | 40 | 3. Stream the rows using `cursor_based_stream/2`: 41 | 42 | ```elixir 43 | Post 44 | |> MyRepo.cursor_based_stream() 45 | |> Stream.each(...) 
46 | |> Stream.run() 47 | ``` 48 | 49 | ## Useful links 50 | 51 | - [Documentation](https://hexdocs.pm/ecto_cursor_based_stream/EctoCursorBasedStream.html) 52 | - [Examples](https://github.com/allegro/ecto-cursor-based-stream/blob/main/test/ecto_cursor_based_stream_test.exs) 53 | 54 | ## Contributing 55 | 56 | ### Running tests 57 | 58 | Run the following after cloning the repo: 59 | 60 | ```sh 61 | mix deps.get 62 | docker-compose up -d 63 | mix test 64 | ``` 65 | -------------------------------------------------------------------------------- /config/.credo.exs: -------------------------------------------------------------------------------- 1 | %{ 2 | configs: [ 3 | %{ 4 | name: "default", 5 | strict: true, 6 | checks: [ 7 | {Credo.Check.Readability.ModuleDoc, files: %{excluded: ["test"]}} 8 | ] 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :logger, level: :warning 4 | 5 | config :ecto_cursor_based_stream, ecto_repos: [TestApp.Repo] 6 | 7 | config :ecto_cursor_based_stream, TestApp.Repo, 8 | username: "postgres", 9 | password: "postgres", 10 | database: "postgres", 11 | hostname: "localhost", 12 | port: 54323, 13 | pool: Ecto.Adapters.SQL.Sandbox 14 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | db: 4 | image: postgres:12-alpine 5 | ports: ["54323:5432"] 6 | environment: 7 | - POSTGRES_DB=postgres 8 | - POSTGRES_USER=postgres 9 | - POSTGRES_PASSWORD=postgres 10 | healthcheck: 11 | test: ["CMD-SHELL", "pg_isready"] 12 | interval: 10s 13 | timeout: 5s 14 | retries: 5 15 | -------------------------------------------------------------------------------- /lib/ecto_cursor_based_stream.ex: 
--------------------------------------------------------------------------------
defmodule EctoCursorBasedStream do
  @moduledoc """
  Use this module in any module that uses `Ecto.Repo`
  to enrich it with `cursor_based_stream/2` function.

  Example:

      defmodule MyRepo do
        use Ecto.Repo
        use EctoCursorBasedStream
      end

      MyUser
      |> MyRepo.cursor_based_stream(max_rows: 100)
      |> Stream.each(...)
      |> Stream.run()
  """
  import Ecto.Query

  @type cursor_based_stream_opts :: [
          {:max_rows, non_neg_integer()}
          | {:after_cursor, term() | %{atom() => term()}}
          | {:cursor_field, atom() | [atom()]}
          | {:order, :asc | :desc}
          | {:parallel, boolean()}
          | {:prefix, String.t()}
          | {:timeout, non_neg_integer()}
          | {:log, false | Logger.level()}
          | {:telemetry_event, term()}
          | {:telemetry_options, term()}
        ]

  @doc """
  Return a lazy enumerable that emits all entries from the data store
  matching the given query.

  In contrast to `Ecto.Repo.stream/2`,
  this will not use database mechanisms (e.g. database transactions) to stream the rows.

  It does so by sorting all the rows by `:cursor_field` and iterating over them in chunks
  of size `:max_rows`.

  ## Options

  * `:cursor_field` - the field or list of fields by which all rows should be iterated.

    **This field must have unique values. (Otherwise, some rows may get skipped.)**

    For performance reasons, we recommend that you have an index on that field. Defaults to `:id`.

  * `:after_cursor` - the value of the `:cursor_field` after which results start. When `:cursor_field`
    is a list then `:after_cursor` must be a map where keys are cursor fields (not all fields are required).

    Useful when you want to continue streaming from a certain point.
    Any rows with value equal or smaller than this value will not be included
    (equal or larger when `order: :desc` is used).

    Defaults to `nil`. (All rows will be included.)

  * `:max_rows` - The number of rows to load from the database as we stream.

    Defaults to 500.

  * `:order` - Order of results, `:asc` or `:desc`.

    Defaults to `:asc`.

  * `:parallel` - when `true` fetches next batch of records in parallel to processing the stream.

    Defaults to `false` as this spawns `Task`s and could cause issues e.g. with Ecto Sandbox in tests.

  * `:prefix, :timeout, :log, :telemetry_event, :telemetry_options` - options passed directly to `Ecto.Repo.all/2`

  ## Examples

      MyUser
      |> MyRepo.cursor_based_stream(max_rows: 1000)
      |> Stream.each(...)
      |> Stream.run()

      # change order, run in parallel
      MyUser
      |> MyRepo.cursor_based_stream(order: :desc, parallel: true)
      |> Stream.each(...)
      |> Stream.run()

      # change cursor field and set starting cursor
      MyUser
      |> MyRepo.cursor_based_stream(cursor_field: :email, after_cursor: "foo@bar.com")
      |> Stream.each(...)
      |> Stream.run()

      # with multiple fields
      MyUser
      |> MyRepo.cursor_based_stream(cursor_field: [:email, :date_of_birth], after_cursor: %{email: "foo@bar.com"})
      |> Stream.each(...)
      |> Stream.run()

      # select custom fields, remember to add cursor_field to select
      MyUser
      |> select([u], map(u, [:my_id, ...]))
      |> select_merge([u], ...)
      |> MyRepo.cursor_based_stream(cursor_field: :my_id)
      |> Stream.each(...)
      |> Stream.run()

      # pass custom options to Ecto.Repo.all/2
      MyUser
      |> MyRepo.cursor_based_stream(timeout: 60_000, prefix: "public")
      |> Stream.each(...)
      |> Stream.run()
  """
  @callback cursor_based_stream(Ecto.Queryable.t(), cursor_based_stream_opts) :: Enum.t()

  defmacro __using__(_) do
    quote do
      @behaviour EctoCursorBasedStream

      @impl EctoCursorBasedStream
      def cursor_based_stream(queryable, options \\ []) do
        EctoCursorBasedStream.call(__MODULE__, queryable, options)
      end
    end
  end

  @doc false
  @spec call(Ecto.Repo.t(), Ecto.Queryable.t(), cursor_based_stream_opts) :: Enumerable.t()
  def call(repo, queryable, options \\ []) do
    %{after_cursor: after_cursor, cursor_fields: cursor_fields} = options = parse_options(options)

    Stream.unfold(nil, fn
      # First pull: schedule the initial page and emit nothing yet.
      nil ->
        task = get_rows_task(repo, queryable, after_cursor, options)
        {[], task}

      # Later pulls: collect the pending page and schedule the following one
      # before handing the rows downstream.
      task ->
        # Wait with `:infinity` instead of the 5_000 ms default of `Task.await/2`:
        # the query is already bounded by the `:timeout` passed to `repo.all/2`,
        # and a shorter await would exit the caller on any query slower than 5 s.
        case options.task_module.await(task, :infinity) do
          [] ->
            nil

          rows ->
            next_cursor = get_last_row_cursor(rows, cursor_fields)
            task = get_rows_task(repo, queryable, next_cursor, options)
            {rows, task}
        end
    end)
    |> Stream.flat_map(& &1)
  end

  # Normalises the user-supplied keyword list into the map used internally.
  # Raises `ArgumentError` on an invalid `:cursor_field`, `:after_cursor` or `:order`.
  defp parse_options(options) do
    max_rows = Keyword.get(options, :max_rows, 500)
    after_cursor = Keyword.get(options, :after_cursor, nil)
    cursor_field = Keyword.get(options, :cursor_field, :id)
    order = Keyword.get(options, :order, :asc)

    task_module =
      if Keyword.get(options, :parallel, false),
        do: Task,
        else: EctoCursorBasedStream.TaskSynchronous

    repo_opts =
      Keyword.take(options, [:prefix, :timeout, :log, :telemetry_event, :telemetry_options])

    cursor_fields = validate_cursor_fields(cursor_field)

    %{
      max_rows: max_rows,
      cursor_fields: cursor_fields,
      after_cursor: validate_initial_cursor(cursor_fields, after_cursor),
      order: validate_order(order),
      repo_opts: repo_opts,
      task_module: task_module
    }
  end

  # Accepts a single atom or a list of atoms and always returns a list.
  defp validate_cursor_fields(value) do
    cursor_fields = List.wrap(value)

    if Enum.all?(cursor_fields, &is_atom/1) do
      cursor_fields
    else
      raise ArgumentError,
            "EctoCursorBasedStream expected `cursor_field` to be an atom or list of atoms, got: #{inspect(value)}."
    end
  end

  # Only `:asc` and `:desc` have a matching `apply_cursor_conditions/4` clause,
  # so anything else is rejected here instead of failing on the second page.
  defp validate_order(order) when order in [:asc, :desc], do: order

  defp validate_order(order) do
    raise ArgumentError,
          "EctoCursorBasedStream expected `order` to be `:asc` or `:desc`, got: #{inspect(order)}."
  end

  # Normalises `:after_cursor` into a `%{cursor_field => value}` map.
  # `nil` means "start from the beginning" and becomes an empty map.
  defp validate_initial_cursor(_, nil) do
    %{}
  end

  # A plain map may name any subset of the cursor fields, but nothing else.
  # Structs are excluded: `%{} = value` also matches `%DateTime{}`, `%Date{}`,
  # `%Decimal{}` etc., which are cursor *values*, not cursor maps.
  defp validate_initial_cursor(cursor_fields, %{} = value) when not is_struct(value) do
    {after_cursor, rest} = Map.split(value, cursor_fields)

    if map_size(rest) == 0 do
      after_cursor
    else
      raise ArgumentError,
            "EctoCursorBasedStream expected `after_cursor` to be a map with fields #{inspect(cursor_fields)}, got: #{inspect(value)}."
    end
  end

  # A bare value is only unambiguous when there is exactly one cursor field.
  defp validate_initial_cursor([cursor_field], value)
       when not is_list(value) and not is_tuple(value) do
    %{cursor_field => value}
  end

  defp validate_initial_cursor(cursor_fields, value) do
    raise ArgumentError,
          "EctoCursorBasedStream expected `after_cursor` to be a map with fields #{inspect(cursor_fields)}, got: #{inspect(value)}."
  end

  # Builds the query for one page (ordered by the cursor fields, filtered to rows
  # past `cursor`, limited to `max_rows`) and runs it through the configured task module.
  defp get_rows_task(repo, query, cursor, options) do
    %{cursor_fields: cursor_fields, order: order, max_rows: max_rows, repo_opts: repo_opts} =
      options

    order_by = Enum.map(cursor_fields, fn cursor_field -> {order, cursor_field} end)

    options.task_module.async(fn ->
      query
      |> order_by([o], ^order_by)
      |> apply_cursor_conditions(cursor_fields, cursor, order)
      |> limit(^max_rows)
      |> repo.all(repo_opts)
    end)
  end

  # An empty cursor means no lower/upper bound: the query is returned untouched.
  defp apply_cursor_conditions(query, _cursor_fields, cursor, _order)
       when map_size(cursor) == 0 do
    query
  end

  # Builds a row-wise "greater than" comparison over the cursor fields.
  # The fields are folded from the last one to the first, so for `[a, b]` the
  # result is `a >= va and (a > va or b > vb)`.
  defp apply_cursor_conditions(query, cursor_fields, cursor, :asc) do
    conditions =
      cursor_fields
      |> zip_cursor_fields_with_values(cursor)
      |> Enum.reverse()
      |> Enum.reduce(nil, fn
        {field, value}, nil ->
          dynamic([r], field(r, ^field) > ^value)

        {field, value}, acc ->
          dynamic([r], field(r, ^field) >= ^value and (field(r, ^field) > ^value or ^acc))
      end)

    where(query, [r], ^conditions)
  end

  # Mirror image of the `:asc` clause using `<` / `<=`.
  defp apply_cursor_conditions(query, cursor_fields, cursor, :desc) do
    conditions =
      cursor_fields
      |> zip_cursor_fields_with_values(cursor)
      |> Enum.reverse()
      |> Enum.reduce(nil, fn
        {field, value}, nil ->
          dynamic([r], field(r, ^field) < ^value)

        {field, value}, acc ->
          dynamic(
            [r],
            field(r, ^field) <= ^value and
              (field(r, ^field) < ^value or ^acc)
          )
      end)

    where(query, [r], ^conditions)
  end

  # Pairs every cursor field with its value in `cursor`, keeping field order.
  # Fields that are absent from the cursor (or whose value is `nil`) are dropped,
  # which is what allows `:after_cursor` to name only some of the cursor fields.
  defp zip_cursor_fields_with_values(cursor_fields, cursor) do
    cursor_fields
    |> Enum.map(fn cursor_field ->
      {cursor_field, Map.get(cursor, cursor_field)}
    end)
    |> Enum.reject(&is_nil(elem(&1, 1)))
  end

  # Extracts the cursor for the next page from the last row of the current page.
  # Raises `RuntimeError` when the row is not a map or lacks one of the cursor fields.
  defp get_last_row_cursor(rows, cursor_fields) do
    last_row = List.last(rows)

    if not is_map(last_row) do
      select = Enum.map_join(cursor_fields, ", ", &inspect/1)

      raise RuntimeError,
            "EctoCursorBasedStream query must return a map with cursor field. If you are using custom `select` ensure that all cursor fields are returned as a map, e.g. `select([s], map(s, [#{select}]))`."
    end

    Map.new(cursor_fields, fn cursor_field ->
      case Map.fetch(last_row, cursor_field) do
        {:ok, value} ->
          {cursor_field, value}

        :error ->
          raise RuntimeError,
                "EctoCursorBasedStream query did not return cursor field #{inspect(cursor_field)}. If you are using custom `select` ensure that all cursor fields are returned as a map, e.g. `select([s], map(s, [#{inspect(cursor_field)}, ...]))`."
      end
    end)
  end
end
-------------------------------------------------------------------------------- /lib/ecto_cursor_based_stream/task_synchronous.ex: -------------------------------------------------------------------------------- 1 | defmodule EctoCursorBasedStream.TaskSynchronous do 2 | @moduledoc false 3 | 4 | def async(fun) do 5 | result = fun.() 6 | 7 | struct(Task, %{ 8 | owner: self(), 9 | pid: self(), 10 | ref: result, 11 | mfa: {:erlang, :apply, [fun, []]} 12 | }) 13 | end 14 | 15 | def await(%Task{ref: result}, _timeout \\ 5000) do 16 | result 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule EctoCursorBasedStream.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :ecto_cursor_based_stream, 7 | version: "1.2.0", 8 | elixir: "~> 1.13", 9 | elixirc_paths: elixirc_paths(Mix.env()), 10 | start_permanent: Mix.env() == :prod, 11 | description: 12 | "Cursor-based streaming of Ecto records that doesn't require database transaction.", 13 | package: [ 14 | files: ["lib", ".formatter.exs", "mix.exs", "README*", 
"LICENSE*"], 15 | licenses: ["Apache-2.0"], 16 | links: %{ 17 | "Source code" => "https://github.com/allegro/ecto-cursor-based-stream", 18 | "Documentation" => "https://hexdocs.pm/ecto_cursor_based_stream" 19 | } 20 | ], 21 | docs: [ 22 | main: "readme", 23 | source_url: "https://github.com/allegro/ecto-cursor-based-stream", 24 | extras: ["README.md"] 25 | ], 26 | deps: deps(), 27 | aliases: aliases() 28 | ] 29 | end 30 | 31 | def application do 32 | [ 33 | extra_applications: [:logger] 34 | ] 35 | end 36 | 37 | defp elixirc_paths(:test), do: ["lib", "test/support"] 38 | defp elixirc_paths(_), do: ["lib"] 39 | 40 | defp deps do 41 | [ 42 | {:ecto_sql, "~> 3.0"}, 43 | {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false}, 44 | {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, 45 | {:ex_doc, "~> 0.32", only: :dev, runtime: false}, 46 | {:postgrex, "~> 0.17", only: [:test]} 47 | ] 48 | end 49 | 50 | defp aliases do 51 | [ 52 | test: ["ecto.create --quiet", "ecto.migrate", "test"] 53 | ] 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 3 | "credo": {:hex, :credo, "1.7.6", "b8f14011a5443f2839b04def0b252300842ce7388f3af177157c86da18dfbeea", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "146f347fb9f8cbc5f7e39e3f22f70acbef51d441baa6d10169dd604bfbc55296"}, 4 | "db_connection": {:hex, :db_connection, "2.6.0", "77d835c472b5b67fc4f29556dee74bf511bbafecdcaf98c27d27fa5918152086", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", 
[hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c2f992d15725e721ec7fbc1189d4ecdb8afef76648c746a8e1cad35e3b8a35f3"}, 5 | "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, 6 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 7 | "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, 8 | "ecto": {:hex, :ecto, "3.11.2", "e1d26be989db350a633667c5cda9c3d115ae779b66da567c68c80cfb26a8c9ee", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c38bca2c6f8d8023f2145326cc8a80100c3ffe4dcbd9842ff867f7fc6156c65"}, 9 | "ecto_sql": {:hex, :ecto_sql, "3.11.1", "e9abf28ae27ef3916b43545f9578b4750956ccea444853606472089e7d169470", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.11.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.16.0 or ~> 0.17.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ce14063ab3514424276e7e360108ad6c2308f6d88164a076aac8a387e1fea634"}, 10 | "erlex": {:hex, :erlex, "0.2.6", 
"c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 11 | "ex_doc": {:hex, :ex_doc, "0.32.2", "f60bbeb6ccbe75d005763e2a328e6f05e0624232f2393bc693611c2d3ae9fa0e", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "a4480305cdfe7fdfcbb77d1092c76161626d9a7aa4fb698aee745996e34602df"}, 12 | "file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", "6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"}, 13 | "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, 14 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 15 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 16 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.0", 
"6f0eff9c9c489f26b69b61440bf1b238d95badae49adac77973cbacae87e3c2e", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "ea7a9307de9d1548d2a72d299058d1fd2339e3d398560a0e46c27dab4891e4d2"}, 17 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 18 | "postgrex": {:hex, :postgrex, "0.17.5", "0483d054938a8dc069b21bdd636bf56c487404c241ce6c319c1f43588246b281", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "50b8b11afbb2c4095a3ba675b4f055c416d0f3d7de6633a595fc131a828a67eb"}, 19 | "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, 20 | } 21 | -------------------------------------------------------------------------------- /priv/repo/migrations/20211021163137_create_users.exs: -------------------------------------------------------------------------------- 1 | defmodule TestApp.Repo.Migrations.CreateUsers do 2 | use Ecto.Migration 3 | 4 | def up do 5 | create table(:users) do 6 | add :email, :string, null: false 7 | end 8 | end 9 | 10 | def down do 11 | drop table(:users) 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /priv/repo/migrations/20240801134526_add_fields_to_users.exs: -------------------------------------------------------------------------------- 1 | defmodule TestApp.Repo.Migrations.AddFieldsToUsers do 2 | use Ecto.Migration 3 | 4 | def change do 5 | alter table(:users) do 6 | add :country_of_birth, 
# priv/repo/migrations/20240802124348_create_multi_cursor_table.exs

defmodule TestApp.Repo.Migrations.CreateMultiCursorTable do
  use Ecto.Migration

  # Creates `multi_cursor` without the default primary key column; the three
  # integer columns are each flagged `primary_key: true`.
  def change do
    create table(:multi_cursor, primary_key: false) do
      for column <- [:id_1, :id_2, :id_3] do
        add(column, :integer, primary_key: true)
      end
    end
  end
end
# test/ecto_cursor_based_stream_test.exs - tests inside describe "cursor_based_stream/2"
# The describe's setup inserts five users and passes them as `rows`.

    test "if rows total count is equal to the :max_rows option, streams all the rows", %{
      rows: rows
    } do
      # Batch size matches the fixture exactly, so the boundary named in the
      # title (one completely full batch) is what actually runs.
      result = User |> Repo.cursor_based_stream(max_rows: length(rows)) |> Enum.to_list()

      assert result == Enum.sort_by(rows, & &1.id)
    end

    test "if rows total count is larger than the :max_rows option, streams all the rows", %{
      rows: rows
    } do
      # Five rows with batches of two.
      result = User |> Repo.cursor_based_stream(max_rows: 2) |> Enum.to_list()

      assert result == Enum.sort_by(rows, & &1.id)
    end

    test "if :cursor_field option is given, iterates rows over that field", %{rows: rows} do
      result =
        User
        |> Repo.cursor_based_stream(max_rows: 2, cursor_field: :email)
        |> Enum.to_list()

      # Fixture emails ("1@test.com" .. "5@test.com") sort in insertion order.
      assert result == rows
    end

    test "if :cursor_field is a list of fields, iterates rows over all fields", %{rows: rows} do
      result =
        User
        |> Repo.cursor_based_stream(cursor_field: [:country_of_birth, :date_of_birth])
        |> Enum.to_list()

      assert result == Enum.sort_by(rows, &{&1.country_of_birth, &1.date_of_birth})
    end

    test "if :after_cursor option is given, skips any rows with value not greater than it",
         %{
           rows: rows
         } do
      result =
        User
        |> Repo.cursor_based_stream(
          max_rows: 2,
          cursor_field: :email,
          after_cursor: "1@test.com"
        )
        |> Enum.to_list()

      # Everything except the first fixture row.
      assert result == Enum.slice(rows, 1, 4)
    end

    test "if :order option is given, changes order of result", %{rows: rows} do
      result =
        User
        |> Repo.cursor_based_stream(max_rows: 2, order: :asc)
        |> Enum.to_list()

      assert result == rows

      result =
        User
        |> Repo.cursor_based_stream(max_rows: 2, order: :desc)
        |> Enum.to_list()

      assert result == Enum.reverse(rows)
    end
# test/ecto_cursor_based_stream_test.exs - stub repos inside describe "cursor_based_stream/2"

    # Repo stand-in: reports each `all/2` call to the calling process as a
    # message and returns no rows.
    defmodule RepoStub do
      use EctoCursorBasedStream

      def all(queryable, repo_opts) do
        send(self(), {__MODULE__, queryable, repo_opts})
        []
      end
    end

    test "if repo opts are given they are passed to Repo.all/2" do
      stream =
        RepoStub.cursor_based_stream(User, prefix: "public", timeout: 10_000, log: false)

      Enum.to_list(stream)

      assert_receive {RepoStub, _query, options}

      assert options == [prefix: "public", timeout: 10_000, log: false]
    end

    # Repo stand-in: every `all/2` call takes 100 ms and returns one user whose
    # id is the current monotonic time.
    defmodule ParallelRepoStub do
      use EctoCursorBasedStream

      def all(_queryable, _repo_opts) do
        Process.sleep(100)

        stamp = System.monotonic_time()
        [%User{id: stamp}]
      end
    end
# test/ecto_cursor_based_stream_test.exs - describe "multi column cursor"

  describe "multi column cursor" do
    # Inserts the full 5 x 5 x 5 grid of (id_1, id_2, id_3) combinations: 125 rows.
    setup do
      grid = for a <- 1..5, b <- 1..5, c <- 1..5, do: %{id_1: a, id_2: b, id_3: c}

      Repo.insert_all(MultiCursor, grid)
      :ok
    end

    test "iterates over all values" do
      Enum.each([:asc, :desc], fn direction ->
        # Two cursor fields over the 25 rows that share id_1 == 1.
        first_group_count =
          MultiCursor
          |> where([m], m.id_1 == 1)
          |> Repo.cursor_based_stream(
            cursor_field: [:id_2, :id_3],
            max_rows: :rand.uniform(10),
            order: direction
          )
          |> Enum.count()

        assert first_group_count == 25

        # Three cursor fields over the whole table.
        total_count =
          MultiCursor
          |> Repo.cursor_based_stream(
            cursor_field: [:id_1, :id_2, :id_3],
            max_rows: :rand.uniform(10),
            order: direction
          )
          |> Enum.count()

        assert total_count == 125
      end)
    end

    test ":after_cursor on multiple cursor fields with initial cursor ascending" do
      rows_after = fn cursor ->
        options = [cursor_field: [:id_1, :id_2, :id_3], after_cursor: cursor, max_rows: 20]

        MultiCursor |> Repo.cursor_based_stream(options) |> Enum.to_list()
      end

      # Cursor given for the first two fields only.
      result = rows_after.(%{id_1: 2, id_2: 3})

      assert length(result) == 3 * 25 + 2 * 5
      assert [%{id_1: 2, id_2: 4, id_3: 1} | _] = result
      assert %{id_1: 5, id_2: 5, id_3: 5} = List.last(result)

      # Cursor given for all three fields.
      result = rows_after.(%{id_1: 2, id_2: 3, id_3: 4})

      assert length(result) == 3 * 25 + 2 * 5 + 1
      assert [%{id_1: 2, id_2: 3, id_3: 5} | _] = result
      assert %{id_1: 5, id_2: 5, id_3: 5} = List.last(result)
    end

    test ":after_cursor on multiple cursor fields with initial cursor descending" do
      rows_after = fn cursor ->
        options = [
          cursor_field: [:id_1, :id_2, :id_3],
          after_cursor: cursor,
          max_rows: 20,
          order: :desc
        ]

        MultiCursor |> Repo.cursor_based_stream(options) |> Enum.to_list()
      end

      # Cursor given for the first two fields only.
      result = rows_after.(%{id_1: 2, id_2: 3})

      assert length(result) == 1 * 25 + 2 * 5
      assert [%{id_1: 2, id_2: 2, id_3: 5} | _] = result
      assert %{id_1: 1, id_2: 1, id_3: 1} = List.last(result)

      # Cursor given for all three fields.
      result = rows_after.(%{id_1: 2, id_2: 3, id_3: 4})

      assert length(result) == 1 * 25 + 2 * 5 + 3
      assert [%{id_1: 2, id_2: 3, id_3: 3} | _] = result
      assert %{id_1: 1, id_2: 1, id_3: 1} = List.last(result)
    end
  end
# test/ecto_cursor_based_stream_test.exs - test inside describe "validations"

    test ":after_cursor must contain fields from `cursor_field`" do
      # Streams users over [:id, :email] starting after the given cursor.
      stream_after = fn cursor ->
        User
        |> Repo.cursor_based_stream(cursor_field: [:id, :email], after_cursor: cursor)
        |> Enum.to_list()
      end

      # A cursor that is not a map.
      assert_raise ArgumentError,
                   "EctoCursorBasedStream expected `after_cursor` to be a map with fields [:id, :email], got: \"10\".",
                   fn -> stream_after.("10") end

      # A map whose only key is not one of the cursor fields.
      assert_raise ArgumentError,
                   "EctoCursorBasedStream expected `after_cursor` to be a map with fields [:id, :email], got: %{emial: \"foo@bar.com\"}.",
                   fn -> stream_after.(%{emial: "foo@bar.com"}) end
    end
# test/support/test_app/multi_cursor.ex

defmodule TestApp.MultiCursor do
  # Schema for the "multi_cursor" table: three integer columns and no
  # autogenerated primary key field.
  use Ecto.Schema

  @primary_key false
  schema "multi_cursor" do
    field(:id_1, :integer)
    field(:id_2, :integer)
    field(:id_3, :integer)
  end
end

# test/support/test_app/repo.ex

defmodule TestApp.Repo do
  # Postgres-backed repo used by the test suite, extended with
  # `EctoCursorBasedStream`.
  use Ecto.Repo, otp_app: :ecto_cursor_based_stream, adapter: Ecto.Adapters.Postgres
  use EctoCursorBasedStream
end

# test/support/test_app/repo_case.ex

defmodule TestApp.RepoCase do
  @moduledoc """
  Case template for tests that use `TestApp.Repo`.

  Using this template imports `Ecto`, `Ecto.Changeset` and `Ecto.Query`, and
  aliases `TestApp.MultiCursor`, `TestApp.Repo` and `TestApp.User`.

  Before every test a sandbox connection is checked out. Unless the test is
  tagged `async`, the sandbox is also switched to shared mode owned by the
  test process.
  """

  use ExUnit.CaseTemplate

  alias Ecto.Adapters.SQL.Sandbox

  using do
    quote do
      import Ecto
      import Ecto.Changeset
      import Ecto.Query

      alias TestApp.{MultiCursor, Repo, User}
    end
  end

  setup context do
    :ok = Sandbox.checkout(TestApp.Repo)

    if !context[:async], do: Sandbox.mode(TestApp.Repo, {:shared, self()})

    :ok
  end
end

# test/support/test_app/user.ex

defmodule TestApp.User do
  # Schema for the "users" table.
  use Ecto.Schema

  schema "users" do
    field(:email, :string)
    field(:country_of_birth, :string)
    field(:date_of_birth, :utc_datetime_usec)
  end
end

# test/test_helper.exs

# Start the database applications, then the repo, then ExUnit.
Enum.each([:postgrex, :ecto_sql], &Application.ensure_all_started/1)

{:ok, _repo_pid} = TestApp.Repo.start_link()

ExUnit.start()