├── .formatter.exs ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── guides ├── examples │ ├── amazon-sqs.md │ ├── apache-kafka.md │ ├── custom-producers.md │ ├── google-cloud-pubsub.md │ ├── introduction.md │ └── rabbitmq.md └── internals │ └── architecture.md ├── lib ├── broadway.ex └── broadway │ ├── acknowledger.ex │ ├── application.ex │ ├── batch_info.ex │ ├── caller_acknowledger.ex │ ├── config_storage.ex │ ├── config_storage │ ├── ets.ex │ └── persistent_term.ex │ ├── dummy_producer.ex │ ├── message.ex │ ├── noop_acknowledger.ex │ ├── options.ex │ ├── producer.ex │ ├── topology.ex │ └── topology │ ├── batch_processor_stage.ex │ ├── batcher_stage.ex │ ├── processor_stage.ex │ ├── producer_stage.ex │ ├── rate_limiter.ex │ ├── subscriber.ex │ └── terminator.ex ├── mix.exs ├── mix.lock └── test ├── broadway ├── acknowledger_test.exs ├── config_storage_test.exs ├── dummy_producer_test.exs └── topology │ ├── batcher_stage_test.exs │ ├── processor_stage_test.exs │ └── producer_stage_test.exs ├── broadway_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-20.04 12 | env: 13 | MIX_ENV: test 14 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | include: 19 | # Earliest-supported versions. 20 | - elixir: "1.7.4" 21 | otp: "21.3.8.17" 22 | 23 | # Latest versions. 24 | - elixir: "1.18" 25 | otp: "27.2" 26 | lint: lint 27 | coverage: coverage 28 | steps: 29 | - name: Check out this repository 30 | uses: actions/checkout@v4 31 | 32 | - name: Set up Erlang and Elixir 33 | uses: erlef/setup-beam@v1 34 | with: 35 | otp-version: ${{matrix.otp}} 36 | elixir-version: ${{matrix.elixir}} 37 | 38 | - name: Cache Mix dependencies 39 | uses: actions/cache@v3 40 | id: cache-deps 41 | with: 42 | path: | 43 | deps 44 | _build 45 | key: | 46 | mix-${{ runner.os }}-${{matrix.elixir}}-${{matrix.otp}}-${{ hashFiles('**/mix.lock') }} 47 | restore-keys: | 48 | mix-${{ runner.os }}-${{matrix.elixir}}-${{matrix.otp}}- 49 | 50 | - run: mix do deps.get --check-locked, deps.compile 51 | if: steps.cache-deps.outputs.cache-hit != 'true' 52 | 53 | - run: mix format --check-formatted 54 | if: ${{ matrix.lint }} 55 | 56 | - run: mix deps.unlock --check-unused 57 | if: ${{ matrix.lint }} 58 | 59 | - run: mix compile --warnings-as-errors 60 | if: ${{ matrix.lint }} 61 | 62 | - run: mix test 63 | if: ${{!matrix.coverage}} 64 | 65 | - run: mix coveralls.github 66 | if: ${{matrix.coverage}} 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 
11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | broadway-*.tar 24 | 25 | .elixir_ls 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v1.2.1 (2025-02-12) 4 | 5 | * Run `setup` callback on Broadway startup 6 | 7 | ## v1.2.0 (2025-02-10) 8 | 9 | * Do not clean up persistent terms on shutdown 10 | * Add format_discarded callback 11 | * Allow different config storages 12 | 13 | ## v1.1.0 (2024-06-21) 14 | 15 | ### Bug fix 16 | 17 | * No longer set demand to `:accumulate` when draining, for compatibility with GenStage v1.2+. This means that any polling implementation must implement the `prepare_for_draining` callback and stop polling messages. You can check how [BroadwaySQS](https://github.com/dashbitco/broadway_sqs/commit/5b8f18a78e4760b5fcc839ad576be8c63345add0) tackles this problem as an example 18 | 19 | ### Enhancements 20 | 21 | * Log leaked trapped exits 22 | 23 | ## v1.0.7 (2023-04-22) 24 | 25 | ### Enhancements 26 | 27 | * Relax `nimble_options` dependency 28 | * Improve documentation and error messages 29 | 30 | ## v1.0.6 (2023-01-19) 31 | 32 | ### Bug fixes 33 | 34 | * Remove the assumption a cancelled timer has been delivered 35 | 36 | ## v1.0.5 (2022-11-06) 37 | 38 | ### Bug fixes 39 | 40 | * Fix NoopAcknowledger metadata name 41 | 42 | ## v1.0.4 (2022-11-05) 43 | 44 | ### Enhancements 45 | 46 | * Add `init` convenience function to acknowledgers 47 | * Allow `:reset` option when calling `update_rate_limiting`. When set to true, the rate limit counter and interval is immediately reset 48 | * Add the producer to the telemetry metadata 49 | * Support custom function in `:batch_size` for customized batch splitting logic 50 | 51 | ## v1.0.3 (2022-03-18) 52 | 53 | ### Bug fixes 54 | 55 | * Move the `process_name/2` callback to the `Broadway` behaviour where it belongs 56 | 57 | ## v1.0.2 (2022-01-12) 58 | 59 | ### Enhancements 60 | 61 | * Also allow `nimble_options ~> 0.4.0` 62 | 63 | ## v1.0.1 (2021-10-12) 64 | 65 | ### Bug fixes 66 | 67 | * Make sure `processors` and `batch_processors` demands are properly shuffled 68 | * Ensure proper messages metadata for telemetry events on `Broadway.Message.failed/2` 69 | 70 | ## v1.0.0 (2021-08-30) 71 | 72 | Broadway v1.0 requires Erlang/OTP 21.3+. 
73 | 74 | ### Backwards incompatible changes 75 | 76 | * Remove `Broadway.TermStorage` now that we have Broadway topology information on the producer init callback 77 | * Rename `:events` to `:messages` in batcher telemetry event 78 | * Remove `:time` from "stop" telemetry event measurements 79 | * Rename `:time` to `:system_time` in telemetry event measurements 80 | * Rename `[:broadway, :consumer, *]` to `[:broadway, :batch_processor, *]` in telemetry event 81 | 82 | ### Enhancements 83 | 84 | * Add `Broadway.Message.put_data/2` 85 | * Add `Broadway.stop/1` 86 | * Add `Broadway.all_running/0` 87 | * Add `Broadway.topology/1` 88 | * Add ack configuration to `Broadway.test_message/3` and `Broadway.test_batch/3` 89 | * Allow Broadway :via tuples as broadway names 90 | * Enrich telemetry events with metadata 91 | 92 | ## v0.6.2 (2020-08-17) 93 | 94 | * Make `Broadway.Producer` public 95 | * Add optional `prepare_messages` callback 96 | 97 | ## v0.6.1 (2020-06-02) 98 | 99 | * Rename `:failure` Telemetry event to `:exception` so it conforms to the telemetry specification 100 | * Deprecate `Broadway.test_messages/3` in favor of `Broadway.test_message/3` and `Broadway.test_batch/3` 101 | 102 | ## v0.6.0 (2020-02-13) 103 | 104 | * Deprecate `:stages` in favor of `:concurrency` for clarity 105 | * Do not validate `:batcher` if message failed 106 | * Add support for rate limiting producers 107 | * Support returning state in `c:Broadway.Producer.prepare_for_draining/1` 108 | * Emit telemetry events 109 | * Add Kafka guide 110 | 111 | ## v0.5.0 (2019-11-04) 112 | 113 | * Deprecate `:producers` in favor of a single `:producer` key 114 | * Add `Broadway.Message.configure_ack/3` 115 | * Add `Broadway.Message.ack_immediately/1` 116 | * Add `Broadway.producer_names/1` 117 | * Add the `c:Broadway.handle_failed/2` optional callback which is invoked with failed messages 118 | * Add `:crash_reason` to Logger reports metadata 119 | * Add `c:Broadway.Producer.prepare_for_start/2` optional callback which allows producers to customize the topology 120 | * Support `partition_by` in processors and batchers 121 | * Log if `handle_batch` returns less messages than expected 122 | 123 | ## v0.4.0 (2019-08-05) 124 | 125 | * Add `:batch_mode` to `Broadway.test_messages/3` to control how test messages are flushed 126 | * Add `Broadway.DummyProducer` for testing 127 | * Append .Broadway to module prefixes to avoid potential naming conflicts 128 | 129 | ## v0.3.0 (2019-04-26) 130 | 131 | * Add `metadata` field to the `Message` struct so clients can append extra information 132 | 133 | ## v0.2.0 (2019-04-04) 134 | 135 | * `Broadway.Message.put_partition/2` has been renamed to `Broadway.Message.put_batch_key/2` 136 | * Allow `Broadway.Producer` to `prepare_for_draining/1` 137 | * Allow pipelines without batchers 138 | 139 | ## v0.1.0 (2019-02-19) 140 | 141 | * Initial release 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Plataformatec 2 | Copyright 2020 Dashbit 3 | 4 | Apache License 5 | Version 2.0, January 2004 6 | http://www.apache.org/licenses/ 7 | 8 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 9 | 10 | 1. Definitions. 11 | 12 | "License" shall mean the terms and conditions for use, reproduction, 13 | and distribution as defined by Sections 1 through 9 of this document. 
14 | 15 | "Licensor" shall mean the copyright owner or entity authorized by 16 | the copyright owner that is granting the License. 17 | 18 | "Legal Entity" shall mean the union of the acting entity and all 19 | other entities that control, are controlled by, or are under common 20 | control with that entity. For the purposes of this definition, 21 | "control" means (i) the power, direct or indirect, to cause the 22 | direction or management of such entity, whether by contract or 23 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 24 | outstanding shares, or (iii) beneficial ownership of such entity. 25 | 26 | "You" (or "Your") shall mean an individual or Legal Entity 27 | exercising permissions granted by this License. 28 | 29 | "Source" form shall mean the preferred form for making modifications, 30 | including but not limited to software source code, documentation 31 | source, and configuration files. 32 | 33 | "Object" form shall mean any form resulting from mechanical 34 | transformation or translation of a Source form, including but 35 | not limited to compiled object code, generated documentation, 36 | and conversions to other media types. 37 | 38 | "Work" shall mean the work of authorship, whether in Source or 39 | Object form, made available under the License, as indicated by a 40 | copyright notice that is included in or attached to the work 41 | (an example is provided in the Appendix below). 42 | 43 | "Derivative Works" shall mean any work, whether in Source or Object 44 | form, that is based on (or derived from) the Work and for which the 45 | editorial revisions, annotations, elaborations, or other modifications 46 | represent, as a whole, an original work of authorship. For the purposes 47 | of this License, Derivative Works shall not include works that remain 48 | separable from, or merely link (or bind by name) to the interfaces of, 49 | the Work and Derivative Works thereof. 50 | 51 | "Contribution" shall mean any work of authorship, including 52 | the original version of the Work and any modifications or additions 53 | to that Work or Derivative Works thereof, that is intentionally 54 | submitted to Licensor for inclusion in the Work by the copyright owner 55 | or by an individual or Legal Entity authorized to submit on behalf of 56 | the copyright owner. For the purposes of this definition, "submitted" 57 | means any form of electronic, verbal, or written communication sent 58 | to the Licensor or its representatives, including but not limited to 59 | communication on electronic mailing lists, source code control systems, 60 | and issue tracking systems that are managed by, or on behalf of, the 61 | Licensor for the purpose of discussing and improving the Work, but 62 | excluding communication that is conspicuously marked or otherwise 63 | designated in writing by the copyright owner as "Not a Contribution." 64 | 65 | "Contributor" shall mean Licensor and any individual or Legal Entity 66 | on behalf of whom a Contribution has been received by Licensor and 67 | subsequently incorporated within the Work. 68 | 69 | 2. Grant of Copyright License. Subject to the terms and conditions of 70 | this License, each Contributor hereby grants to You a perpetual, 71 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 72 | copyright license to reproduce, prepare Derivative Works of, 73 | publicly display, publicly perform, sublicense, and distribute the 74 | Work and such Derivative Works in Source or Object form. 75 | 76 | 3. Grant of Patent License. 
Subject to the terms and conditions of 77 | this License, each Contributor hereby grants to You a perpetual, 78 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 79 | (except as stated in this section) patent license to make, have made, 80 | use, offer to sell, sell, import, and otherwise transfer the Work, 81 | where such license applies only to those patent claims licensable 82 | by such Contributor that are necessarily infringed by their 83 | Contribution(s) alone or by combination of their Contribution(s) 84 | with the Work to which such Contribution(s) was submitted. If You 85 | institute patent litigation against any entity (including a 86 | cross-claim or counterclaim in a lawsuit) alleging that the Work 87 | or a Contribution incorporated within the Work constitutes direct 88 | or contributory patent infringement, then any patent licenses 89 | granted to You under this License for that Work shall terminate 90 | as of the date such litigation is filed. 91 | 92 | 4. Redistribution. You may reproduce and distribute copies of the 93 | Work or Derivative Works thereof in any medium, with or without 94 | modifications, and in Source or Object form, provided that You 95 | meet the following conditions: 96 | 97 | (a) You must give any other recipients of the Work or 98 | Derivative Works a copy of this License; and 99 | 100 | (b) You must cause any modified files to carry prominent notices 101 | stating that You changed the files; and 102 | 103 | (c) You must retain, in the Source form of any Derivative Works 104 | that You distribute, all copyright, patent, trademark, and 105 | attribution notices from the Source form of the Work, 106 | excluding those notices that do not pertain to any part of 107 | the Derivative Works; and 108 | 109 | (d) If the Work includes a "NOTICE" text file as part of its 110 | distribution, then any Derivative Works that You distribute must 111 | include a readable copy of the attribution notices contained 112 | within such NOTICE file, excluding those notices that do not 113 | pertain to any part of the Derivative Works, in at least one 114 | of the following places: within a NOTICE text file distributed 115 | as part of the Derivative Works; within the Source form or 116 | documentation, if provided along with the Derivative Works; or, 117 | within a display generated by the Derivative Works, if and 118 | wherever such third-party notices normally appear. The contents 119 | of the NOTICE file are for informational purposes only and 120 | do not modify the License. You may add Your own attribution 121 | notices within Derivative Works that You distribute, alongside 122 | or as an addendum to the NOTICE text from the Work, provided 123 | that such additional attribution notices cannot be construed 124 | as modifying the License. 125 | 126 | You may add Your own copyright statement to Your modifications and 127 | may provide additional or different license terms and conditions 128 | for use, reproduction, or distribution of Your modifications, or 129 | for any such Derivative Works as a whole, provided Your use, 130 | reproduction, and distribution of the Work otherwise complies with 131 | the conditions stated in this License. 132 | 133 | 5. Submission of Contributions. Unless You explicitly state otherwise, 134 | any Contribution intentionally submitted for inclusion in the Work 135 | by You to the Licensor shall be under the terms and conditions of 136 | this License, without any additional terms or conditions. 
137 | Notwithstanding the above, nothing herein shall supersede or modify 138 | the terms of any separate license agreement you may have executed 139 | with Licensor regarding such Contributions. 140 | 141 | 6. Trademarks. This License does not grant permission to use the trade 142 | names, trademarks, service marks, or product names of the Licensor, 143 | except as required for reasonable and customary use in describing the 144 | origin of the Work and reproducing the content of the NOTICE file. 145 | 146 | 7. Disclaimer of Warranty. Unless required by applicable law or 147 | agreed to in writing, Licensor provides the Work (and each 148 | Contributor provides its Contributions) on an "AS IS" BASIS, 149 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 150 | implied, including, without limitation, any warranties or conditions 151 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 152 | PARTICULAR PURPOSE. You are solely responsible for determining the 153 | appropriateness of using or redistributing the Work and assume any 154 | risks associated with Your exercise of permissions under this License. 155 | 156 | 8. Limitation of Liability. In no event and under no legal theory, 157 | whether in tort (including negligence), contract, or otherwise, 158 | unless required by applicable law (such as deliberate and grossly 159 | negligent acts) or agreed to in writing, shall any Contributor be 160 | liable to You for damages, including any direct, indirect, special, 161 | incidental, or consequential damages of any character arising as a 162 | result of this License or out of the use or inability to use the 163 | Work (including but not limited to damages for loss of goodwill, 164 | work stoppage, computer failure or malfunction, or any and all 165 | other commercial damages or losses), even if such Contributor 166 | has been advised of the possibility of such damages. 167 | 168 | 9. Accepting Warranty or Additional Liability. While redistributing 169 | the Work or Derivative Works thereof, You may choose to offer, 170 | and charge a fee for, acceptance of support, warranty, indemnity, 171 | or other liability obligations and/or rights consistent with this 172 | License. However, in accepting such obligations, You may act only 173 | on Your own behalf and on Your sole responsibility, not on behalf 174 | of any other Contributor, and only if You agree to indemnify, 175 | defend, and hold each Contributor harmless for any liability 176 | incurred by, or claims asserted against, such Contributor by reason 177 | of your accepting any such warranty or additional liability. 178 | 179 | END OF TERMS AND CONDITIONS 180 | 181 | APPENDIX: How to apply the Apache License to your work. 182 | 183 | To apply the Apache License to your work, attach the following 184 | boilerplate notice, with the fields enclosed by brackets "[]" 185 | replaced with your own identifying information. (Don't include 186 | the brackets!) The text should be enclosed in the appropriate 187 | comment syntax for the file format. We also recommend that a 188 | file or class name and description of purpose be included on the 189 | same "printed page" as the copyright notice for easier 190 | identification within third-party archives. 191 | 192 | Copyright [yyyy] [name of copyright owner] 193 | 194 | Licensed under the Apache License, Version 2.0 (the "License"); 195 | you may not use this file except in compliance with the License. 
196 | You may obtain a copy of the License at 197 | 198 | http://www.apache.org/licenses/LICENSE-2.0 199 | 200 | Unless required by applicable law or agreed to in writing, software 201 | distributed under the License is distributed on an "AS IS" BASIS, 202 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 203 | See the License for the specific language governing permissions and 204 | limitations under the License. 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Broadway 2 | 3 | [![CI](https://github.com/dashbitco/broadway/actions/workflows/ci.yml/badge.svg)](https://github.com/dashbitco/broadway/actions/workflows/ci.yml) 4 | 5 | Build concurrent and multi-stage data ingestion and data processing pipelines with Elixir. Broadway allows developers to consume data efficiently from different sources, known as producers, such as Amazon SQS, Apache Kafka, Google Cloud PubSub, RabbitMQ, and others. Broadway pipelines are long-lived, concurrent, and robust, thanks to the Erlang VM and its actors. 6 | 7 | Broadway takes its name from the famous [Broadway street](https://en.wikipedia.org/wiki/Broadway_theatre) in New York City, renowned for its stages, actors, and producers. 8 | 9 | To learn more and get started, check out [our official website](https://elixir-broadway.org) and [our guides and docs](https://hexdocs.pm/broadway). 10 | 11 | ![Broadway Logo](https://user-images.githubusercontent.com/9582/117824616-ed298500-b26e-11eb-8ded-0fb7e608bf70.png) 12 | 13 | ## Built-in features 14 | 15 | Broadway takes the burden of defining concurrent GenStage topologies and provides a simple configuration API that automatically defines concurrent producers, concurrent processing, batch handling, and more, leading to both time and cost efficient ingestion and processing of data. It features: 16 | 17 | * Back-pressure 18 | * Automatic acknowledgements at the end of the pipeline 19 | * Batching 20 | * Fault tolerance 21 | * Graceful shutdown 22 | * Built-in testing 23 | * Custom failure handling 24 | * Ordering and partitioning 25 | * Rate-limiting 26 | * Metrics 27 | 28 | ### Producers 29 | 30 | There are several producers that you can use to integrate with existing services and technologies. [See the docs for detailed how-tos and supported producers](https://hexdocs.pm/broadway/introduction.html#official-producers). 
31 | 32 | ## Installation 33 | 34 | Add `:broadway` to the list of dependencies in `mix.exs`: 35 | 36 | ```elixir 37 | def deps do 38 | [ 39 | {:broadway, "~> 1.0"} 40 | ] 41 | end 42 | ``` 43 | 44 | ## A quick example: SQS integration 45 | 46 | Assuming you have added [`broadway_sqs`](https://github.com/dashbitco/broadway_sqs) as a dependency and configured your SQS credentials accordingly, you can consume Amazon SQS events in only 20 LOCs: 47 | 48 | ```elixir 49 | defmodule MyBroadway do 50 | use Broadway 51 | 52 | alias Broadway.Message 53 | 54 | def start_link(_opts) do 55 | Broadway.start_link(__MODULE__, 56 | name: __MODULE__, 57 | producer: [ 58 | module: {BroadwaySQS.Producer, queue_url: "https://us-east-2.queue.amazonaws.com/100000000001/my_queue"} 59 | ], 60 | processors: [ 61 | default: [concurrency: 50] 62 | ], 63 | batchers: [ 64 | s3: [concurrency: 5, batch_size: 10, batch_timeout: 1000] 65 | ] 66 | ) 67 | end 68 | 69 | def handle_message(_processor_name, message, _context) do 70 | message 71 | |> Message.update_data(&process_data/1) 72 | |> Message.put_batcher(:s3) 73 | end 74 | 75 | def handle_batch(:s3, messages, _batch_info, _context) do 76 | # Send batch of messages to S3 77 | end 78 | 79 | defp process_data(data) do 80 | # Do some calculations, generate a JSON representation, process images. 81 | end 82 | end 83 | ``` 84 | 85 | Once your Broadway module is defined, you just need to add it as a child of your application supervision tree as `{MyBroadway, []}`. 86 | 87 | ## Comparison to Flow 88 | 89 | You may also be interested in [Flow by Dashbit](https://github.com/dashbitco/flow). Both Broadway and Flow are built on top of GenStage. Flow is a more general abstraction than Broadway that focuses on data as a whole, providing features like aggregation, joins, windows, etc. Broadway focuses on events and on operational features, such as metrics, automatic acknowledgements, failure handling, and so on. Broadway is recommended for continuous, long-running pipelines. Flow works with short- and long-lived data processing. 90 | 91 | ## License 92 | 93 | Copyright 2019 Plataformatec\ 94 | Copyright 2020 Dashbit 95 | 96 | Licensed under the Apache License, Version 2.0 (the "License"); 97 | you may not use this file except in compliance with the License. 98 | You may obtain a copy of the License at 99 | 100 | http://www.apache.org/licenses/LICENSE-2.0 101 | 102 | Unless required by applicable law or agreed to in writing, software 103 | distributed under the License is distributed on an "AS IS" BASIS, 104 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 105 | See the License for the specific language governing permissions and 106 | limitations under the License. 107 | -------------------------------------------------------------------------------- /guides/examples/amazon-sqs.md: -------------------------------------------------------------------------------- 1 | # Amazon SQS 2 | 3 | Amazon Simple Queue Service (SQS) is a highly scalable distributed message 4 | queuing service provided by Amazon.com. AWS SQS offers two types of message 5 | queues: 6 | 7 | * Standard 8 | * Nearly unlimited throughput 9 | * Best-effort ordering 10 | * At-least-once delivery 11 | 12 | * FIFO 13 | * Limited number of transactions per second (TPS). 14 | See [Amazon SQS FIFO](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/FIFO-queues.html) 15 | developer guide for more information on limits. 
16 | * Order in which messages are sent/received is strictly preserved 17 | * Exactly-once delivery 18 | 19 | Broadway can work seamlessly with both Standard and FIFO queues. 20 | 21 | ## Getting Started 22 | 23 | In order to use Broadway with SQS, we need to: 24 | 25 | 1. Create a SQS queue (or use an existing one) 26 | 1. Configure our Elixir project to use Broadway 27 | 1. Define your pipeline configuration 28 | 1. Implement Broadway callbacks 29 | 1. Run the Broadway pipeline 30 | 1. Tune the configuration (Optional) 31 | 32 | ## Create a SQS queue 33 | 34 | Amazon provides a comprehensive [Step-by-step Guide](https://aws.amazon.com/getting-started/tutorials/send-messages-distributed-applications/) 35 | on creating SQS queues. In case you don't have an AWS account and want to 36 | test Broadway locally, you can easily install [ElasticMQ](https://github.com/softwaremill/elasticmq), 37 | which is a message queue system that offers a SQS-compatible query interface. 38 | 39 | ## Configure the project 40 | 41 | In this guide we're going to use [BroadwaySQS](https://github.com/dashbitco/broadway_sqs), 42 | which is a Broadway SQS Connector provided by [Dashbit](https://dashbit.co/). 43 | 44 | ### Starting a new project 45 | 46 | If you plan to start a new project, just run: 47 | 48 | $ mix new my_app --sup 49 | 50 | The `--sup` flag instructs Elixir to generate an application with a supervision tree. 51 | 52 | ### Setting up dependencies 53 | 54 | Add `:broadway_sqs` to the list of dependencies in `mix.exs` along with the HTTP 55 | client of your choice (defaults to `:hackney`): 56 | 57 | def deps do 58 | [ 59 | ... 60 | {:broadway_sqs, "~> 0.7"}, 61 | {:hackney, "~> 1.9"}, 62 | ] 63 | end 64 | 65 | Don't forget to check for the latest version of dependencies. 66 | 67 | ## Define the pipeline configuration 68 | 69 | Broadway is a process-based behaviour and to define a Broadway 70 | pipeline, we need to define three functions: `start_link/1`, 71 | `handle_message/3` and `handle_batch/4`. We will cover `start_link/1` 72 | in this section and the `handle_` callbacks in the next one. 73 | 74 | Similar to other process-based behaviours, `start_link/1` simply 75 | delegates to `Broadway.start_link/2`, which should define the 76 | producers, processors, and batchers in the Broadway pipeline. 77 | Assuming we want to consume messages from a queue called 78 | `my_queue`, the minimal configuration would be: 79 | 80 | defmodule MyBroadway do 81 | use Broadway 82 | 83 | alias Broadway.Message 84 | 85 | def start_link(_opts) do 86 | Broadway.start_link(__MODULE__, 87 | name: __MODULE__, 88 | producer: [ 89 | module: {BroadwaySQS.Producer, 90 | queue_url: "https://us-east-2.queue.amazonaws.com/100000000001/my_queue"} 91 | ], 92 | processors: [ 93 | default: [] 94 | ], 95 | batchers: [ 96 | default: [ 97 | batch_size: 10, 98 | batch_timeout: 2000 99 | ] 100 | ] 101 | ) 102 | end 103 | 104 | ...callbacks... 105 | end 106 | 107 | The above configuration also assumes that you have the AWS credentials 108 | set up in your environment, for instance, by having the `AWS_ACCESS_KEY_ID` 109 | and `AWS_SECRET_ACCESS_KEY` environment variables set. If that's 110 | not the case, you will need to pass that information to the client so it 111 | can properly connect to the AWS servers. Here is how you can do it: 112 | 113 | ...
114 | producer: [ 115 | module: 116 | {BroadwaySQS.Producer, 117 | queue_url: "https://us-east-2.queue.amazonaws.com/100000000001/my_queue", 118 | config: [ 119 | access_key_id: "YOUR_AWS_ACCESS_KEY_ID", 120 | secret_access_key: "YOUR_AWS_SECRET_ACCESS_KEY" 121 | ]} 122 | ] 123 | ... 124 | 125 | For a full list of options for `BroadwaySQS.Producer`, please see 126 | [BroadwaySQS](https://hexdocs.pm/broadway_sqs/) documentation. 127 | 128 | For general information about setting up Broadway, see `Broadway` 129 | module docs as well as `Broadway.start_link/2`. 130 | 131 | > Note: Even though batching is optional since Broadway v0.2, we recommend that all Amazon SQS 132 | > pipelines have at least a default batcher. This lets you control the exact batch 133 | > size and frequency that messages are acknowledged to Amazon SQS, often leading to 134 | > pipelines that are more cost and time efficient. 135 | 136 | ## Implement Broadway callbacks 137 | 138 | In order to process incoming messages, we need to implement the 139 | required callbacks. For the sake of simplicity, we're considering that 140 | all messages received from the queue are just numbers: 141 | 142 | defmodule MyBroadway do 143 | use Broadway 144 | 145 | alias Broadway.Message 146 | 147 | ...start_link... 148 | 149 | @impl true 150 | def handle_message(_, %Message{data: data} = message, _) do 151 | message 152 | |> Message.update_data(fn data -> data * data end) 153 | end 154 | 155 | @impl true 156 | def handle_batch(_, messages, _, _) do 157 | list = messages |> Enum.map(fn e -> e.data end) 158 | IO.inspect(list, label: "Got batch of finished jobs from processors, sending ACKs to SQS as a batch.") 159 | messages 160 | end 161 | end 162 | 163 | We are not doing anything fancy here, but it should be enough for our 164 | purpose. First we update the message's data individually inside 165 | `handle_message/3` and then we print each batch inside `handle_batch/4`. 166 | 167 | For more information, see `c:Broadway.handle_message/3` and 168 | `c:Broadway.handle_batch/4`. 169 | 170 | ## Run the Broadway pipeline 171 | 172 | To run your `Broadway` pipeline, you just need to add it as a child in 173 | a supervision tree. Most applications have a supervision tree defined 174 | at `lib/my_app/application.ex`. You can add Broadway as a child to a 175 | supervisor as follows: 176 | 177 | children = [ 178 | {MyBroadway, []} 179 | ] 180 | 181 | Supervisor.start_link(children, strategy: :one_for_one) 182 | 183 | Now the Broadway pipeline should be started when your application starts. 184 | Also, if your Broadway has any dependency (for example, it needs to talk 185 | to the database), make sure that Broadway is listed *after* its dependencies 186 | in the supervision tree. 187 | 188 | ## Tuning the configuration 189 | 190 | Some of the configuration options available for Broadway come already with a 191 | "reasonable" default value. However, those values might not suit your 192 | requirements. Depending on the number of messages you get, how much processing 193 | they need and how much IO work is going to take place, you might need completely 194 | different values to optimize the flow of your pipeline. The `concurrency` option 195 | available for every set of producers, processors and batchers, along with 196 | `max_demand`, `batch_size`, and `batch_timeout` can give you a great deal 197 | of flexibility. 198 | 199 | The `concurrency` option controls the concurrency level in each layer of 200 | the pipeline.
201 | See the notes on [`Producer concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-producer-concurrency) 202 | and [`Batcher concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-batcher-concurrency) 203 | for details. 204 | 205 | Here's an example on how you could tune them according to 206 | your needs. 207 | 208 | defmodule MyBroadway do 209 | use Broadway 210 | 211 | def start_link(_opts) do 212 | Broadway.start_link(__MODULE__, 213 | name: __MODULE__, 214 | producer: [ 215 | ... 216 | concurrency: 10, 217 | ], 218 | processors: [ 219 | default: [ 220 | concurrency: 100, 221 | max_demand: 1, 222 | ] 223 | ], 224 | batchers: [ 225 | default: [ 226 | batch_size: 10, 227 | concurrency: 10, 228 | ] 229 | ] 230 | ) 231 | end 232 | 233 | ...callbacks... 234 | end 235 | 236 | In order to get a good set of configurations for your pipeline, it's 237 | important to respect the limitations of the servers you're running, 238 | as well as the limitations of the services you're providing/consuming 239 | data to/from. Broadway comes with telemetry, so you can measure your 240 | pipeline and help ensure your changes are effective. 241 | -------------------------------------------------------------------------------- /guides/examples/apache-kafka.md: -------------------------------------------------------------------------------- 1 | # Apache Kafka 2 | 3 | Kafka is a distributed streaming platform that has three key capabilities: 4 | 5 | * Publish and subscribe to streams of records 6 | * Store streams of records in a fault-tolerant durable way 7 | * Process streams of records as they occur 8 | 9 | ## Getting Started 10 | 11 | In order to use Broadway with Kafka, we need to: 12 | 13 | 1. Create a stream of records (or use an existing one) 14 | 1. Configure your Elixir project to use Broadway 15 | 1. Define your pipeline configuration 16 | 1. Implement Broadway callbacks 17 | 1. Run the Broadway pipeline 18 | 19 | ## Create a stream of records (or use an existing one) 20 | 21 | In case you don't have Kafka installed yet, please follow the instructions on Kafka's 22 | [Quickstart](https://kafka.apache.org/quickstart) for a clean installation. After 23 | initializing Kafka, you can create a new stream by running: 24 | 25 | $ kafka-topics --create --zookeeper localhost:2181 --partitions 3 --topic test 26 | 27 | ## Configure your Elixir project to use Broadway 28 | 29 | This guide describes the steps necessary to integrate Broadway with Kafka using 30 | [BroadwayKafka](https://github.com/dashbitco/broadway_kafka), 31 | which is a Broadway Kafka Connector provided by [Dashbit](https://dashbit.co/). 32 | 33 | BroadwayKafka can subscribe to one or more topics and process streams of records 34 | using Kafka's [Consumer API](https://kafka.apache.org/documentation.html#consumerapi). 35 | 36 | Each GenStage producer initialized by BroadwayKafka will be available as a consumer, 37 | all registered using the same self-labeled **consumer group**. Each record published to a 38 | topic/partition will be delivered to one consumer instance within each consumer group. 39 | 40 | Bear in mind that a topic/partition can be assigned to any consumer instance that has 41 | been subscribed using the same consumer group, i.e, any Broadway instance or application 42 | running on any machine connected to the Kafka cluster. 
43 | 44 | ### Starting a new project 45 | 46 | Create a new project running: 47 | 48 | $ mix new my_app --sup 49 | 50 | The `--sup` flag instructs Elixir to generate an application with a supervision tree. 51 | 52 | ### Setting up dependencies 53 | 54 | Add `:broadway_kafka` to the list of dependencies in `mix.exs`: 55 | 56 | def deps do 57 | [ 58 | ... 59 | {:broadway_kafka, "~> 0.3"} 60 | ] 61 | end 62 | 63 | Don't forget to check for the latest version of dependencies. 64 | 65 | ## Define the pipeline configuration 66 | 67 | Broadway is a process-based behaviour and to define a Broadway pipeline, 68 | we need to define three functions: `start_link/1`, `handle_message/3` 69 | and optionally `handle_batch/4`. We will cover `start_link/1` in this 70 | section and the `handle_` callbacks in the next one. 71 | 72 | Similar to other process-based behaviours, `start_link/1` simply 73 | delegates to `Broadway.start_link/2`, which should define the 74 | producers, processors, and batchers in the Broadway pipeline. 75 | Assuming we want to consume messages from a topic called 76 | `test`, one possible configuration would be: 77 | 78 | defmodule MyBroadway do 79 | use Broadway 80 | 81 | alias Broadway.Message 82 | 83 | def start_link(_opts) do 84 | Broadway.start_link(__MODULE__, 85 | name: __MODULE__, 86 | producer: [ 87 | module: 88 | {BroadwayKafka.Producer, 89 | [ 90 | hosts: [localhost: 9092], 91 | group_id: "group_1", 92 | topics: ["test"] 93 | ]}, 94 | concurrency: 1 95 | ], 96 | processors: [ 97 | default: [ 98 | concurrency: 10 99 | ] 100 | ], 101 | batchers: [ 102 | default: [ 103 | batch_size: 100, 104 | batch_timeout: 200, 105 | concurrency: 10 106 | ] 107 | ] 108 | ) 109 | end 110 | 111 | ...callbacks... 112 | end 113 | 114 | > **Note**: Pipelines built on top of BroadwayKafka are automatically partitioned. 115 | So even though there are multiple processes (stages), these processes will preserve 116 | Kafka's ordering semantics when it comes to topics/partitions. Internally, this is 117 | achieved by making sure all messages from the same topic/partition will always be 118 | forwarded to the same processor and batch processor. 119 | 120 | For a full list of options for `BroadwayKafka.Producer`, refer to the 121 | official [BroadwayKafka](https://hexdocs.pm/broadway_kafka/) documentation. 122 | 123 | For general information about setting up Broadway, see `Broadway` 124 | module docs as well as `Broadway.start_link/2`. 125 | 126 | ## Implement Broadway callbacks 127 | 128 | In order to process incoming messages, we need to implement the 129 | required callbacks. For the sake of simplicity, we're considering that 130 | all messages received from the topic are just numbers: 131 | 132 | defmodule MyBroadway do 133 | use Broadway 134 | 135 | alias Broadway.Message 136 | 137 | ...start_link... 138 | 139 | @impl true 140 | def handle_message(_, message, _) do 141 | message 142 | |> Message.update_data(fn data -> {data, String.to_integer(data) * 2} end) 143 | end 144 | 145 | @impl true 146 | def handle_batch(_, messages, _, _) do 147 | list = messages |> Enum.map(fn e -> e.data end) 148 | IO.inspect(list, label: "Got batch") 149 | messages 150 | end 151 | end 152 | 153 | We are not doing anything fancy here, but it should be enough for our 154 | purpose. First, we update the message's data individually inside 155 | `handle_message/3` and then we print each batch inside `handle_batch/4`. 156 | 157 | For more information, see `c:Broadway.handle_message/3` and 158 | `c:Broadway.handle_batch/4`. 
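If some records may not be parseable, a variation of the `handle_message/3` callback above (not part of the original example, shown here only as a hedged sketch) could mark them as failed instead of letting `String.to_integer/1` raise:

    @impl true
    def handle_message(_, message, _) do
      case Integer.parse(message.data) do
        {number, ""} ->
          Message.update_data(message, fn data -> {data, number * 2} end)

        _ ->
          # Illustrative assumption: flag unparseable records so they can be
          # inspected in an optional handle_failed/2 callback.
          Message.failed(message, "expected an integer payload")
      end
    end

Failed messages are still acknowledged by `broadway_kafka`, as discussed in the "Handling failed messages" section later in this guide.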
159 | 160 | > Note: Since Broadway v0.2, batching is optional. In case you don't need to 161 | > group messages as batches for further processing/publishing, you can remove 162 | > the `:batchers` configuration along with the `handle_batch/4` callback. 163 | 164 | ## Run the Broadway pipeline 165 | 166 | To run your `Broadway` pipeline, you just need to add it as a child in 167 | a supervision tree. Most applications have a supervision tree defined 168 | at `lib/my_app/application.ex`. You can add Broadway as a child to a 169 | supervisor as follows: 170 | 171 | children = [ 172 | {MyBroadway, []} 173 | ] 174 | 175 | Supervisor.start_link(children, strategy: :one_for_one) 176 | 177 | Now the Broadway pipeline should be started when your application starts. 178 | Also, if your Broadway has any dependency (for example, it needs to talk 179 | to the database), make sure that Broadway is listed *after* its dependencies 180 | in the supervision tree. 181 | 182 | You can now test your pipeline by entering an `iex` session: 183 | 184 | $ iex -S mix 185 | 186 | If everything went fine, you should see lots of `info` log messages like this 187 | one coming from the `:brod` supervisors: 188 | 189 | 15:14:04.356 [info] [supervisor: {:local, :brod_sup}, started: [pid: #PID<0.251.0>, id: :test_client, mfargs: {:brod_client, :start_link, [[localhost: 9092], :test_client, []]}, restart_type: {:permanent, 10}, shutdown: 5000, child_type: :worker]] 190 | 191 | [Brod](https://github.com/klarna/brod/) is the client that BroadwayKafka uses 192 | under the hood to communicate with Kafka. 193 | 194 | ### Sending messages to Kafka 195 | 196 | Finally, we can send some sample messages to Kafka using `:brod` with the following snippet: 197 | 198 | topic = "test" 199 | client_id = :my_client 200 | hosts = [localhost: 9092] 201 | 202 | :ok = :brod.start_client(hosts, client_id, _client_config=[]) 203 | :ok = :brod.start_producer(client_id, topic, _producer_config = []) 204 | 205 | Enum.each(1..1000, fn i -> 206 | partition = rem(i, 3) 207 | :ok = :brod.produce_sync(client_id, topic, partition, _key="", "#{i}") 208 | end) 209 | 210 | You should see the output showing the generated batches: 211 | 212 | Got batch: [ 213 | {"2", 4}, 214 | {"5", 10}, 215 | {"8", 16}, 216 | {"11", 22}, 217 | {"14", 28}, 218 | ... 219 | ] 220 | Got batch: [ 221 | {"3", 6}, 222 | {"6", 12}, 223 | {"9", 18}, 224 | {"12", 24}, 225 | {"15", 30}, 226 | ... 227 | ] 228 | 229 | ## Tuning the configuration 230 | 231 | Some of the configuration options available for Broadway come already with a 232 | "reasonable" default value. However, those values might not suit your 233 | requirements. Depending on the number of records you get, how much processing 234 | they need and how much IO work is going to take place, you might need completely 235 | different values to optimize the flow of your pipeline. The `concurrency` option 236 | available for every set of producers, processors and batchers, along with 237 | `batch_size` and `batch_timeout` can give you a great deal of flexibility. 238 | See the notes on [`Producer concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-producer-concurrency) 239 | and [`Batcher concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-batcher-concurrency) 240 | for details. 241 | 242 | By setting the `concurrency` option, you define the number of concurrent processes 243 | that will be started by Broadway, allowing you to have full control over the 244 | concurrency level in each layer of the pipeline.
Keep in mind that since the 245 | concurrency model provided by **Kafka** is based on **partitioning**, in order to take 246 | full advantage of this model, you need to set the `concurrency` option for 247 | your processors and batchers accordingly. Having less concurrency than topic/partitions 248 | assigned will result in individual processors handling more than one partition, 249 | decreasing the overall level of concurrency. Therefore, if you want to always be able 250 | to process messages at maximum concurrency (assuming you have enough resources to do it), 251 | you should increase the concurrency up front to make sure you have enough processors to 252 | handle the extra records received from new partitions assigned. 253 | 254 | > **Note**: Even if you don't plan to add more partitions to a Kafka topic, your pipeline 255 | can still receive more assignments than planned. For instance, if another consumer crashes, 256 | the server will reassign all its topics/partitions to other available consumers, including 257 | any Broadway producer subscribed to the same topic. 258 | 259 | There are other options that you may want to take a closer look at when tuning your configuration. 260 | The `:max_bytes` option, for instance, belongs to the `:fetch_config` group and defines the 261 | maximum amount of data to be fetched at a time from a single partition. The default is 262 | 1048576 (1 MiB). Setting greater values can improve throughput at the cost of more 263 | memory consumption. For more information and other fetch options, please refer to the 264 | "Fetch config options" in the official [BroadwayKafka](https://hexdocs.pm/broadway_kafka/) 265 | documentation. 266 | 267 | Two other important options are `:offset_commit_interval_seconds` and `:offset_commit_on_ack`. 268 | Both belong to the main configuration and they can make a huge impact on performance. 269 | 270 | The `:offset_commit_interval_seconds` defines the time interval between two 271 | OffsetCommitRequest messages. The default is 5s. 272 | 273 | The `:offset_commit_on_ack`, when set to `true`, tells Broadway to send an 274 | OffsetCommitRequest immediately after each acknowledgement, bypassing any 275 | interval defined in `:offset_commit_interval_seconds`. Setting this option to 276 | `false` can increase performance since any commit requests will start respecting 277 | the `:offset_commit_interval_seconds` option. This will usually result in fewer 278 | requests to be sent to the server. However, setting long commit intervals might 279 | lead to a large number of duplicated records to be processed after a server 280 | restart or connection loss. Since it is always possible that duplicate messages 281 | will be received by consumers, make sure your logic is idempotent when consuming 282 | records to avoid inconsistencies. Also, bear in mind that the negative 283 | performance impact might be insignificant if you're using batchers since only 284 | one commit request will be performed per batch. As a basic rule, always take 285 | into account the values of `batch_size` and `batch_timeout` whenever you're 286 | tuning `:offset_commit_interval_seconds` and `:offset_commit_on_ack`. 287 | 288 | ## Handling failed messages 289 | 290 | `broadway_kafka` never stops the flow of the stream, i.e. it will **always ack** the messages 291 | even when they fail. Unlike queue-based connectors, where you can mark a single message as failed, 292 | in Kafka that's not possible due to its single offset per topic/partition ack strategy.
If you 293 | want to reprocess failed messages, you need to roll your own strategy. A possible way to do that 294 | is to implement `handle_failed/2` and send failed messages to a separated stream or queue for 295 | later processing. 296 | -------------------------------------------------------------------------------- /guides/examples/custom-producers.md: -------------------------------------------------------------------------------- 1 | # Custom Producers 2 | 3 | If you want to use Broadway but there is no existing Broadway producer 4 | for the technology of your choice, you can integrate any existing GenStage 5 | producer into the pipeline with relative ease. 6 | 7 | ## Example 8 | 9 | In general, producers must generate `%Broadway.Message{}` structs in order 10 | to be processed by Broadway. In case you need to use an existing GenStage 11 | producer and you don't want to change its original implementation, 12 | you'll have to set the producer's `:transformer` option to translate the 13 | generated events into Broadway messages. 14 | 15 | In the following example the producer is a regular GenStage, i.e., it 16 | produces plain events that cannot be processed by Broadway directly: 17 | 18 | defmodule Counter do 19 | use GenStage 20 | 21 | def start_link(number) do 22 | GenStage.start_link(Counter, number) 23 | end 24 | 25 | def init(counter) do 26 | {:producer, counter} 27 | end 28 | 29 | def handle_demand(demand, counter) when demand > 0 do 30 | events = Enum.to_list(counter..counter+demand-1) 31 | {:noreply, events, counter + demand} 32 | end 33 | end 34 | 35 | By using a transformer, you can tell Broadway to transform all events 36 | generated by the producer into proper Broadway messages: 37 | 38 | defmodule MyBroadway do 39 | use Broadway 40 | 41 | @behaviour Broadway.Acknowledger 42 | 43 | alias Broadway.Message 44 | 45 | def start_link(_opts) do 46 | Broadway.start_link(__MODULE__, 47 | name: __MODULE__, 48 | producer: [ 49 | module: {Counter, 1}, 50 | transformer: {__MODULE__, :transform, []} 51 | ], 52 | processors: [ 53 | default: [concurrency: 10] 54 | ], 55 | batchers: [ 56 | default: [concurrency: 2, batch_size: 5], 57 | ] 58 | ) 59 | end 60 | 61 | ...callbacks... 62 | 63 | def transform(event, _opts) do 64 | %Message{ 65 | data: event, 66 | acknowledger: {__MODULE__, :ack_id, :ack_data} 67 | } 68 | end 69 | 70 | @impl Broadway.Acknowledger 71 | def ack(:ack_id, successful, failed) do 72 | # Write ack code here 73 | :ok 74 | end 75 | end 76 | 77 | Notice that you need to pass two options to the producer: 78 | 79 | * `:module` - a tuple representing the GenStage producer as `{mod, arg}`. 80 | Where `mod` is module that implements the GenStage behaviour and `arg` 81 | the argument that will be given to the `init` callback of the GenStage. 82 | It is very important to note that Broadway **will not call** the 83 | `child_spec/1` or `start_link/1` function of the producer. That's 84 | because Broadway wraps the producer to augment it with extra features. 85 | 86 | * `:transformer` - a module-function-args tuple that will be invoked 87 | inside the producer, for every producer message, that should create 88 | a `Broadway.Message` struct with the `data` and `acknowledger` fields. 89 | 90 | See the `Broadway.Acknowledger` module for more information on defining 91 | and setting up acknowledgements. 
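The `...callbacks...` placeholder in the example above stands for the usual `handle_message/3` and `handle_batch/4` implementations. A minimal sketch, assuming we simply want to double each counter value (the doubling is purely illustrative), might be:

    @impl true
    def handle_message(_processor, message, _context) do
      # Illustrative transformation: double the number emitted by Counter.
      Message.update_data(message, fn data -> data * 2 end)
    end

    @impl true
    def handle_batch(_batcher, messages, _batch_info, _context) do
      # Returning the messages marks them as successful, which eventually
      # invokes the ack/3 callback defined in this module.
      messages
    end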
92 | -------------------------------------------------------------------------------- /guides/examples/google-cloud-pubsub.md: -------------------------------------------------------------------------------- 1 | # Google Cloud Pub/Sub 2 | 3 | Cloud Pub/Sub is a fully-managed real-time messaging service provided by Google. 4 | 5 | ## Getting Started 6 | 7 | In order to use Broadway with Cloud Pub/Sub you need to: 8 | 9 | 1. Setup a Cloud Pub/Sub project 10 | 1. Configure your Elixir project to use Broadway 11 | 1. Define your pipeline configuration 12 | 1. Implement Broadway callbacks 13 | 1. Run the Broadway pipeline 14 | 1. Tune the configuration (Optional) 15 | 16 | If you are just getting familiar with Google Pub/Sub, refer to [the documentation](https://cloud.google.com/pubsub/docs/) 17 | to get started. Instead of testing against a live environment, you may also consider using the 18 | [emulator](https://cloud.google.com/pubsub/docs/emulator) to simulate integrating with Cloud 19 | Pub/Sub. 20 | 21 | If you have an existing project, topic, subscription, and credentials, you can skip [step 22 | 1](#setup-cloud-pub-sub-project) and jump to [Configure the project](#configure-the-project) 23 | section. 24 | 25 | ## Setup Cloud Pub/Sub project 26 | 27 | In this tutorial we'll use the [`gcloud`](https://cloud.google.com/sdk/gcloud/) command-line tool 28 | to set everything up in Google Cloud. Alternatively, you can roughly follow this guide by using 29 | [Cloud Console](https://console.cloud.google.com). 30 | 31 | To install `gcloud` follow the [documentation](https://cloud.google.com/sdk/gcloud/). If you are 32 | on macOS you may consider installing it with Homebrew: 33 | 34 | $ brew install --cask google-cloud-sdk 35 | 36 | Now, authenticate the CLI: 37 | 38 | $ gcloud auth login 39 | 40 | Then, create a new project: 41 | 42 | $ gcloud projects create test-pubsub 43 | 44 | A new topic: 45 | 46 | $ gcloud pubsub topics create test-topic --project test-pubsub 47 | Created topic [projects/test-pubsub/topics/test-topic]. 48 | 49 | > Note: If you run this command immediately after creating a new Google Cloud project, you may receive an error indicating that your project's organization policy is still being provisioned. Just wait a couple minutes and try again. 50 | 51 | And a new subscription: 52 | 53 | $ gcloud pubsub subscriptions create test-subscription --project test-pubsub --topic test-topic 54 | Created subscription [projects/test-pubsub/subscriptions/test-subscription]. 55 | 56 | We also need a [service account](https://cloud.google.com/iam/docs/service-accounts), an IAM 57 | policy, as well as API credentials in order to programmatically work with the service. First, let's 58 | create the service account: 59 | 60 | $ gcloud iam service-accounts create test-account --project test-pubsub 61 | Created service account [test-account]. 62 | 63 | Then the policy. For simplicity we add the general role `roles/editor`, but make sure to 64 | examine the [available roles](https://cloud.google.com/iam/docs/understanding-roles#pubsub-roles) 65 | and choose the one that best suits your use case: 66 | 67 | $ gcloud projects add-iam-policy-binding test-pubsub \ 68 | --member serviceAccount:test-account@test-pubsub.iam.gserviceaccount.com \ 69 | --role roles/editor 70 | Updated IAM policy for project [test-pubsub]. 71 | (...) 
72 | 73 | And now the credentials: 74 | 75 | $ gcloud iam service-accounts keys create credentials.json --iam-account=test-account@test-pubsub.iam.gserviceaccount.com 76 | created key [xxx] of type [json] as [key] for [test-account@test-pubsub.iam.gserviceaccount.com] 77 | 78 | This command generated a `credentials.json` file which will be useful later. Note, the IAM account 79 | pattern is `<service-account-name>@<project-id>.iam.gserviceaccount.com`. Run `gcloud iam service-accounts list --project test-pubsub` 80 | to see all service accounts associated with the given project. 81 | 82 | Finally, we need to enable Pub/Sub for our project: 83 | 84 | $ gcloud services enable pubsub --project test-pubsub 85 | Operation "operations/xxx" finished successfully. 86 | 87 | ## Configure the project 88 | 89 | In this guide we're going to use [BroadwayCloudPubSub](https://github.com/dashbitco/broadway_cloud_pub_sub), 90 | which is a Broadway Cloud Pub/Sub Connector provided by [Dashbit](https://dashbit.co/). 91 | 92 | ### Starting a new project 93 | 94 | If you plan to start a new project, just run: 95 | 96 | $ mix new my_app --sup 97 | 98 | The `--sup` flag instructs Elixir to generate an application with a supervision tree. 99 | 100 | ### Setting up dependencies 101 | 102 | Add `:broadway_cloud_pub_sub` to the list of dependencies in `mix.exs`, along with the Google 103 | Cloud authentication library of your choice (defaults to `:goth`): 104 | 105 | defp deps() do 106 | [ 107 | ... 108 | {:broadway_cloud_pub_sub, "~> 0.7"}, 109 | {:goth, "~> 1.0"} 110 | ] 111 | end 112 | 113 | Don't forget to check for the latest version of dependencies. 114 | 115 | ## Define the pipeline configuration 116 | 117 | Broadway is a process-based behaviour and to define a Broadway pipeline, we need to define three 118 | functions: `start_link/1`, `handle_message/3` and `handle_batch/4`. We will cover `start_link/1` 119 | in this section and the `handle_` callbacks in the next one. 120 | 121 | Similar to other process-based behaviours, `start_link/1` simply delegates to 122 | `Broadway.start_link/2`, which should define the producers, processors, and batchers in the 123 | Broadway pipeline. Assuming we want to consume messages from the `test-subscription`, the minimal 124 | configuration would be: 125 | 126 | defmodule MyBroadway do 127 | use Broadway 128 | 129 | alias Broadway.Message 130 | 131 | def start_link(_opts) do 132 | Broadway.start_link(__MODULE__, 133 | name: __MODULE__, 134 | producer: [ 135 | module: 136 | {BroadwayCloudPubSub.Producer, 137 | subscription: "projects/test-pubsub/subscriptions/test-subscription"} 138 | ], 139 | processors: [ 140 | default: [] 141 | ], 142 | batchers: [ 143 | default: [ 144 | batch_size: 10, 145 | batch_timeout: 2_000 146 | ] 147 | ] 148 | ) 149 | end 150 | 151 | ...callbacks... 152 | end 153 | 154 | For a full list of options for `BroadwayCloudPubSub.Producer`, please see [the 155 | documentation](https://hexdocs.pm/broadway_cloud_pub_sub). 156 | 157 | For general information about setting up Broadway, see `Broadway` module docs as well as 158 | `Broadway.start_link/2`. 159 | 160 | > Note: Even though batching is optional since Broadway v0.2, we recommend that all Cloud Pub/Sub 161 | > pipelines have at least a default batcher, as that allows you to control the exact batch 162 | > size and frequency that messages are acknowledged to Cloud Pub/Sub, which often leads to 163 | > pipelines that are more cost and time efficient.
164 | 165 | ## Implement Broadway callbacks 166 | 167 | In order to process incoming messages, we need to implement the required callbacks. For the sake 168 | of simplicity, we're considering that all messages received from the queue are strings and our 169 | processor calls `String.upcase/1` on them: 170 | 171 | defmodule MyBroadway do 172 | use Broadway 173 | 174 | alias Broadway.Message 175 | 176 | ...start_link... 177 | 178 | def handle_message(_, %Message{data: data} = message, _) do 179 | message 180 | |> Message.update_data(fn data -> String.upcase(data) end) 181 | end 182 | 183 | def handle_batch(_, messages, _, _) do 184 | list = messages |> Enum.map(fn e -> e.data end) 185 | IO.inspect(list, label: "Got batch of finished jobs from processors, sending ACKs to Pub/Sub as a batch.") 186 | messages 187 | end 188 | end 189 | 190 | We are not doing anything fancy here, but it should be enough for our purpose. First we update the 191 | message's data individually inside `handle_message/3` and then we print each batch inside 192 | `handle_batch/4`. 193 | 194 | For more information, see `c:Broadway.handle_message/3` and `c:Broadway.handle_batch/4`. 195 | 196 | ## Run the Broadway pipeline 197 | 198 | To run your `Broadway` pipeline, you need to add it as a child in a supervision tree. Most 199 | applications have a supervision tree defined at `lib/my_app/application.ex`. You can add Broadway 200 | as a child to a supervisor as follows: 201 | 202 | children = [ 203 | {MyBroadway, []} 204 | ] 205 | 206 | Supervisor.start_link(children, strategy: :one_for_one) 207 | 208 | The final step is to configure credentials. You can set the following environment variable: 209 | 210 | export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json 211 | 212 | See [Goth](https://github.com/peburrows/goth) documentation for alternative ways of authenticating 213 | with the API. 214 | 215 | Now the Broadway pipeline should be started when your application starts. Also, if your Broadway 216 | pipeline has any dependency (for example, it needs to talk to the database), make sure that 217 | it is listed *after* its dependencies in the supervision tree. 218 | 219 | If you followed the previous section about setting the project with `gcloud`, you can now test the 220 | the pipeline. In one terminal tab start the application: 221 | 222 | $ iex -S mix 223 | 224 | And in another tab, send a couple of test messages to Pub/Sub: 225 | 226 | $ gcloud pubsub topics publish projects/test-pubsub/topics/test-topic --message "test 1" 227 | messageIds: 228 | - '651428033718119' 229 | 230 | gcloud pubsub topics publish projects/test-pubsub/topics/test-topic --message "test 2" 231 | messageIds: 232 | - '651427034966696' 233 | 234 | Now, In the first tab, you should see output similar to: 235 | 236 | ``` 237 | Got batch of finished jobs from processors, sending ACKs to Pub/Sub as a batch.: ["TEST 1", "TEST 2"] 238 | ``` 239 | 240 | ## Tuning the configuration 241 | 242 | Some of the configuration options available for Broadway come already with a 243 | "reasonable" default value. However those values might not suit your 244 | requirements. Depending on the number of messages you get, how much processing 245 | they need and how much IO work is going to take place, you might need completely 246 | different values to optimize the flow of your pipeline. 
The `concurrency` option 247 | available for every set of producers, processors and batchers, among with 248 | `max_demand`, `batch_size`, and `batch_timeout` can give you a great deal 249 | of flexibility. 250 | 251 | The `concurrency` option controls the concurrency level in each layer of 252 | the pipeline. 253 | See the notes on [`Producer concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-producer-concurrency) 254 | and [`Batcher concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-batcher-concurrency) 255 | for details. 256 | 257 | Here's an example on how you could tune them according to 258 | your needs. 259 | 260 | defmodule MyBroadway do 261 | use Broadway 262 | 263 | def start_link(_opts) do 264 | Broadway.start_link(__MODULE__, 265 | name: __MODULE__, 266 | producer: [ 267 | ... 268 | concurrency: 10, 269 | ], 270 | processors: [ 271 | default: [ 272 | concurrency: 100, 273 | max_demand: 1, 274 | ] 275 | ], 276 | batchers: [ 277 | default: [ 278 | batch_size: 10, 279 | concurrency: 10, 280 | ] 281 | ] 282 | ) 283 | end 284 | 285 | ...callbacks... 286 | end 287 | 288 | In order to get a good set of configurations for your pipeline, it's 289 | important to respect the limitations of the servers you're running, 290 | as well as the limitations of the services you're providing/consuming 291 | data to/from. Broadway comes with telemetry, so you can measure your 292 | pipeline and help ensure your changes are effective. 293 | -------------------------------------------------------------------------------- /guides/examples/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | `Broadway` is a library for building concurrent and multi-stage data ingestion and data processing pipelines with Elixir. Broadway pipelines are concurrent and robust, thanks to the Erlang VM and its actors. It features: 4 | 5 | * Back-pressure 6 | * Automatic acknowledgements at the end of the pipeline 7 | * Batching 8 | * Fault tolerance 9 | * Graceful shutdown 10 | * Built-in testing 11 | * Custom failure handling 12 | * Ordering and partitioning 13 | * Rate-limiting 14 | * Metrics 15 | 16 | ## Official Producers 17 | 18 | Currently we officially support four Broadway producers: 19 | 20 | * Amazon SQS: [Source](https://github.com/dashbitco/broadway_sqs) - [Guide](amazon-sqs.md) 21 | * Apache Kafka: [Source](https://github.com/dashbitco/broadway_kafka) - [Guide](apache-kafka.md) 22 | * Google Cloud Pub/Sub: [Source](https://github.com/dashbitco/broadway_cloud_pub_sub) - [Guide](google-cloud-pubsub.md) 23 | * RabbitMQ: [Source](https://github.com/dashbitco/broadway_rabbitmq) - [Guide](rabbitmq.md) 24 | 25 | The guides links above will help you get started with your adapter of choice. For API reference, you can check out the `Broadway` module. 26 | 27 | ## Non-official (Off-Broadway) Producers 28 | 29 | For those interested in rolling their own Broadway Producers (which we actively encourage!), we recommend using the `OffBroadway` namespace, mirroring the [Off-Broadway theaters](https://en.wikipedia.org/wiki/Off-Broadway). For example, if you want to publish your own integration with Amazon SQS, you can package it as `off_broadway_sqs`, which uses the `OffBroadway.SQS` namespace. 
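To give a rough idea of what rolling your own producer involves, here is a deliberately minimal sketch. The module name and the `fetch_events/2` helper are hypothetical, and a real integration would implement a proper `Broadway.Acknowledger` instead of the no-op one used here:

    defmodule OffBroadway.MyQueue.Producer do
      use GenStage

      @behaviour Broadway.Producer

      @impl true
      def init(opts) do
        {:producer, opts}
      end

      @impl true
      def handle_demand(demand, state) do
        # Fetch up to `demand` events from the external system (hypothetical helper)
        # and wrap each one in a Broadway.Message struct, which requires the
        # :data and :acknowledger fields to be set.
        messages =
          for data <- fetch_events(demand, state) do
            %Broadway.Message{
              data: data,
              acknowledger: Broadway.NoopAcknowledger.init()
            }
          end

        {:noreply, messages, state}
      end

      defp fetch_events(_demand, _state), do: []
    end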
30 | 31 | The following Off-Broadway libraries are available (feel free to send a PR adding your own in alphabetical order): 32 | 33 | * [off_broadway_amqp10](https://github.com/highmobility/off_broadway_amqp10): [Guide](https://hexdocs.pm/off_broadway_amqp10/) 34 | * [off_broadway_elasticsearch](https://github.com/jonlunsford/off_broadway_elasticsearch): [Guide](https://hexdocs.pm/off_broadway_elasticsearch/) 35 | * [off_broadway_kafka](https://github.com/bbalser/off_broadway_kafka): [Guide](https://hexdocs.pm/off_broadway_kafka/) 36 | * [off_broadway_memory](https://github.com/elliotekj/off_broadway_memory): [Guide](https://hexdocs.pm/off_broadway_memory/) 37 | * [off_broadway_redis](https://github.com/amokan/off_broadway_redis): [Guide](https://hexdocs.pm/off_broadway_redis/) 38 | * [off_broadway_redis_stream](https://github.com/akash-akya/off_broadway_redis_stream): [Guide](https://hexdocs.pm/off_broadway_redis_stream/) 39 | * [off_broadway_splunk](https://github.com/Intility/off_broadway_splunk): [Guide](https://hexdocs.pm/off_broadway_splunk/) 40 | -------------------------------------------------------------------------------- /guides/examples/rabbitmq.md: -------------------------------------------------------------------------------- 1 | # RabbitMQ 2 | 3 | RabbitMQ is an open source message broker designed to be highly scalable and 4 | distributed. It supports multiple protocols including the Advanced Message 5 | Queuing Protocol (AMQP). 6 | 7 | ## Getting Started 8 | 9 | In order to use Broadway with RabbitMQ, we need to: 10 | 11 | 1. [Create a queue](#create-a-queue) (or use an existing one) 12 | 1. [Configure our Elixir project to use Broadway](#configure-the-project) 13 | 1. [Define your pipeline configuration](#define-the-pipeline-configuration) 14 | 1. [Implement Broadway callbacks](#implement-broadway-callbacks) 15 | 1. [Run the Broadway pipeline](#run-the-broadway-pipeline) 16 | 1. [Tuning the configuration](#tuning-the-configuration) (Optional) 17 | 18 | In case you want to work with an existing queue, you can skip [step 1](#create-a-queue) 19 | and jump to [Configure the project](#configure-the-project). 20 | 21 | > Note: `BroadwayRabbitMQ` does not automatically create any queue. If you 22 | configure a pipeline with a non-existent queue, the producers will crash, 23 | bringing down the pipeline. 24 | 25 | ## Create a queue 26 | 27 | RabbitMQ runs on many operating systems. Please see 28 | [Downloading and Installing RabbitMQ](https://www.rabbitmq.com/download.html) for 29 | further information. Also, make sure you have the 30 | [Management](https://www.rabbitmq.com/management.html) plugin enabled, which ships 31 | with the command line tool, `rabbitmqadmin`. 32 | 33 | After successfully installing RabbitMQ, you can declare a new queue with the 34 | following command: 35 | 36 | $ rabbitmqadmin declare queue name=my_queue durable=true 37 | 38 | You can list all declared queues to see our the one we've just created: 39 | 40 | $ rabbitmqctl list_queues 41 | Timeout: 60.0 seconds ... 42 | Listing queues for vhost / ... 43 | name messages 44 | my_queue 0 45 | 46 | ## Configure the project 47 | 48 | In this guide, we're going to use [BroadwayRabbitMQ](https://github.com/dashbitco/broadway_rabbitmq), 49 | which is a Broadway RabbitMQ Connector provided by [Dashbit](https://dashbit.co/). 
50 | 51 | ### Starting a new project 52 | 53 | If you're creating a new project, run: 54 | 55 | $ mix new my_app --sup 56 | 57 | The `--sup` flag instructs Elixir to generate an application with a supervision tree. 58 | 59 | ### Setting up dependencies 60 | 61 | Add `:broadway_rabbitmq` to the list of dependencies in `mix.exs`: 62 | 63 | def deps do 64 | [ 65 | ... 66 | {:broadway_rabbitmq, "~> 0.7"}, 67 | ] 68 | end 69 | 70 | Don't forget to check for the latest version of dependencies. 71 | 72 | ## Define the pipeline configuration 73 | 74 | Broadway is a process-based behaviour and to define a Broadway pipeline, 75 | we need to define three functions: `start_link/1`, `handle_message/3` 76 | and optionally `handle_batch/4`. We will cover `start_link/1` in this 77 | section and the `handle_` callbacks in the next one. 78 | 79 | Similar to other process-based behaviours, `start_link/1` simply 80 | delegates to `Broadway.start_link/2`, which should define the 81 | producers, processors, and batchers in the Broadway pipeline. 82 | Assuming we want to consume messages from a queue called 83 | `my_queue`, one possible configuration would be: 84 | 85 | defmodule MyBroadway do 86 | use Broadway 87 | 88 | alias Broadway.Message 89 | 90 | def start_link(_opts) do 91 | Broadway.start_link(__MODULE__, 92 | name: MyBroadway, 93 | producer: [ 94 | module: {BroadwayRabbitMQ.Producer, 95 | queue: "my_queue", 96 | qos: [ 97 | prefetch_count: 50, 98 | ] 99 | }, 100 | concurrency: 1 101 | ], 102 | processors: [ 103 | default: [ 104 | concurrency: 50 105 | ] 106 | ], 107 | batchers: [ 108 | default: [ 109 | batch_size: 10, 110 | batch_timeout: 1500, 111 | concurrency: 5 112 | ] 113 | ] 114 | ) 115 | end 116 | 117 | ...callbacks... 118 | end 119 | 120 | If you're consuming data from an existing broker that requires authorization, 121 | you'll need to provide your credentials using the `connection` option: 122 | 123 | ... 124 | producer: [ 125 | module: {BroadwayRabbitMQ.Producer, 126 | queue: "my_queue", 127 | connection: [ 128 | username: "user", 129 | password: "password", 130 | ] 131 | ... 132 | } 133 | ] 134 | ... 135 | 136 | For the full list of `connection` options, please see 137 | [`AMQP.Connection.open/1`](https://hexdocs.pm/amqp/1.1.1/AMQP.Connection.html#open/1) 138 | 139 | For general information about setting up Broadway, see `Broadway` 140 | module docs as well as `Broadway.start_link/2`. 141 | 142 | ## Implement Broadway callbacks 143 | 144 | In order to process incoming messages, we need to implement the 145 | required callbacks. For the sake of simplicity, we're considering that 146 | all messages received from the queue are just numbers: 147 | 148 | defmodule MyBroadway do 149 | use Broadway 150 | 151 | alias Broadway.Message 152 | 153 | ...start_link... 154 | 155 | @impl true 156 | def handle_message(_, message, _) do 157 | message 158 | |> Message.update_data(fn data -> {data, String.to_integer(data) * 2} end) 159 | end 160 | 161 | @impl true 162 | def handle_batch(_, messages, _, _) do 163 | list = messages |> Enum.map(fn e -> e.data end) 164 | IO.inspect(list, label: "Got batch") 165 | messages 166 | end 167 | end 168 | 169 | We are not doing anything fancy here, but it should be enough for our 170 | purpose. First, we update the message's data individually inside 171 | `handle_message/3` and then we print each batch inside `handle_batch/4`. 172 | 173 | For more information, see `c:Broadway.handle_message/3` and 174 | `c:Broadway.handle_batch/4`. 
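Keep in mind that `String.to_integer/1` raises if a message carries anything other than digits. When a callback raises, Broadway marks the message as failed for you, but you can also do it explicitly and attach your own reason with `Broadway.Message.failed/2`. One possible variation of the callback above, shown only as a sketch:

    @impl true
    def handle_message(_, message, _) do
      case Integer.parse(message.data) do
        {number, ""} ->
          Message.put_data(message, {message.data, number * 2})

        _ ->
          Message.failed(message, "data is not an integer")
      end
    end

Failed messages are not forwarded to the batchers; they go straight to acknowledgement, and you can react to them by implementing the optional `c:Broadway.handle_failed/2` callback.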
175 | 176 | > Note: Since Broadway v0.2, batching is optional. In case you don't need to 177 | > group messages as batches for further processing/publishing, you can remove 178 | > the `:batchers` configuration along with the `handle_batch/4` callback. This 179 | > is perfectly fine for RabbitMQ, where messages are acknowledged individually 180 | > and never as a batch. 181 | 182 | ## Run the Broadway pipeline 183 | 184 | To run your `Broadway` pipeline, you just need to add as a child in 185 | a supervision tree. Most applications have a supervision tree defined 186 | at `lib/my_app/application.ex`. You can add Broadway as a child to a 187 | supervisor as follows: 188 | 189 | children = [ 190 | {MyBroadway, []} 191 | ] 192 | 193 | Supervisor.start_link(children, strategy: :one_for_one) 194 | 195 | Now the Broadway pipeline should be started when your application starts. 196 | Also, if your Broadway has any dependency (for example, it needs to talk 197 | to the database), make sure that Broadway is listed *after* its dependencies 198 | in the supervision tree. 199 | 200 | You can now test your pipeline by entering an `iex` session: 201 | 202 | $ iex -S mix 203 | 204 | If everything went fine, you should see lots of `info` log messages from the `amqp` 205 | supervisors. If you think that's too verbose and want to do something 206 | about it, please take a look at the _"Log related to amqp supervisors are too verbose"_ 207 | subsection in the `amqp`'s [Troubleshooting](https://hexdocs.pm/amqp/readme.html#troubleshooting) 208 | documentation. 209 | 210 | Finally, let's generate some sample messages to be consumed by Broadway with the 211 | following code: 212 | 213 | {:ok, connection} = AMQP.Connection.open 214 | {:ok, channel} = AMQP.Channel.open(connection) 215 | AMQP.Queue.declare(channel, "my_queue", durable: true) 216 | 217 | Enum.each(1..5000, fn i -> 218 | AMQP.Basic.publish(channel, "", "my_queue", "#{i}") 219 | end) 220 | AMQP.Connection.close(connection) 221 | 222 | You should see the output showing the generated batches: 223 | 224 | Got batch: [ 225 | {"7", 14}, 226 | {"5", 10}, 227 | {"8", 16}, 228 | {"98", 196}, 229 | {"6", 12}, 230 | {"97", 194}, 231 | {"9", 18}, 232 | {"99", 198}, 233 | {"10", 20}, 234 | {"100", 200} 235 | ] 236 | Got batch: [ 237 | {"29", 58}, 238 | {"32", 64}, 239 | ... 240 | ] 241 | 242 | ## Tuning the configuration 243 | 244 | Some of the configuration options available for Broadway come already with a 245 | "reasonable" default value. However, those values might not suit your 246 | requirements. Depending on the number of messages you get, how much processing 247 | they need and how much IO work is going to take place, you might need completely 248 | different values to optimize the flow of your pipeline. The `concurrency` option 249 | available for every set of producers, processors and batchers, among with 250 | `max_demand`, `batch_size`, and `batch_timeout` can give you a great deal 251 | of flexibility. The `concurrency` option controls the concurrency level in 252 | each layer of the pipeline. 253 | See the notes on [`Producer concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-producer-concurrency) 254 | and [`Batcher concurrency`](https://hexdocs.pm/broadway/Broadway.html#module-batcher-concurrency) 255 | for details. 256 | 257 | Another important option to take into account is the `:prefetch_count`. 258 | RabbitMQ will continually push new messages to Broadway as it receives them. 
259 | The `:prefetch_count` setting provides back-pressure by instructing RabbitMQ to [limit the number of unacknowledged messages a consumer will have at a given moment](https://www.rabbitmq.com/consumer-prefetch.html). 260 | See the ["Back-pressure and :prefetch_count"](https://hexdocs.pm/broadway_rabbitmq/BroadwayRabbitMQ.Producer.html#module-back-pressure-and-prefetch_count) 261 | section of the `BroadwayRabbitMQ` documentation for details. 262 | 263 | In order to get a good set of configurations for your pipeline, it's 264 | important to respect the limitations of the servers you're running, 265 | as well as the limitations of the services you're providing/consuming 266 | data to/from. Broadway comes with telemetry, so you can measure your 267 | pipeline and help ensure your changes are effective. 268 | -------------------------------------------------------------------------------- /guides/internals/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | Broadway's architecture is built on top of GenStage. That means we structure 4 | our processing units as independent stages that are responsible for one 5 | individual task in the pipeline. By implementing the `Broadway` behaviour, 6 | we define a `GenServer` process that wraps a `Supervisor` to manage and 7 | own our pipeline. 8 | 9 | ## The pipeline model 10 | 11 | ```asciidoc 12 | [producers] <- pulls data from SQS, RabbitMQ, etc. 13 | | 14 | | (demand dispatcher) 15 | | 16 | handle_message/3 and ----------> [processors] 17 | prepare_messages/2 run here / \ 18 | / \ (partition dispatcher) 19 | / \ 20 | [batcher] [batcher] <- one for each batcher key 21 | | | 22 | | | (demand dispatcher) 23 | | | 24 | handle_batch/4 runs here -> [batch processor][batch processor] 25 | ``` 26 | 27 | ## Internal stages 28 | 29 | * `Broadway.Producer` - A wrapper around the actual producer defined by 30 | the user. It serves as the source of the pipeline. 31 | * `Broadway.Processor` - This is where messages are processed, e.g. do 32 | calculations, convert data into a custom json format etc. Here is where 33 | the code from `handle_message/3` runs. 34 | * `Broadway.Batcher` - Creates batches of messages based on the 35 | batcher's key. One batcher for each key will be created. 36 | * `Broadway.BatchProcessor` - This is where the code from `handle_batch/4` runs. 37 | 38 | ## The supervision tree 39 | 40 | Broadway was designed to always go back to a working state in case 41 | of failures thanks to the use of supervisors. Our supervision tree 42 | is designed as follows: 43 | 44 | ```asciidoc 45 | [Broadway GenServer] 46 | | 47 | | 48 | | 49 | [Broadway Pipeline Supervisor] 50 | / / (:rest_for_one) \ \ 51 | / | | \ 52 | / | | \ 53 | / | | \ 54 | / | | \ 55 | / | | \ 56 | [ProducerSupervisor] [ProcessorSupervisor] [BatchersSupervisor] [Terminator] 57 | (:one_for_one) (:one_for_all) (:one_for_one) 58 | / \ / \ / \ 59 | / \ / \ / \ 60 | / \ / \ / \ 61 | / \ / \ / \ 62 | [Producer_1] ... [Processor_1] ... [BatcherSupervisor_1] ... 63 | (:rest_for_one) 64 | / \ 65 | / \ 66 | / \ 67 | [Batcher] [BatchProcessorSupervisor] 68 | (:one_for_all) 69 | / \ 70 | / \ 71 | / \ 72 | [BatchProcessor_1] ... 73 | ``` 74 | 75 | 76 | Both `ProcessorSupervisor` and `BatchProcessorSupervisor` are set with 77 | `max_restarts` to 0. The idea is that if any process fails, we want 78 | to restart the rest of the tree. 
Since Broadway callbacks are 79 | stateless, we can handle errors and provide reports without crashing 80 | processes. This means that the supervision tree will only shutdown 81 | in case of unforeseen errors in Broadway's implementation. 82 | 83 | The only exception are the producers, which contain external code 84 | and are expected to fail. If a producer crashes, it will be restarted 85 | by its supervisor without cascading failures until its max restarts 86 | is reached. Broadway automatically handles those failures by making 87 | processors automatically resubscribe to producers in case of crashes. 88 | 89 | ## Graceful shutdowns 90 | 91 | The cascading failures aspect also provides safe semantics for graceful 92 | shutdown. We know that either all processes are running OR they are all 93 | being shutdown. Therefore, to gracefully shutdown the supervision tree, 94 | a terminator process is activated, which starts the following steps: 95 | 96 | 1. It notifies the first layer of processors that they should not 97 | resubscribe to producers once they exit 98 | 99 | 2. It tells all producers to no longer accept demand, flush all 100 | current events, and then shutdown 101 | 102 | 3. It then monitors and waits for a confirmation message from batch 103 | processors. At this point, the terminator is effectively blocking 104 | the supervisor until all events have been processed 105 | 106 | This triggers a cascade effect where processors notice all of its producers 107 | have been cancelled, causing them to flush their own events and cancels the 108 | stages downstream, and so on and so on. This happens until batch processors 109 | notice all of their producers have been cancelled, effectively notifying the 110 | terminator to shutdown, allowing the outer most supervisor to go on and fully 111 | terminate all stages, which at this point have flushed all events. 112 | -------------------------------------------------------------------------------- /lib/broadway/acknowledger.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Acknowledger do 2 | @moduledoc """ 3 | A behaviour used to acknowledge that the received messages 4 | were successfully processed or failed. 5 | 6 | When implementing a new connector for Broadway, you should 7 | implement this behaviour and consider how the technology 8 | you're working with handles message acknowledgement. 9 | 10 | The `c:ack/3` callback must be implemented in order to notify 11 | the origin of the data that a message can be safely removed 12 | after been successfully processed and published. In case of 13 | failed messages or messages without acknowledgement, depending 14 | on the technology chosen, the messages can be either moved back 15 | in the queue or, alternatively, moved to a *dead-letter queue*. 16 | """ 17 | 18 | alias Broadway.Message 19 | 20 | require Logger 21 | 22 | @doc """ 23 | Invoked to acknowledge successful and failed messages. 24 | 25 | * `ack_ref` is a term that uniquely identifies how messages 26 | should be grouped and sent for acknowledgement. Imagine 27 | you have a scenario where messages are coming from 28 | different producers. Broadway will use this information 29 | to correctly identify the acknowledger and pass it among 30 | with the messages so you can properly communicate with 31 | the source of the data for acknowledgement. `ack_ref` is 32 | part of `t:Broadway.Message.acknowledger/0`. 
33 | 34 | * `successful` is the list of messages that were 35 | successfully processed and published. 36 | 37 | * `failed` is the list of messages that, for some reason, 38 | could not be processed or published. 39 | 40 | """ 41 | @callback ack(ack_ref :: term, successful :: [Message.t()], failed :: [Message.t()]) :: 42 | :ok 43 | 44 | @doc """ 45 | Configures the acknowledger with new `options`. 46 | 47 | Every acknowledger can decide how to incorporate the given `options` into its 48 | `ack_data`. The `ack_data` is the current acknowledger's data. The return value 49 | of this function is `{:ok, new_ack_data}` where `new_ack_data` is the updated 50 | data for the acknowledger. 51 | 52 | Note that `options` are different for every acknowledger, as the acknowledger 53 | is what specifies what are the supported options. Check the documentation for the 54 | acknowledger you're using to see the supported options. 55 | 56 | `ack_ref` and `ack_data` are part of `t:Broadway.Message.acknowledger/0`. 57 | """ 58 | @callback configure(ack_ref :: term, ack_data :: term, options :: keyword) :: 59 | {:ok, new_ack_data :: term} 60 | 61 | @optional_callbacks [configure: 3] 62 | 63 | @doc false 64 | @spec ack_messages([Message.t()], [Message.t()]) :: no_return 65 | def ack_messages(successful, failed) do 66 | %{} 67 | |> group_by_acknowledger(successful, :successful) 68 | |> group_by_acknowledger(failed, :failed) 69 | |> Enum.each(&call_ack/1) 70 | end 71 | 72 | defp group_by_acknowledger(ackers, messages, key) do 73 | Enum.reduce(messages, ackers, fn %{acknowledger: {acknowledger, ack_ref, _}} = msg, acc -> 74 | ack_info = {acknowledger, ack_ref} 75 | pdict_key = {ack_info, key} 76 | Process.put(pdict_key, [msg | Process.get(pdict_key, [])]) 77 | Map.put(acc, ack_info, true) 78 | end) 79 | end 80 | 81 | defp call_ack({{acknowledger, ack_ref} = ack_info, true}) do 82 | successful = Process.delete({ack_info, :successful}) || [] 83 | failed = Process.delete({ack_info, :failed}) || [] 84 | acknowledger.ack(ack_ref, Enum.reverse(successful), Enum.reverse(failed)) 85 | end 86 | 87 | @doc false 88 | # Builds a crash reason used in Logger reporting. 89 | def crash_reason(:throw, reason, stack), do: {{:nocatch, reason}, stack} 90 | def crash_reason(:error, reason, stack), do: {Exception.normalize(:error, reason, stack), stack} 91 | def crash_reason(:exit, reason, stack), do: {reason, stack} 92 | 93 | # Used by the processor and the batcher to maybe call c:handle_failed/2 94 | # on failed messages. 95 | @doc false 96 | def maybe_handle_failed_messages(messages, module, context) do 97 | if function_exported?(module, :handle_failed, 2) and messages != [] do 98 | handle_failed_messages(messages, module, context) 99 | else 100 | messages 101 | end 102 | end 103 | 104 | defp handle_failed_messages(messages, module, context) do 105 | module.handle_failed(messages, context) 106 | catch 107 | kind, reason -> 108 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 109 | crash_reason: crash_reason(kind, reason, __STACKTRACE__) 110 | ) 111 | 112 | messages 113 | else 114 | return_messages when is_list(return_messages) -> 115 | size = length(messages) 116 | return_size = length(return_messages) 117 | 118 | if return_size != size do 119 | Logger.error( 120 | "#{inspect(module)}.handle_failed/2 received #{size} messages and " <> 121 | "returned only #{return_size}. 
All messages given to handle_failed/2 " <> 122 | "must be returned" 123 | ) 124 | end 125 | 126 | return_messages 127 | 128 | _other -> 129 | Logger.error( 130 | "#{inspect(module)}.handle_failed/2 didn't return a list of messages, " <> 131 | "so ignoring its return value" 132 | ) 133 | 134 | messages 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /lib/broadway/application.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Application do 2 | use Application 3 | 4 | def start(_type, _args) do 5 | config_storage = Broadway.ConfigStorage.get_module() 6 | 7 | if Code.ensure_loaded?(config_storage) and function_exported?(config_storage, :setup, 0) do 8 | config_storage.setup() 9 | end 10 | 11 | opts = [strategy: :one_for_one, name: Broadway.Supervisor] 12 | Supervisor.start_link([], opts) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/broadway/batch_info.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.BatchInfo do 2 | @moduledoc """ 3 | A struct used to hold information about a generated batch. 4 | 5 | An instance of this struct containing the related info will 6 | be passed to the `c:Broadway.handle_batch/4` callback of the 7 | module implementing the `Broadway` behaviour. 8 | 9 | See the documentation for [`%Broadway.BatchInfo{}`](`__struct__/0`) 10 | for information on the fields. 11 | """ 12 | 13 | @typedoc """ 14 | The type for a batch info struct. 15 | """ 16 | @type t :: %__MODULE__{ 17 | batcher: atom, 18 | batch_key: term, 19 | partition: non_neg_integer | nil, 20 | size: pos_integer, 21 | trigger: atom 22 | } 23 | 24 | @doc """ 25 | The batch info struct. 26 | 27 | The fields are: 28 | 29 | * `:batcher` - is the key that defined the batcher. This value can 30 | be set in the `c:Broadway.handle_message/3` callback using 31 | `Broadway.Message.put_batcher/2`. 32 | 33 | * `:batch_key` - identifies the batch key for this batch. 34 | See `Broadway.Message.put_batch_key/2`. 35 | 36 | * `:partition` - the partition, if present. 37 | 38 | * `:size` - the number of messages in the batch. 39 | 40 | * `:trigger` - the trigger that generated the batch, like `:timeout` 41 | or `:flush`. 42 | 43 | """ 44 | defstruct [ 45 | :batcher, 46 | :batch_key, 47 | :partition, 48 | :size, 49 | :trigger 50 | ] 51 | end 52 | -------------------------------------------------------------------------------- /lib/broadway/caller_acknowledger.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.CallerAcknowledger do 2 | @moduledoc """ 3 | A simple acknowledger that sends a message back to a caller. 4 | 5 | If you want to use this acknowledger in messages produced by your 6 | `Broadway.Producer`, you can get its configuration by calling 7 | the `init/0` function. For example, you can use it in 8 | `Broadway.test_message/3`: 9 | 10 | some_ref = make_ref() 11 | 12 | Broadway.test_message( 13 | MyPipeline, 14 | "some data", 15 | acknowledger: Broadway.CallerAcknowledger.init({self(), some_ref}, :ignored) 16 | ) 17 | 18 | The first parameter is a tuple with the PID to receive the messages 19 | and a unique identifier (usually a reference). Such unique identifier 20 | is then included in the messages sent to the PID. The second parameter, 21 | which is per message, is ignored. 
22 | 23 | It sends a message in the format: 24 | 25 | {:ack, ref, successful_messages, failed_messages} 26 | 27 | If `Broadway.Message.configure_ack/2` is called on a message that 28 | uses this acknowledger, then the following message is sent: 29 | 30 | {:configure, ref, options} 31 | 32 | """ 33 | 34 | @behaviour Broadway.Acknowledger 35 | 36 | @doc """ 37 | Returns the acknowledger metadata. 38 | 39 | See the module documentation. 40 | """ 41 | @spec init({pid, ref :: term}, ignored_term :: term) :: Broadway.Message.acknowledger() 42 | def init({pid, ref} = _pid_and_ref, ignored_term) when is_pid(pid) do 43 | {__MODULE__, {pid, ref}, ignored_term} 44 | end 45 | 46 | @impl true 47 | def ack({pid, ref}, successful, failed) do 48 | send(pid, {:ack, ref, successful, failed}) 49 | end 50 | 51 | @impl true 52 | def configure({pid, ref}, ack_data, options) do 53 | send(pid, {:configure, ref, options}) 54 | {:ok, ack_data} 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /lib/broadway/config_storage.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.ConfigStorage do 2 | @moduledoc false 3 | 4 | @doc """ 5 | Optional setup for the configuration storage. 6 | 7 | Invoked when Broadway boots. 8 | """ 9 | @callback setup() :: :ok 10 | 11 | @doc """ 12 | Lists all broadway names in the config storage. 13 | """ 14 | @callback list() :: [term()] 15 | 16 | @doc """ 17 | Puts the given key value pair in the underlying storage. 18 | """ 19 | @callback put(server :: term(), value :: %Broadway.Topology{}) :: term() 20 | 21 | @doc """ 22 | Retrieves a configuration from the underlying storage. 23 | """ 24 | @callback get(server :: term()) :: term() 25 | 26 | @doc """ 27 | Deletes a configuration from the underlying storage. 28 | """ 29 | @callback delete(server :: term()) :: boolean() 30 | 31 | @optional_callbacks setup: 0 32 | 33 | @doc """ 34 | Retrieves the configured module based on the `:config_storage` key. 35 | """ 36 | @spec get_module() :: module() 37 | def get_module() do 38 | case Application.fetch_env!(:broadway, :config_storage) do 39 | :ets -> Broadway.ConfigStorage.ETS 40 | :persistent_term -> Broadway.ConfigStorage.PersistentTerm 41 | mod -> mod 42 | end 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/broadway/config_storage/ets.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.ConfigStorage.ETS do 2 | @moduledoc false 3 | 4 | @behaviour Broadway.ConfigStorage 5 | 6 | @table __MODULE__ 7 | 8 | # Used in tests. 
9 | def table, do: @table 10 | 11 | @impl true 12 | def setup do 13 | :ets.new(@table, [:named_table, :public, :set, {:read_concurrency, true}]) 14 | :ok 15 | end 16 | 17 | @impl true 18 | def list do 19 | :ets.select(@table, [{{:"$1", :_}, [], [:"$1"]}]) 20 | end 21 | 22 | @impl true 23 | def get(server) do 24 | case :ets.match(@table, {server, :"$1"}) do 25 | [[topology]] -> topology 26 | _ -> nil 27 | end 28 | end 29 | 30 | @impl true 31 | def put(server, topology) do 32 | :ets.insert(@table, {server, topology}) 33 | end 34 | 35 | @impl true 36 | def delete(server) do 37 | :ets.delete(@table, server) 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/broadway/config_storage/persistent_term.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.ConfigStorage.PersistentTerm do 2 | @moduledoc false 3 | @behaviour Broadway.ConfigStorage 4 | 5 | @impl true 6 | def setup do 7 | if not Code.ensure_loaded?(:persistent_term) do 8 | require Logger 9 | Logger.error("Broadway requires Erlang/OTP 21.3+") 10 | raise "Broadway requires Erlang/OTP 21.3+" 11 | end 12 | 13 | :ok 14 | end 15 | 16 | @impl true 17 | def list do 18 | for {{Broadway, name}, %Broadway.Topology{}} <- :persistent_term.get() do 19 | name 20 | end 21 | end 22 | 23 | @impl true 24 | def get(server) do 25 | :persistent_term.get({Broadway, server}, nil) 26 | end 27 | 28 | @impl true 29 | def put(server, topology) do 30 | :persistent_term.put({Broadway, server}, topology) 31 | end 32 | 33 | @impl true 34 | def delete(_server) do 35 | # We don't delete from persistent term on purpose. Since the process is 36 | # named, we can assume it does not start dynamically, so it will either 37 | # restart or the amount of memory it uses is negligibla to justify the 38 | # process purging done by persistent_term. If the repo is restarted and 39 | # stores the same metadata, then no purging happens either. 40 | # :persistent_term.erase({Broadway, server}) 41 | true 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/broadway/dummy_producer.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.DummyProducer do 2 | @moduledoc """ 3 | A producer that does nothing, used mostly for testing. 4 | 5 | See "Testing" section in `Broadway` module documentation for more information. 6 | """ 7 | 8 | use GenStage 9 | @behaviour Broadway.Producer 10 | 11 | @impl true 12 | def init(_args) do 13 | {:producer, []} 14 | end 15 | 16 | @impl true 17 | def handle_demand(_demand, state) do 18 | {:noreply, [], state} 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/broadway/message.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Message do 2 | @moduledoc """ 3 | This struct holds all information about a message. 4 | 5 | A message is first created by the producers. It is then 6 | sent downstream and gets updated multiple times, either 7 | by a module implementing the `Broadway` behaviour 8 | through the `c:Broadway.handle_message/3` callback 9 | or internally by one of the built-in stages of Broadway. 10 | 11 | Instead of modifying the struct directly, you should use the functions 12 | provided by this module to manipulate messages. 
However, if you are implementing 13 | a `Broadway.Producer` of your own, see `t:t/0` to see what fields you should set. 14 | """ 15 | 16 | alias __MODULE__, as: Message 17 | alias Broadway.{Acknowledger, NoopAcknowledger} 18 | 19 | @typedoc """ 20 | The acknowledger of the message. 21 | 22 | This tuple contains: 23 | 24 | * A module implementing the `Broadway.Acknowledger` behaviour. 25 | 26 | * An ack reference that is passed to the `c:Broadway.Acknowledger.ack/3` 27 | callback. See `c:Broadway.Acknowledger.ack/3` for more information. 28 | 29 | * An arbitrary term that is passed to the optional 30 | `c:Broadway.Acknowledger.configure/3` callback. 31 | 32 | """ 33 | @typedoc since: "1.1.0" 34 | @type acknowledger :: {module, ack_ref :: term, data :: term} 35 | 36 | @typedoc """ 37 | The Broadway message struct. 38 | 39 | Most of these fields are manipulated by Broadway itself. You can 40 | *read* the `:metadata` field, and you can use the functions in this 41 | module to update most of the other fields. If you are implementing 42 | your own producer, see the `Broadway.Producer` documentation 43 | for more information on how to create and manipulate message structs. 44 | """ 45 | @type t :: %Message{ 46 | data: term, 47 | metadata: %{optional(atom) => term}, 48 | acknowledger: acknowledger, 49 | batcher: atom, 50 | batch_key: term, 51 | batch_mode: :bulk | :flush, 52 | status: 53 | :ok 54 | | {:failed, reason :: term} 55 | | {:throw | :error | :exit, term, Exception.stacktrace()} 56 | } 57 | 58 | @enforce_keys [:data, :acknowledger] 59 | defstruct data: nil, 60 | metadata: %{}, 61 | acknowledger: nil, 62 | batcher: :default, 63 | batch_key: :default, 64 | batch_mode: :bulk, 65 | status: :ok 66 | 67 | @doc """ 68 | Updates the data in the message. 69 | 70 | This function is usually used inside the `c:Broadway.handle_message/3` implementation 71 | to update data with new processed data. 72 | """ 73 | @spec update_data(message :: Message.t(), fun :: (term -> term)) :: Message.t() 74 | def update_data(%Message{} = message, fun) when is_function(fun, 1) do 75 | %{message | data: fun.(message.data)} 76 | end 77 | 78 | @doc """ 79 | Stores the given data in the message. 80 | 81 | This function is usually used inside the `c:Broadway.handle_message/3` implementation 82 | to replace data with new processed data. 83 | """ 84 | @doc since: "1.0.0" 85 | @spec put_data(message :: Message.t(), term) :: Message.t() 86 | def put_data(%Message{} = message, data) do 87 | %{message | data: data} 88 | end 89 | 90 | @doc """ 91 | Defines the target batcher which the message should be forwarded to. 92 | """ 93 | @spec put_batcher(message :: Message.t(), batcher :: atom) :: Message.t() 94 | def put_batcher(%Message{} = message, batcher) when is_atom(batcher) do 95 | %{message | batcher: batcher} 96 | end 97 | 98 | @doc """ 99 | Defines the message batch key. 100 | 101 | The batch key identifies the batch the message belongs to, within 102 | a given batcher. Each batcher then groups batches with the same 103 | `batch_key`, with size of at most `batch_size` within period 104 | `batch_timeout`. Both `batch_size` and `batch_timeout` are managed 105 | per batch key, so a batcher is capable of grouping multiple batch 106 | keys at the same time, regardless of the concurrency level. 107 | 108 | If a given batcher has multiple batch processors (concurrency > 1), 109 | all messages with the same batch key are routed to the same processor. 
110 | So different batch keys may run concurrently but the same batch key 111 | is always run serially and in the same batcher processor. 112 | """ 113 | @spec put_batch_key(message :: Message.t(), batch_key :: term) :: Message.t() 114 | def put_batch_key(%Message{} = message, batch_key) do 115 | %{message | batch_key: batch_key} 116 | end 117 | 118 | @doc """ 119 | Sets the batching mode for the message. 120 | 121 | When the mode is `:bulk`, the batch that the message is in is delivered after 122 | the batch size or batch timeout is reached. 123 | 124 | When the mode is `:flush`, the batch that the message is in is delivered 125 | immediately after processing. Note it doesn't mean the batch contains only a single element 126 | but rather that all messages received from the processor are delivered without waiting. 127 | 128 | The default mode for messages is `:bulk`. 129 | """ 130 | @spec put_batch_mode(message :: Message.t(), mode :: :bulk | :flush) :: Message.t() 131 | def put_batch_mode(%Message{} = message, mode) when mode in [:bulk, :flush] do 132 | %{message | batch_mode: mode} 133 | end 134 | 135 | @doc """ 136 | Configures the acknowledger of this message. 137 | 138 | This function calls the `c:Broadway.Acknowledger.configure/3` callback to 139 | change the configuration of the acknowledger for the given `message`. 140 | 141 | This function can only be called if the acknowledger implements the `configure/3` 142 | callback. If it doesn't, an error is raised. 143 | """ 144 | @doc since: "0.5.0" 145 | @spec configure_ack(message :: Message.t(), options :: keyword) :: Message.t() 146 | def configure_ack(%Message{} = message, options) when is_list(options) do 147 | %{acknowledger: {module, ack_ref, ack_data}} = message 148 | 149 | if Code.ensure_loaded?(module) and function_exported?(module, :configure, 3) do 150 | {:ok, ack_data} = module.configure(ack_ref, ack_data, options) 151 | %{message | acknowledger: {module, ack_ref, ack_data}} 152 | else 153 | raise "the configure/3 callback is not defined by acknowledger #{inspect(module)}" 154 | end 155 | end 156 | 157 | @doc """ 158 | Mark a message as failed. 159 | 160 | Failed messages are sent directly to the related acknowledger at the end 161 | of this step and therefore they're not forwarded to the next step in the 162 | pipeline. 163 | 164 | Failing a message does not emit any log but it does trigger the 165 | `c:Broadway.handle_failed/2` callback. 166 | """ 167 | @spec failed(message :: Message.t(), reason :: term) :: Message.t() 168 | def failed(%Message{} = message, reason) do 169 | %{message | status: {:failed, reason}} 170 | end 171 | 172 | @doc """ 173 | Immediately acknowledges the given message or list of messages. 174 | 175 | This function can be used to acknowledge a message (or list of messages) 176 | immediately without waiting for the rest of the pipeline. 177 | 178 | Acknowledging a message sets that message's acknowledger to a no-op 179 | acknowledger so that it's safe to ack at the end of the pipeline. 180 | 181 | Returns the updated acked message if a message is passed in, 182 | or the updated list of acked messages if a list of messages is passed in. 183 | """ 184 | @doc since: "0.5.0" 185 | @spec ack_immediately(message :: Message.t()) :: Message.t() 186 | @spec ack_immediately(messages :: [Message.t(), ...]) :: [Message.t(), ...] 
187 | def ack_immediately(message_or_messages) 188 | 189 | def ack_immediately(%Message{} = message) do 190 | [message] = ack_immediately([message]) 191 | message 192 | end 193 | 194 | def ack_immediately(messages) when is_list(messages) and messages != [] do 195 | {successful, failed} = Enum.split_with(messages, &(&1.status == :ok)) 196 | _ = Acknowledger.ack_messages(successful, failed) 197 | 198 | for message <- messages do 199 | %{message | acknowledger: NoopAcknowledger.init()} 200 | end 201 | end 202 | end 203 | -------------------------------------------------------------------------------- /lib/broadway/noop_acknowledger.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.NoopAcknowledger do 2 | @moduledoc """ 3 | An acknowledger that does nothing. 4 | 5 | If you want to use this acknowledger in messages produced by your 6 | `Broadway.Producer`, you can get its configuration by calling 7 | the `init/0` function. For example, you can use it in 8 | `Broadway.test_message/3`: 9 | 10 | Broadway.test_message(MyPipeline, "some data", acknowledger: Broadway.NoopAcknowledger.init()) 11 | 12 | Broadway sets this acknowledger automatically on messages that have been acked 13 | via `Broadway.Message.ack_immediately/1`. 14 | """ 15 | 16 | @behaviour Broadway.Acknowledger 17 | 18 | @doc """ 19 | Returns the acknowledger metadata. 20 | """ 21 | @spec init() :: Broadway.Message.acknowledger() 22 | def init do 23 | {__MODULE__, _ack_ref = nil, _data = nil} 24 | end 25 | 26 | @impl true 27 | def ack(_ack_ref = nil, _successful, _failed) do 28 | :ok 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/broadway/options.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Options do 2 | @moduledoc false 3 | 4 | definition = [ 5 | name: [ 6 | required: true, 7 | type: {:custom, __MODULE__, :validate_name, []}, 8 | doc: """ 9 | Used for name registration. When an atom, all processes/stages 10 | created will be named using this value as prefix. 11 | """ 12 | ], 13 | shutdown: [ 14 | type: :pos_integer, 15 | default: 30000, 16 | doc: """ 17 | Optional. The time in milliseconds given for Broadway to 18 | gracefully shutdown without discarding events. 19 | """ 20 | ], 21 | max_restarts: [type: :non_neg_integer, default: 3], 22 | max_seconds: [type: :pos_integer, default: 5], 23 | resubscribe_interval: [ 24 | type: :non_neg_integer, 25 | default: 100, 26 | doc: """ 27 | The interval in milliseconds that 28 | processors wait until they resubscribe to a failed producers. 29 | """ 30 | ], 31 | context: [ 32 | type: :any, 33 | default: :context_not_set, 34 | doc: """ 35 | A user defined data structure that will be passed to handle_message/3 and handle_batch/4. 36 | """ 37 | ], 38 | producer: [ 39 | required: true, 40 | type: :non_empty_keyword_list, 41 | doc: """ 42 | A keyword list of options. See ["Producers options"](#start_link/2-producers-options) 43 | section below. Only a single producer is allowed. 44 | """, 45 | subsection: """ 46 | ### Producers options 47 | 48 | The producer options allow users to set up the producer. 49 | 50 | The available options are: 51 | """, 52 | keys: [ 53 | module: [ 54 | required: true, 55 | type: :mod_arg, 56 | doc: """ 57 | A tuple representing a GenStage producer. 
58 | The tuple format should be `{mod, arg}`, where `mod` is the module 59 | that implements the GenStage behaviour and `arg` the argument that will 60 | be passed to the `init/1` callback of the producer. See `Broadway.Producer` 61 | for more information. 62 | """ 63 | ], 64 | concurrency: [ 65 | type: :pos_integer, 66 | default: 1, 67 | doc: """ 68 | The number of concurrent producers that 69 | will be started by Broadway. Use this option to control the concurrency 70 | level of each set of producers. 71 | """ 72 | ], 73 | transformer: [ 74 | type: :mfa, 75 | default: nil, 76 | doc: """ 77 | A tuple representing a transformer that translates a produced GenStage event into a 78 | `%Broadway.Message{}`. The tuple format should be `{mod, fun, opts}` and the function 79 | should have the following spec `(event :: term, opts :: term) :: Broadway.Message.t` 80 | This function must be used sparingly and exclusively to convert regular 81 | messages into `Broadway.Message`. That's because a failure in the 82 | `:transformer` callback will cause the whole producer to terminate, 83 | possibly leaving unacknowledged messages along the way. 84 | """ 85 | ], 86 | spawn_opt: [ 87 | type: :keyword_list, 88 | doc: """ 89 | Overrides the top-level `:spawn_opt`. 90 | """ 91 | ], 92 | hibernate_after: [ 93 | type: :pos_integer, 94 | doc: """ 95 | Overrides the top-level `:hibernate_after`. 96 | """ 97 | ], 98 | rate_limiting: [ 99 | type: :non_empty_keyword_list, 100 | doc: """ 101 | A list of options to enable and configure rate limiting for producing. 102 | If this option is present, rate limiting is enabled, otherwise it isn't. 103 | Rate limiting refers to the rate at which producers will forward 104 | messages to the rest of the pipeline. The rate limiting is applied to 105 | and shared by all producers within the time limit. 106 | The following options are supported: 107 | """, 108 | keys: [ 109 | allowed_messages: [ 110 | required: true, 111 | type: :pos_integer, 112 | doc: """ 113 | An integer that describes how many messages are allowed in the specified interval. 114 | """ 115 | ], 116 | interval: [ 117 | required: true, 118 | type: :pos_integer, 119 | doc: """ 120 | An integer that describes the interval (in milliseconds) 121 | during which the number of allowed messages is allowed. 122 | If the producer produces more than `allowed_messages` 123 | in `interval`, only `allowed_messages` will be published until 124 | the end of `interval`, and then more messages will be published. 125 | """ 126 | ] 127 | ] 128 | ] 129 | ] 130 | ], 131 | processors: [ 132 | required: true, 133 | type: :non_empty_keyword_list, 134 | doc: """ 135 | A keyword list of named processors where the key is an atom as identifier and 136 | the value is another keyword list of options. 137 | See ["Processors options"](#start_link/2-processors-options) 138 | section below. Currently only a single processor is allowed. 139 | """, 140 | subsection: """ 141 | ### Processors options 142 | 143 | > #### You don't need multiple processors {: .info} 144 | > 145 | > A common misconception is that, if your data requires multiple 146 | > transformations, each with a different concern, then you must 147 | > have several processors. 148 | > 149 | > However, that's not quite true. Separation of concerns is modeled 150 | > by defining several modules and functions, not processors. Processors 151 | > are ultimately about moving data around and you should only do it 152 | > when necessary. 
Using processors for code organization purposes would 153 | > lead to inefficient pipelines. 154 | 155 | """, 156 | keys: [ 157 | *: [ 158 | type: :keyword_list, 159 | keys: [ 160 | concurrency: [ 161 | type: :pos_integer, 162 | doc: """ 163 | The number of concurrent process that will 164 | be started by Broadway. Use this option to control the concurrency level 165 | of the processors. The default value is `System.schedulers_online() * 2`. 166 | """ 167 | ], 168 | min_demand: [ 169 | type: :non_neg_integer, 170 | doc: """ 171 | Set the minimum demand of all processors stages. 172 | """ 173 | ], 174 | max_demand: [ 175 | type: :non_neg_integer, 176 | default: 10, 177 | doc: """ 178 | Set the maximum demand of all processors stages. 179 | """ 180 | ], 181 | partition_by: [ 182 | type: {:fun, 1}, 183 | doc: """ 184 | Overrides the top-level `:partition_by`. 185 | """ 186 | ], 187 | spawn_opt: [ 188 | type: :keyword_list, 189 | doc: """ 190 | Overrides the top-level `:spawn_opt`. 191 | """ 192 | ], 193 | hibernate_after: [ 194 | type: :pos_integer, 195 | doc: """ 196 | Overrides the top-level `:hibernate_after`. 197 | """ 198 | ] 199 | ] 200 | ] 201 | ] 202 | ], 203 | batchers: [ 204 | default: [], 205 | type: :keyword_list, 206 | doc: """ 207 | A keyword list of named batchers 208 | where the key is an atom as identifier and the value is another 209 | keyword list of options. See ["Batchers options"](#start_link/2-batchers-options) 210 | section below. 211 | """, 212 | subsection: """ 213 | ### Batchers options 214 | 215 | """, 216 | keys: [ 217 | *: [ 218 | type: :keyword_list, 219 | keys: [ 220 | concurrency: [ 221 | type: :pos_integer, 222 | default: 1, 223 | doc: """ 224 | The number of concurrent batch processors 225 | that will be started by Broadway. Use this option to control the 226 | concurrency level. Note that this only sets the numbers of batch 227 | processors for each batcher group, not the number of batchers. 228 | The number of batchers will always be one for each batcher key 229 | defined. 230 | """ 231 | ], 232 | batch_size: [ 233 | type: {:custom, __MODULE__, :validate_batch_size, []}, 234 | default: 100, 235 | doc: """ 236 | The size of the generated batches. Default value is `100`. It is typically an 237 | integer but it can also be tuple of `{init_acc, fun}` 238 | where `fun` receives two arguments: a `Broadway.Message` and 239 | an `acc`. The function must return either `{:emit, acc}` to indicate 240 | all batched messages must be emitted or `{:cont, acc}` to continue 241 | batching. `init_acc` is the initial accumulator used on the first call. You can 242 | consider that setting the accumulator to an integer is the equivalent to custom 243 | batching function of: 244 | 245 | {batch_size, 246 | fn 247 | _message, 1 -> {:emit, batch_size} 248 | _message, count -> {:cont, count - 1} 249 | end} 250 | 251 | We start with the batch size as the accumulator, and then we go down for every 252 | event. When we get down to `1`, we emit the batch and *reset* the accumulator 253 | to the batch size. That's because when returning `{:emit, acc}`, `acc` is 254 | used for the next call to the `:batch_size` function. 255 | 256 | > #### When is this called {: .info} 257 | > 258 | > If you pass a function as the batch size, that function is invoked *after* 259 | > `c:handle_message/3`. 260 | 261 | """ 262 | ], 263 | max_demand: [ 264 | type: :pos_integer, 265 | doc: """ 266 | Sets the maximum demand of batcher stages. 
267 | By default it is set to `:batch_size`, if `:batch_size` is an integer. 268 | Must be set if the `:batch_size` is a function. 269 | """ 270 | ], 271 | batch_timeout: [ 272 | type: :pos_integer, 273 | default: 1000, 274 | doc: """ 275 | The time, in milliseconds, that the batcher waits before flushing 276 | the list of messages. When this timeout is reached, a new batch 277 | is generated and sent downstream, no matter if the `:batch_size` 278 | has been reached or not. 279 | """ 280 | ], 281 | partition_by: [ 282 | type: {:fun, 1}, 283 | doc: """ 284 | Optional. Overrides the top-level `:partition_by`. 285 | """ 286 | ], 287 | spawn_opt: [ 288 | type: :keyword_list, 289 | doc: """ 290 | Overrides the top-level `:spawn_opt`. 291 | """ 292 | ], 293 | hibernate_after: [ 294 | type: :pos_integer, 295 | doc: """ 296 | Overrides the top-level `:hibernate_after`. 297 | """ 298 | ] 299 | ] 300 | ] 301 | ] 302 | ], 303 | partition_by: [ 304 | type: {:fun, 1}, 305 | doc: """ 306 | A function that controls how data is 307 | partitioned across all processors and batchers. It receives a 308 | `Broadway.Message` and it must return a non-negative integer, 309 | starting with zero, that will be mapped to one of the existing 310 | processors. See ["Ordering and Partitioning"](#module-ordering-and-partitioning) 311 | in the module docs for more information and known pitfalls. 312 | """ 313 | ], 314 | spawn_opt: [ 315 | type: :keyword_list, 316 | doc: """ 317 | Low-level options given when starting a 318 | process. Applies to producers, processors, and batchers. 319 | See `erlang:spawn_opt/2` for more information. 320 | """ 321 | ], 322 | hibernate_after: [ 323 | type: :pos_integer, 324 | default: 15_000, 325 | doc: """ 326 | If a process does not receive any message within this interval, it will hibernate, 327 | compacting memory. Applies to producers, processors, and batchers. 328 | Defaults to `15_000` (millisecond). 329 | """ 330 | ] 331 | ] 332 | 333 | @definition NimbleOptions.new!(definition) 334 | 335 | def definition() do 336 | @definition 337 | end 338 | 339 | def validate_name(name) when is_atom(name), do: {:ok, name} 340 | 341 | def validate_name({:via, module, _term} = via) when is_atom(module), do: {:ok, via} 342 | 343 | def validate_name(name) do 344 | {:error, 345 | "expected :name to be an atom or a {:via, module, term} tuple, got: #{inspect(name)}"} 346 | end 347 | 348 | def validate_batch_size(size) when is_integer(size) and size > 0, do: {:ok, size} 349 | 350 | def validate_batch_size({_acc, func} = batch_splitter) when is_function(func) do 351 | if is_function(func, 2) do 352 | {:ok, batch_splitter} 353 | else 354 | {:error, "expected `:batch_size` to include a function of 2 arity, got: #{inspect(func)}\n"} 355 | end 356 | end 357 | 358 | def validate_batch_size(batch_size) do 359 | {:error, 360 | "expected :batch_size to be a positive integer or a {acc, &fun/2} tuple, got: #{inspect(batch_size)}\n"} 361 | end 362 | end 363 | -------------------------------------------------------------------------------- /lib/broadway/producer.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Producer do 2 | @moduledoc """ 3 | A Broadway producer is a `GenStage` producer that emits 4 | `Broadway.Message` structs as events. 
5 | 6 | The `Broadway.Producer` is declared in a Broadway topology 7 | via the `:module` option (see `Broadway.start_link/2`): 8 | 9 | producer: [ 10 | module: {MyProducer, options} 11 | ] 12 | 13 | Once declared, `MyProducer` is expected to implement and 14 | behave as a `GenStage` producer. When Broadway starts, 15 | the `c:GenStage.init/1` callback will be invoked directly with the 16 | given `options`. 17 | 18 | ## Injected Broadway configuration 19 | 20 | If `options` is a keyword list, Broadway injects a `:broadway` option 21 | into the keyword list. This option contains the configuration for the 22 | complete Broadway topology (see `Broadway.start_link/2`). For example, 23 | you can use `options[:broadway][:name]` to uniquely identify the topology. 24 | 25 | The `:broadway` configuration also has an `:index` key. This 26 | is the index of the producer in its supervision tree (starting 27 | from `0`). This allows features such as having even producers 28 | connect to some server while odd producers connect to another. 29 | 30 | If `options` is any other term, it is passed as is to the `c:GenStage.init/1` 31 | callback. All other functions behave precisely as in `GenStage` 32 | with the requirements that all emitted events must be `Broadway.Message` 33 | structs. 34 | 35 | ## Optional callbacks 36 | 37 | A `Broadway.Producer` can implement two optional Broadway callbacks, 38 | `c:prepare_for_start/2` and `c:prepare_for_draining/1`, which are useful 39 | for booting up and shutting down Broadway topologies respectively. 40 | 41 | ## Producing Broadway messages 42 | 43 | You should generally modify `Broadway.Message` structs by using the functions 44 | in the `Broadway.Message` module. However, if you are implementing your 45 | own producer, you **can manipulate** some of the struct's fields directly. 46 | 47 | These fields are: 48 | 49 | * `:data` (required) - the data of the message. Even though the function 50 | `Broadway.Message.put_data/2` exists, when creating a `%Broadway.Message{}` 51 | struct from scratch you will have to pass in the `:data` field directly. 52 | 53 | * `:acknowledger` (required) - the acknowledger of the message, of type 54 | `t:Broadway.Message.acknowledger/0`. 55 | 56 | * `:metadata` (optional) - metadata about the message that your producer 57 | can attach to the message. This is useful when you want to add some metadata 58 | to messages, and document it for users to use in their pipelines. 59 | 60 | For example, a producer could create a message by doing something like this: 61 | 62 | %Broadway.Message{ 63 | data: "some data here", 64 | acknowledger: Broadway.NoopAcknowledger.init() 65 | } 66 | 67 | """ 68 | 69 | @doc """ 70 | Invoked once by Broadway during `Broadway.start_link/2`. 71 | 72 | The goal of this callback is to manipulate the general topology options, 73 | if necessary at all, and introduce any new child specs that will be 74 | started **before** the producers' supervisor in Broadway's supervision tree. 75 | Broadway's supervision tree is a `rest_for_one` supervisor (see the documentation 76 | for `Supervisor`), which means that if the children returned from this callback 77 | crash they will bring down the rest of the pipeline before being restarted. 78 | 79 | This callback is guaranteed to be invoked inside the Broadway main process. 80 | 81 | `module` is the Broadway module passed as the first argument to 82 | `Broadway.start_link/2`. `options` is all of Broadway topology options passed 83 | as the second argument to `Broadway.start_link/2`. 
84 | 85 | The return value of this callback is a tuple `{child_specs, updated_options}`. `child_specs` 86 | is the list of child specs to be started under Broadway's supervision tree. 87 | `updated_options` is a potentially-updated list of Broadway options 88 | that will be used instead of the ones passed to `Broadway.start_link/2`. This can be 89 | used to modify the characteristics of the Broadway topology to accommodate 90 | the children started here. 91 | 92 | ## Examples 93 | 94 | defmodule MyProducer do 95 | @behaviour Broadway.Producer 96 | 97 | # other callbacks... 98 | 99 | @impl true 100 | def prepare_for_start(_module, broadway_options) do 101 | children = [ 102 | {DynamicSupervisor, strategy: :one_for_one, name: MyApp.DynamicSupervisor} 103 | ] 104 | updated_options = put_in(broadway_options, [:producer, :rate_limiting], [interval: 1000, allowed_messages: 10]) 105 | 106 | {children, updated_options} 107 | end 108 | end 109 | 110 | """ 111 | @doc since: "0.5.0" 112 | @callback prepare_for_start(module :: atom, options :: keyword) :: 113 | {[child_spec], updated_options :: keyword} 114 | when child_spec: :supervisor.child_spec() | {module, any} | module 115 | 116 | @doc """ 117 | Invoked by the terminator right before Broadway starts draining in-flight 118 | messages during shutdown. 119 | 120 | This callback should be implemented by producers that need to do additional 121 | work before shutting down. That includes active producers like RabbitMQ that 122 | must ask the data provider to stop sending messages. It will be invoked for 123 | each producer stage. 124 | 125 | `state` is the current state of the producer. 126 | """ 127 | @callback prepare_for_draining(state :: any) :: 128 | {:noreply, [event], new_state} 129 | | {:noreply, [event], new_state, :hibernate} 130 | | {:stop, reason :: term, new_state} 131 | when new_state: term, event: term 132 | 133 | @optional_callbacks prepare_for_start: 2, prepare_for_draining: 1 134 | end 135 | -------------------------------------------------------------------------------- /lib/broadway/topology.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology do 2 | @moduledoc false 3 | @behaviour GenServer 4 | 5 | alias Broadway.Topology.{ 6 | ProducerStage, 7 | ProcessorStage, 8 | BatcherStage, 9 | BatchProcessorStage, 10 | Terminator, 11 | RateLimiter 12 | } 13 | 14 | alias Broadway.ConfigStorage 15 | 16 | defstruct [:context, :topology, :producer_names, :batchers_names, :rate_limiter_name] 17 | 18 | def start_link(module, opts) do 19 | GenServer.start_link(__MODULE__, {module, opts}, opts) 20 | end 21 | 22 | def producer_names(server) do 23 | config(server).producer_names 24 | end 25 | 26 | def get_rate_limiter(server) do 27 | if name = config(server).rate_limiter_name do 28 | {:ok, name} 29 | else 30 | {:error, :rate_limiting_not_enabled} 31 | end 32 | end 33 | 34 | def topology(server) do 35 | config(server).topology 36 | end 37 | 38 | defp config(server) do 39 | config_storage = ConfigStorage.get_module() 40 | 41 | config_storage.get(server) || 42 | exit({:noproc, {__MODULE__, :config, [server]}}) 43 | end 44 | 45 | ## Callbacks 46 | 47 | @impl true 48 | def init({module, opts}) do 49 | Process.flag(:trap_exit, true) 50 | config_storage = ConfigStorage.get_module() 51 | 52 | # We want to invoke this as early as possible otherwise the 53 | # stacktrace gets deeper and deeper in case of errors.
54 | {child_specs, opts} = prepare_for_start(module, opts) 55 | 56 | config = init_config(module, opts) 57 | {:ok, supervisor_pid} = start_supervisor(child_specs, config, opts) 58 | 59 | emit_init_event(opts, supervisor_pid) 60 | 61 | config_storage.put(config.name, %__MODULE__{ 62 | context: config.context, 63 | topology: build_topology_details(config), 64 | producer_names: process_names(config, "Producer", config.producer_config), 65 | batchers_names: 66 | Enum.map(config.batchers_config, &process_name(config, "Batcher", elem(&1, 0))), 67 | rate_limiter_name: config.rate_limiter 68 | }) 69 | 70 | {:ok, 71 | %{ 72 | supervisor_pid: supervisor_pid, 73 | terminator: config.terminator, 74 | name: config.name 75 | }} 76 | end 77 | 78 | @impl true 79 | def handle_info({:EXIT, supervisor_pid, reason}, %{supervisor_pid: supervisor_pid} = state) do 80 | {:stop, reason, state} 81 | end 82 | 83 | def handle_info(_, state) do 84 | {:noreply, state} 85 | end 86 | 87 | @impl true 88 | def terminate(reason, %{name: name, supervisor_pid: supervisor_pid, terminator: terminator}) do 89 | Broadway.Topology.Terminator.trap_exit(terminator) 90 | ref = Process.monitor(supervisor_pid) 91 | Process.exit(supervisor_pid, reason_to_signal(reason)) 92 | 93 | receive do 94 | {:DOWN, ^ref, _, _, _} -> 95 | config_storage = ConfigStorage.get_module() 96 | config_storage.delete(name) 97 | :ok 98 | end 99 | 100 | :ok 101 | end 102 | 103 | defp reason_to_signal(:killed), do: :kill 104 | defp reason_to_signal(other), do: other 105 | 106 | defp prepare_for_start(module, opts) do 107 | {producer_mod, _producer_opts} = opts[:producer][:module] 108 | 109 | if Code.ensure_loaded?(producer_mod) and 110 | function_exported?(producer_mod, :prepare_for_start, 2) do 111 | case producer_mod.prepare_for_start(module, opts) do 112 | {child_specs, opts} when is_list(child_specs) -> 113 | {child_specs, NimbleOptions.validate!(opts, Broadway.Options.definition())} 114 | 115 | other -> 116 | raise ArgumentError, 117 | "expected #{Exception.format_mfa(producer_mod, :prepare_for_start, 2)} " <> 118 | "to return {child_specs, options}, got: #{inspect(other)}" 119 | end 120 | else 121 | {[], opts} 122 | end 123 | end 124 | 125 | defp start_supervisor(child_specs, config, opts) do 126 | {producers_names, producers_specs} = build_producers_specs(config, opts) 127 | {processors_names, processors_specs} = build_processors_specs(config, producers_names) 128 | 129 | children = 130 | [ 131 | build_rate_limiter_spec(config, producers_names), 132 | build_producer_supervisor_spec(config, producers_specs), 133 | build_processor_supervisor_spec(config, processors_specs) 134 | ] ++ 135 | build_batchers_supervisor_and_terminator_specs(config, producers_names, processors_names) 136 | 137 | supervisor_opts = [ 138 | name: process_name(config, "Supervisor"), 139 | max_restarts: config.max_restarts, 140 | max_seconds: config.max_seconds, 141 | strategy: :rest_for_one 142 | ] 143 | 144 | Supervisor.start_link(child_specs ++ children, supervisor_opts) 145 | end 146 | 147 | defp init_config(module, opts) do 148 | %{ 149 | name: opts[:name], 150 | module: module, 151 | producer_config: opts[:producer], 152 | processors_config: init_processors_config(opts[:processors]), 153 | batchers_config: opts[:batchers], 154 | context: opts[:context], 155 | max_restarts: opts[:max_restarts], 156 | max_seconds: opts[:max_seconds], 157 | shutdown: opts[:shutdown], 158 | resubscribe_interval: opts[:resubscribe_interval], 159 | terminator: nil, 160 | rate_limiter: nil 161 | } 162 
| |> put_terminator() 163 | |> put_rate_limiter(opts) 164 | end 165 | 166 | defp put_terminator(config) do 167 | Map.put(config, :terminator, process_name(config, "Terminator")) 168 | end 169 | 170 | defp put_rate_limiter(config, opts) do 171 | if opts[:producer][:rate_limiting] do 172 | Map.put(config, :rate_limiter, process_name(config, "RateLimiter")) 173 | else 174 | config 175 | end 176 | end 177 | 178 | defp init_processors_config(config) do 179 | Enum.map(config, fn {key, opts} -> 180 | {key, Keyword.put_new(opts, :concurrency, System.schedulers_online() * 2)} 181 | end) 182 | end 183 | 184 | defp emit_init_event(user_config, supervisor_pid) do 185 | measurements = %{system_time: System.monotonic_time()} 186 | 187 | metadata = %{ 188 | config: user_config, 189 | supervisor_pid: supervisor_pid 190 | } 191 | 192 | :telemetry.execute([:broadway, :topology, :init], measurements, metadata) 193 | end 194 | 195 | defp start_options(name, config) do 196 | [name: name] ++ Keyword.take(config, [:spawn_opt, :hibernate_after]) 197 | end 198 | 199 | defp build_rate_limiter_spec(config, producers_names) do 200 | %{producer_config: producer_config} = config 201 | 202 | opts = [ 203 | name: process_name(config, "RateLimiter"), 204 | rate_limiting: producer_config[:rate_limiting], 205 | producers_names: producers_names 206 | ] 207 | 208 | {RateLimiter, opts} 209 | end 210 | 211 | defp build_producers_specs(config, opts) do 212 | %{ 213 | producer_config: producer_config, 214 | processors_config: processors_config, 215 | shutdown: shutdown, 216 | rate_limiter: rate_limiter 217 | } = config 218 | 219 | n_producers = producer_config[:concurrency] 220 | [{_, processor_config} | _other_processors] = processors_config 221 | 222 | # The partition of the producer depends on the processor, so we handle it here. 223 | dispatcher = 224 | case processor_config[:partition_by] do 225 | nil -> 226 | {GenStage.DemandDispatcher, shuffle_demands_on_first_dispatch: true} 227 | 228 | func -> 229 | n_processors = processor_config[:concurrency] 230 | hash_func = fn msg -> {msg, rem(func.(msg), n_processors)} end 231 | {GenStage.PartitionDispatcher, partitions: 0..(n_processors - 1), hash: hash_func} 232 | end 233 | 234 | args = [broadway: opts, dispatcher: dispatcher, rate_limiter: rate_limiter] ++ producer_config 235 | 236 | names_and_specs = 237 | for index <- 0..(n_producers - 1) do 238 | name = process_name(config, "Producer", index) 239 | start_options = start_options(name, producer_config) 240 | 241 | spec = %{ 242 | start: {ProducerStage, :start_link, [args, index, start_options]}, 243 | id: name, 244 | shutdown: shutdown 245 | } 246 | 247 | {name, spec} 248 | end 249 | 250 | # We want to return {names, specs} here. 
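# `Enum.unzip/1` turns `[{name_0, spec_0}, {name_1, spec_1}, ...]` into `{[name_0, name_1, ...], [spec_0, spec_1, ...]}`.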
251 | Enum.unzip(names_and_specs) 252 | end 253 | 254 | defp build_processors_specs(config, producers) do 255 | %{ 256 | name: topology_name, 257 | module: module, 258 | processors_config: processors_config, 259 | context: context, 260 | batchers_config: batchers_config, 261 | resubscribe_interval: resubscribe_interval, 262 | terminator: terminator, 263 | shutdown: shutdown, 264 | producer_config: producer_config 265 | } = config 266 | 267 | [{key, processor_config} | other_processors] = processors_config 268 | 269 | if other_processors != [] do 270 | raise "Only one set of processors is allowed for now" 271 | end 272 | 273 | names = process_names(config, "Processor_#{key}", processor_config) 274 | 275 | # The partition of the processor depends on the next processor or the batcher, 276 | # so we handle it here. 277 | {type, dispatcher, batchers} = 278 | case Keyword.keys(batchers_config) do 279 | [] -> 280 | {:consumer, nil, :none} 281 | 282 | [_] = batchers -> 283 | {:producer_consumer, 284 | {GenStage.DemandDispatcher, shuffle_demands_on_first_dispatch: true}, batchers} 285 | 286 | [_ | _] = batchers -> 287 | {:producer_consumer, 288 | {GenStage.PartitionDispatcher, partitions: batchers, hash: &{&1, &1.batcher}}, 289 | batchers} 290 | end 291 | 292 | args = [ 293 | topology_name: topology_name, 294 | type: type, 295 | resubscribe: resubscribe_interval, 296 | terminator: terminator, 297 | module: module, 298 | context: context, 299 | dispatcher: dispatcher, 300 | processor_key: key, 301 | processor_config: processor_config, 302 | producers: producers, 303 | producer: producer_config[:module], 304 | batchers: batchers 305 | ] 306 | 307 | specs = 308 | for {name, index} <- Enum.with_index(names) do 309 | start_options = start_options(name, processor_config) 310 | args = [name: name, partition: index] ++ args 311 | 312 | %{ 313 | start: {ProcessorStage, :start_link, [args, start_options]}, 314 | id: name, 315 | shutdown: shutdown 316 | } 317 | end 318 | 319 | {names, specs} 320 | end 321 | 322 | defp build_batchers_supervisor_and_terminator_specs(config, producers_names, processors_names) do 323 | if config.batchers_config == [] do 324 | [build_terminator_spec(config, producers_names, processors_names, processors_names)] 325 | else 326 | {batch_processors_names, batcher_supervisors_specs} = 327 | build_batcher_supervisors_specs(config, processors_names) 328 | 329 | [ 330 | build_batchers_supervisor_spec(config, batcher_supervisors_specs), 331 | build_terminator_spec(config, producers_names, processors_names, batch_processors_names) 332 | ] 333 | end 334 | end 335 | 336 | defp build_batcher_supervisors_specs(config, processors) do 337 | names_and_specs = 338 | for {key, _} = batcher_config <- config.batchers_config do 339 | {batcher, batcher_spec} = build_batcher_spec(config, batcher_config, processors) 340 | 341 | {consumers_names, consumers_specs} = 342 | build_batch_processors_specs(config, batcher_config, batcher) 343 | 344 | children = [ 345 | batcher_spec, 346 | build_batch_processor_supervisor_spec(config, consumers_specs, key) 347 | ] 348 | 349 | {consumers_names, build_batcher_supervisor_spec(config, children, key)} 350 | end 351 | 352 | {names, specs} = Enum.unzip(names_and_specs) 353 | {Enum.concat(names), specs} 354 | end 355 | 356 | defp build_batcher_spec(config, batcher_config, processors) do 357 | %{terminator: terminator, shutdown: shutdown} = config 358 | {key, options} = batcher_config 359 | name = process_name(config, "Batcher", key) 360 | 361 | args = 362 | [ 363 | 
topology_name: config.name, 364 | name: name, 365 | resubscribe: :never, 366 | terminator: terminator, 367 | batcher: key, 368 | partition: key, 369 | processors: processors, 370 | context: config[:context], 371 | # Partitioning is handled inside the batcher since the batcher 372 | # needs to associate the partition with the batcher key. 373 | partition_by: options[:partition_by], 374 | concurrency: options[:concurrency] 375 | ] ++ options 376 | 377 | opts = start_options(name, options) 378 | 379 | spec = %{ 380 | start: {BatcherStage, :start_link, [args, opts]}, 381 | id: name, 382 | shutdown: shutdown 383 | } 384 | 385 | {name, spec} 386 | end 387 | 388 | defp build_batch_processors_specs(config, {key, batcher_config}, batcher) do 389 | %{ 390 | name: broadway_name, 391 | module: module, 392 | context: context, 393 | terminator: terminator, 394 | shutdown: shutdown, 395 | producer_config: producer_config 396 | } = config 397 | 398 | names = process_names(config, "BatchProcessor_#{key}", batcher_config) 399 | 400 | args = [ 401 | topology_name: broadway_name, 402 | resubscribe: :never, 403 | terminator: terminator, 404 | module: module, 405 | context: context, 406 | batcher: batcher, 407 | producer: producer_config[:module] 408 | ] 409 | 410 | specs = 411 | for {name, index} <- Enum.with_index(names) do 412 | start_options = start_options(name, batcher_config) 413 | 414 | %{ 415 | start: 416 | {BatchProcessorStage, :start_link, 417 | [[name: name, partition: index] ++ args, start_options]}, 418 | id: name, 419 | shutdown: shutdown 420 | } 421 | end 422 | 423 | {names, specs} 424 | end 425 | 426 | defp build_terminator_spec(config, producers, first, last) do 427 | %{ 428 | terminator: name, 429 | shutdown: shutdown 430 | } = config 431 | 432 | args = [ 433 | producers: producers, 434 | first: first, 435 | last: last 436 | ] 437 | 438 | start_options = [name: name] 439 | 440 | %{ 441 | start: {Terminator, :start_link, [args, start_options]}, 442 | id: name, 443 | shutdown: shutdown 444 | } 445 | end 446 | 447 | defp build_topology_details(config) do 448 | [ 449 | producers: [ 450 | %{ 451 | name: process_name(config, "Producer"), 452 | concurrency: config.producer_config[:concurrency] 453 | } 454 | ], 455 | processors: 456 | Enum.map(config.processors_config, fn {name, processor_config} -> 457 | %{ 458 | name: process_name(config, "Processor", name), 459 | processor_key: name, 460 | concurrency: processor_config[:concurrency] 461 | } 462 | end), 463 | batchers: 464 | Enum.map(config.batchers_config, fn {name, batcher_config} -> 465 | %{ 466 | batcher_name: process_name(config, "Batcher", name), 467 | batcher_key: name, 468 | name: process_name(config, "BatchProcessor", name), 469 | concurrency: batcher_config[:concurrency] 470 | } 471 | end) 472 | ] 473 | end 474 | 475 | defp process_name(config, base_name, suffix) do 476 | process_name(config, "#{base_name}_#{suffix}") 477 | end 478 | 479 | defp process_name(%{module: module, name: broadway_name} = _config, base_name) do 480 | if function_exported?(module, :process_name, 2) do 481 | module.process_name(broadway_name, base_name) 482 | else 483 | default_process_name(broadway_name, base_name) 484 | end 485 | end 486 | 487 | defp default_process_name(broadway_name, base_name) when is_atom(broadway_name) do 488 | :"#{broadway_name}.Broadway.#{base_name}" 489 | end 490 | 491 | defp default_process_name(broadway_name, _base_name) do 492 | raise ArgumentError, """ 493 | expected Broadway to be started with an atom :name, got: 
#{inspect(broadway_name)} 494 | 495 | If starting Broadway with a :name that is not an atom, you must define the \ 496 | process_name/2 callback in the module which calls "use Broadway" (see the documentation). 497 | """ 498 | end 499 | 500 | defp process_names(config, type, processor_config) do 501 | for index <- 0..(processor_config[:concurrency] - 1) do 502 | process_name(config, type, index) 503 | end 504 | end 505 | 506 | defp build_producer_supervisor_spec(config, children) do 507 | name = process_name(config, "ProducerSupervisor") 508 | children_count = length(children) 509 | 510 | # TODO: Allow max_restarts and max_seconds as configuration 511 | # options as well as shutdown and restart for each child. 512 | build_supervisor_spec(children, name, 513 | strategy: :one_for_one, 514 | max_restarts: 2 * children_count, 515 | max_seconds: children_count 516 | ) 517 | end 518 | 519 | defp build_processor_supervisor_spec(config, children) do 520 | build_supervisor_spec( 521 | children, 522 | process_name(config, "ProcessorSupervisor"), 523 | strategy: :one_for_all, 524 | max_restarts: 0 525 | ) 526 | end 527 | 528 | defp build_batchers_supervisor_spec(config, children) do 529 | children_count = length(children) 530 | 531 | build_supervisor_spec( 532 | children, 533 | process_name(config, "BatchersSupervisor"), 534 | strategy: :one_for_one, 535 | max_restarts: 2 * children_count, 536 | max_seconds: children_count 537 | ) 538 | end 539 | 540 | defp build_batcher_supervisor_spec(config, children, key) do 541 | build_supervisor_spec( 542 | children, 543 | process_name(config, "BatcherSupervisor", key), 544 | strategy: :rest_for_one, 545 | max_restarts: 4, 546 | max_seconds: 2 547 | ) 548 | end 549 | 550 | defp build_batch_processor_supervisor_spec(config, children, key) do 551 | build_supervisor_spec( 552 | children, 553 | process_name(config, "BatchProcessorSupervisor", key), 554 | strategy: :one_for_all, 555 | max_restarts: 0 556 | ) 557 | end 558 | 559 | defp build_supervisor_spec(children, name, opts) do 560 | %{ 561 | id: make_ref(), 562 | start: {Supervisor, :start_link, [children, [name: name] ++ opts]}, 563 | type: :supervisor 564 | } 565 | end 566 | end 567 | -------------------------------------------------------------------------------- /lib/broadway/topology/batch_processor_stage.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.BatchProcessorStage do 2 | @moduledoc false 3 | use GenStage 4 | require Logger 5 | alias Broadway.{Acknowledger, Message} 6 | @subscription_options [max_demand: 1, min_demand: 0] 7 | 8 | @spec start_link(term, GenServer.options()) :: GenServer.on_start() 9 | def start_link(args, stage_options) do 10 | Broadway.Topology.Subscriber.start_link( 11 | __MODULE__, 12 | [args[:batcher]], 13 | args, 14 | @subscription_options, 15 | stage_options 16 | ) 17 | end 18 | 19 | @impl true 20 | def init(args) do 21 | Process.flag(:trap_exit, true) 22 | 23 | state = %{ 24 | topology_name: args[:topology_name], 25 | name: args[:name], 26 | partition: args[:partition], 27 | module: args[:module], 28 | context: args[:context], 29 | producer: args[:producer] 30 | } 31 | 32 | {:consumer, state, []} 33 | end 34 | 35 | @impl true 36 | def handle_info({:EXIT, pid, reason}, state) when reason not in [:normal, :shutdown] do 37 | Logger.error( 38 | "Batch processor received a trapped exit from #{inspect(pid)} with reason: " <> 39 | Exception.format_exit(reason) 40 | ) 41 | 42 | {:noreply, [], state} 43 | end 44 | 45 | 
def handle_info(_msg, state) do 46 | {:noreply, [], state} 47 | end 48 | 49 | @impl true 50 | def handle_events(events, _from, state) do 51 | [{messages, batch_info}] = events 52 | %Broadway.BatchInfo{batcher: batcher, size: size} = batch_info 53 | 54 | :telemetry.span( 55 | [:broadway, :batch_processor], 56 | %{ 57 | topology_name: state.topology_name, 58 | name: state.name, 59 | index: state.partition, 60 | messages: messages, 61 | batch_info: batch_info, 62 | context: state.context, 63 | producer: state.producer 64 | }, 65 | fn -> 66 | {successful_messages, failed_messages, returned} = 67 | handle_batch(batcher, messages, batch_info, state) 68 | 69 | failed_messages = 70 | Acknowledger.maybe_handle_failed_messages( 71 | failed_messages, 72 | state.module, 73 | state.context 74 | ) 75 | 76 | if returned != size do 77 | Logger.error( 78 | "#{inspect(state.module)}.handle_batch/4 received #{size} messages and " <> 79 | "returned only #{returned}. All messages given to handle_batch/4 " <> 80 | "must be returned" 81 | ) 82 | end 83 | 84 | try do 85 | Acknowledger.ack_messages(successful_messages, failed_messages) 86 | catch 87 | kind, reason -> 88 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 89 | crash_reason: Acknowledger.crash_reason(kind, reason, __STACKTRACE__) 90 | ) 91 | end 92 | 93 | {{:noreply, [], state}, 94 | %{ 95 | topology_name: state.topology_name, 96 | name: state.name, 97 | index: state.partition, 98 | successful_messages: successful_messages, 99 | failed_messages: failed_messages, 100 | batch_info: batch_info, 101 | context: state.context, 102 | producer: state.producer 103 | }} 104 | end 105 | ) 106 | end 107 | 108 | defp handle_batch(batcher, messages, batch_info, state) do 109 | %{module: module, context: context} = state 110 | 111 | try do 112 | module.handle_batch(batcher, messages, batch_info, context) 113 | |> split_by_status([], [], 0) 114 | catch 115 | kind, reason -> 116 | reason = Exception.normalize(kind, reason, __STACKTRACE__) 117 | 118 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 119 | crash_reason: Acknowledger.crash_reason(kind, reason, __STACKTRACE__) 120 | ) 121 | 122 | messages = Enum.map(messages, &%{&1 | status: {kind, reason, __STACKTRACE__}}) 123 | {[], messages, batch_info.size} 124 | end 125 | end 126 | 127 | defp split_by_status([], successful, failed, count) do 128 | {Enum.reverse(successful), Enum.reverse(failed), count} 129 | end 130 | 131 | defp split_by_status([%Message{status: :ok} = message | rest], successful, failed, count) do 132 | split_by_status(rest, [message | successful], failed, count + 1) 133 | end 134 | 135 | defp split_by_status([%Message{} = message | rest], successful, failed, count) do 136 | split_by_status(rest, successful, [message | failed], count + 1) 137 | end 138 | 139 | defp split_by_status([other | _rest], _successful, _failed, _count) do 140 | raise "handle_batch/4 must return a list of %Broadway.Message{} structs, " <> 141 | "but one element was: #{inspect(other)}" 142 | end 143 | 144 | defp split_by_status(other, _successful, _failed, _count) do 145 | raise "handle_batch/4 must return a list of %Broadway.Message{} structs, got: #{inspect(other)}" 146 | end 147 | end 148 | -------------------------------------------------------------------------------- /lib/broadway/topology/batcher_stage.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.BatcherStage do 2 | @moduledoc false 3 | use GenStage 4 | alias 
Broadway.BatchInfo 5 | 6 | @all_batches __MODULE__.All 7 | 8 | @spec start_link(term, GenServer.options()) :: GenServer.on_start() 9 | def start_link(args, stage_options) do 10 | Broadway.Topology.Subscriber.start_link( 11 | __MODULE__, 12 | args[:processors], 13 | args, 14 | [max_demand: args[:max_demand] || args[:batch_size]], 15 | stage_options 16 | ) 17 | end 18 | 19 | @impl true 20 | def init(args) do 21 | Process.put(@all_batches, %{}) 22 | 23 | {dispatcher, partition_by} = 24 | case args[:partition_by] do 25 | nil -> 26 | {GenStage.DemandDispatcher, nil} 27 | 28 | func -> 29 | concurrency = args[:concurrency] 30 | hash_fun = fn {_, %{partition: partition}} = payload -> {payload, partition} end 31 | 32 | dispatcher = 33 | {GenStage.PartitionDispatcher, partitions: 0..(concurrency - 1), hash: hash_fun} 34 | 35 | {dispatcher, fn msg -> rem(func.(msg), concurrency) end} 36 | end 37 | 38 | state = %{ 39 | topology_name: args[:topology_name], 40 | name: args[:name], 41 | batcher: args[:batcher], 42 | batch_size: args[:batch_size], 43 | batch_timeout: args[:batch_timeout], 44 | partition_by: partition_by, 45 | context: args[:context] 46 | } 47 | 48 | {:producer_consumer, state, dispatcher: dispatcher} 49 | end 50 | 51 | @impl true 52 | def handle_events(events, _from, state) do 53 | batches = 54 | :telemetry.span( 55 | [:broadway, :batcher], 56 | %{ 57 | topology_name: state.topology_name, 58 | name: state.name, 59 | batcher_key: state.batcher, 60 | messages: events, 61 | context: state.context 62 | }, 63 | fn -> 64 | {handle_events_per_batch_key(events, [], state), 65 | %{ 66 | topology_name: state.topology_name, 67 | name: state.name, 68 | batcher_key: state.batcher, 69 | context: state.context 70 | }} 71 | end 72 | ) 73 | 74 | {:noreply, batches, state} 75 | end 76 | 77 | @impl true 78 | def handle_info({:timeout, _timer, ref}, state) do 79 | case all_batches() do 80 | %{^ref => batch_key} -> 81 | {current, _, _, _, _} = delete_batch(batch_key, ref) 82 | {:noreply, [wrap_for_delivery(batch_key, current, :timeout, state)], state} 83 | 84 | %{} -> 85 | {:noreply, [], state} 86 | end 87 | end 88 | 89 | def handle_info(:cancel_consumers, state) do 90 | events = 91 | for {ref, batch_key} <- all_batches() do 92 | {current, _, _, timer, _} = delete_batch(batch_key, ref) 93 | cancel_batch_timeout(timer) 94 | wrap_for_delivery(batch_key, current, :flush, state) 95 | end 96 | 97 | {:noreply, events, state} 98 | end 99 | 100 | def handle_info(_msg, state) do 101 | {:noreply, [], state} 102 | end 103 | 104 | ## Default batch handling 105 | 106 | defp handle_events_per_batch_key([], acc, _state) do 107 | Enum.reverse(acc) 108 | end 109 | 110 | defp handle_events_per_batch_key([event | _] = events, acc, state) do 111 | %{partition_by: partition_by} = state 112 | batch_key = batch_key(event, partition_by) 113 | {current, batch_state, batch_splitter, timer, ref} = init_or_get_batch(batch_key, state) 114 | 115 | {current, batch_state, events, flush} = 116 | split_counting( 117 | batch_key, 118 | events, 119 | batch_state, 120 | batch_splitter, 121 | nil, 122 | current, 123 | partition_by 124 | ) 125 | 126 | acc = 127 | if flush do 128 | deliver_batch(batch_key, current, flush, timer, ref, acc, state) 129 | else 130 | put_batch(batch_key, {current, batch_state, batch_splitter, timer, ref}) 131 | acc 132 | end 133 | 134 | handle_events_per_batch_key(events, acc, state) 135 | end 136 | 137 | defp split_counting(_batch_key, [], count, _batch_splitter, flush?, acc, _partition_by) do 138 | {acc, count, [], 
flush?} 139 | end 140 | 141 | defp split_counting( 142 | batch_key, 143 | [event | remained] = events, 144 | batch_state, 145 | batch_splitter, 146 | flush, 147 | acc, 148 | partition_by 149 | ) do 150 | event_batch_key = batch_key(event, partition_by) 151 | 152 | # Switch to a different batch key 153 | if event_batch_key != batch_key do 154 | {acc, batch_state, events, flush} 155 | else 156 | case batch_splitter.(event, batch_state) do 157 | # Batch splitter indicates a full batch 158 | {:emit, next_state} -> 159 | {[event | acc], next_state, remained, :size} 160 | 161 | # Same batch key but not fulfill one batch size yet 162 | {:cont, next_state} -> 163 | split_counting( 164 | batch_key, 165 | remained, 166 | next_state, 167 | batch_splitter, 168 | flush || flush_batch(event), 169 | [event | acc], 170 | partition_by 171 | ) 172 | end 173 | end 174 | end 175 | 176 | defp flush_batch(%{batch_mode: :flush}), do: :flush 177 | defp flush_batch(%{}), do: nil 178 | 179 | defp deliver_batch(batch_key, current, trigger, timer, ref, acc, state) do 180 | delete_batch(batch_key, ref) 181 | cancel_batch_timeout(timer) 182 | [wrap_for_delivery(batch_key, current, trigger, state) | acc] 183 | end 184 | 185 | ## General batch handling 186 | 187 | @compile {:inline, batch_key: 2} 188 | 189 | defp batch_key(%{batch_key: batch_key}, nil), 190 | do: batch_key 191 | 192 | defp batch_key(%{batch_key: batch_key} = event, partition_by), 193 | do: [batch_key | partition_by.(event)] 194 | 195 | defp init_or_get_batch(batch_key, state) do 196 | if batch = Process.get(batch_key) do 197 | batch 198 | else 199 | %{batch_size: batch_size, batch_timeout: batch_timeout} = state 200 | 201 | {batch_state, batch_splitter} = get_batch_splitter(batch_size) 202 | {timer, ref} = schedule_batch_timeout(batch_timeout) 203 | update_all_batches(&Map.put(&1, ref, batch_key)) 204 | {[], batch_state, batch_splitter, timer, ref} 205 | end 206 | end 207 | 208 | defp get_batch_splitter(batch_size) do 209 | if is_number(batch_size) do 210 | {batch_size, 211 | fn 212 | _message, 1 -> {:emit, batch_size} 213 | _message, count -> {:cont, count - 1} 214 | end} 215 | else 216 | # Customized batch splitter with initial state and function 217 | batch_size 218 | end 219 | end 220 | 221 | defp put_batch(batch_key, {_, _, _, _, _} = batch) do 222 | Process.put(batch_key, batch) 223 | end 224 | 225 | defp delete_batch(batch_key, ref) do 226 | update_all_batches(&Map.delete(&1, ref)) 227 | Process.delete(batch_key) 228 | end 229 | 230 | defp all_batches do 231 | Process.get(@all_batches) 232 | end 233 | 234 | defp update_all_batches(fun) do 235 | Process.put(@all_batches, fun.(Process.get(@all_batches))) 236 | end 237 | 238 | defp schedule_batch_timeout(batch_timeout) do 239 | ref = make_ref() 240 | {:erlang.start_timer(batch_timeout, self(), ref), ref} 241 | end 242 | 243 | defp cancel_batch_timeout(timer) do 244 | case :erlang.cancel_timer(timer) do 245 | false -> 246 | receive do 247 | {:timeout, ^timer, _} -> :ok 248 | after 249 | 0 -> :ok 250 | end 251 | 252 | _ -> 253 | :ok 254 | end 255 | end 256 | 257 | defp wrap_for_delivery(batch_key, reversed_events, trigger, %{partition_by: nil} = state) do 258 | wrap_for_delivery(batch_key, nil, reversed_events, trigger, state) 259 | end 260 | 261 | defp wrap_for_delivery([batch_key | partition], reversed_events, trigger, state) do 262 | wrap_for_delivery(batch_key, partition, reversed_events, trigger, state) 263 | end 264 | 265 | defp wrap_for_delivery(batch_key, partition, reversed_events, trigger, 
state) do 266 | %{batcher: batcher} = state 267 | 268 | batch_info = %BatchInfo{ 269 | batcher: batcher, 270 | batch_key: batch_key, 271 | partition: partition, 272 | size: length(reversed_events), 273 | trigger: trigger 274 | } 275 | 276 | {Enum.reverse(reversed_events), batch_info} 277 | end 278 | end 279 | -------------------------------------------------------------------------------- /lib/broadway/topology/processor_stage.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.ProcessorStage do 2 | @moduledoc false 3 | use GenStage 4 | 5 | require Logger 6 | alias Broadway.{Message, Acknowledger} 7 | 8 | @spec start_link(term, GenServer.options()) :: GenServer.on_start() 9 | def start_link(args, stage_options) do 10 | Broadway.Topology.Subscriber.start_link( 11 | __MODULE__, 12 | args[:producers], 13 | args, 14 | Keyword.take(args[:processor_config], [:min_demand, :max_demand]), 15 | stage_options 16 | ) 17 | end 18 | 19 | @impl true 20 | def init(args) do 21 | Process.flag(:trap_exit, true) 22 | type = args[:type] 23 | 24 | state = %{ 25 | topology_name: args[:topology_name], 26 | name: args[:name], 27 | partition: args[:partition], 28 | type: type, 29 | module: args[:module], 30 | context: args[:context], 31 | processor_key: args[:processor_key], 32 | batchers: args[:batchers], 33 | producer: args[:producer] 34 | } 35 | 36 | case type do 37 | :consumer -> 38 | {:consumer, state, []} 39 | 40 | :producer_consumer -> 41 | {:producer_consumer, state, dispatcher: args[:dispatcher]} 42 | end 43 | end 44 | 45 | @impl true 46 | def handle_info({:EXIT, pid, reason}, state) when reason not in [:normal, :shutdown] do 47 | Logger.error( 48 | "Processor received a trapped exit from #{inspect(pid)} with reason: " <> 49 | Exception.format_exit(reason) 50 | ) 51 | 52 | {:noreply, [], state} 53 | end 54 | 55 | def handle_info(_msg, state) do 56 | {:noreply, [], state} 57 | end 58 | 59 | @impl true 60 | def handle_events(messages, _from, state) do 61 | :telemetry.span( 62 | [:broadway, :processor], 63 | %{ 64 | topology_name: state.topology_name, 65 | name: state.name, 66 | index: state.partition, 67 | processor_key: state.processor_key, 68 | messages: messages, 69 | context: state.context, 70 | producer: state.producer 71 | }, 72 | fn -> 73 | {prepared_messages, prepared_failed_messages} = maybe_prepare_messages(messages, state) 74 | {successful_messages, failed_messages} = handle_messages(prepared_messages, [], [], state) 75 | failed_messages = prepared_failed_messages ++ failed_messages 76 | 77 | {successful_messages_to_forward, successful_messages_to_ack} = 78 | case state do 79 | %{type: :consumer} -> 80 | {[], successful_messages} 81 | 82 | %{} -> 83 | {successful_messages, []} 84 | end 85 | 86 | failed_messages = 87 | Acknowledger.maybe_handle_failed_messages( 88 | failed_messages, 89 | state.module, 90 | state.context 91 | ) 92 | 93 | try do 94 | Acknowledger.ack_messages(successful_messages_to_ack, failed_messages) 95 | catch 96 | kind, reason -> 97 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 98 | crash_reason: Acknowledger.crash_reason(kind, reason, __STACKTRACE__) 99 | ) 100 | end 101 | 102 | {{:noreply, successful_messages_to_forward, state}, 103 | %{ 104 | topology_name: state.topology_name, 105 | name: state.name, 106 | index: state.partition, 107 | successful_messages_to_ack: successful_messages_to_ack, 108 | successful_messages_to_forward: successful_messages_to_forward, 109 | processor_key: 
state.processor_key, 110 | failed_messages: failed_messages, 111 | context: state.context, 112 | producer: state.producer 113 | }} 114 | end 115 | ) 116 | end 117 | 118 | defp maybe_prepare_messages(messages, state) do 119 | %{module: module, context: context} = state 120 | 121 | if function_exported?(module, :prepare_messages, 2) do 122 | try do 123 | prepared_messages = 124 | messages 125 | |> module.prepare_messages(context) 126 | |> validate_prepared_messages(messages) 127 | 128 | {prepared_messages, []} 129 | catch 130 | kind, reason -> 131 | reason = Exception.normalize(kind, reason, __STACKTRACE__) 132 | 133 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 134 | crash_reason: Acknowledger.crash_reason(kind, reason, __STACKTRACE__) 135 | ) 136 | 137 | messages = Enum.map(messages, &%{&1 | status: {kind, reason, __STACKTRACE__}}) 138 | {[], messages} 139 | end 140 | else 141 | {messages, []} 142 | end 143 | end 144 | 145 | defp handle_messages([message | messages], successful, failed, state) do 146 | %{ 147 | module: module, 148 | context: context, 149 | processor_key: processor_key, 150 | batchers: batchers 151 | } = state 152 | 153 | {successful, failed} = 154 | try do 155 | :telemetry.span( 156 | [:broadway, :processor, :message], 157 | %{ 158 | processor_key: state.processor_key, 159 | topology_name: state.topology_name, 160 | index: state.partition, 161 | name: state.name, 162 | message: message, 163 | context: state.context 164 | }, 165 | fn -> 166 | updated_message = 167 | processor_key 168 | |> module.handle_message(message, context) 169 | |> validate_message(batchers) 170 | 171 | {updated_message, 172 | %{ 173 | processor_key: state.processor_key, 174 | topology_name: state.topology_name, 175 | index: state.partition, 176 | name: state.name, 177 | message: updated_message, 178 | context: state.context 179 | }} 180 | end 181 | ) 182 | catch 183 | kind, reason -> 184 | reason = Exception.normalize(kind, reason, __STACKTRACE__) 185 | 186 | Logger.error(Exception.format(kind, reason, __STACKTRACE__), 187 | crash_reason: Acknowledger.crash_reason(kind, reason, __STACKTRACE__) 188 | ) 189 | 190 | message = %{message | status: {kind, reason, __STACKTRACE__}} 191 | {successful, [message | failed]} 192 | else 193 | %{status: :ok} = message -> 194 | {[message | successful], failed} 195 | 196 | %{status: {:failed, _}} = message -> 197 | {successful, [message | failed]} 198 | end 199 | 200 | handle_messages(messages, successful, failed, state) 201 | end 202 | 203 | defp handle_messages([], successful, failed, _state) do 204 | {Enum.reverse(successful), Enum.reverse(failed)} 205 | end 206 | 207 | defp validate_message(%Message{batcher: batcher, status: status} = message, batchers) do 208 | if status == :ok and batchers != :none and batcher not in batchers do 209 | raise "message was set to unknown batcher #{inspect(batcher)}. 
" <> 210 | "The known batchers are #{inspect(batchers)}" 211 | end 212 | 213 | message 214 | end 215 | 216 | defp validate_message(message, _batchers) do 217 | raise "expected a Broadway.Message from handle_message/3, got #{inspect(message)}" 218 | end 219 | 220 | defp validate_prepared_messages(prepared_messages, messages) do 221 | if length(prepared_messages) != length(messages) do 222 | raise "expected all messages to be returned from prepared_messages/2" 223 | end 224 | 225 | prepared_messages 226 | end 227 | end 228 | -------------------------------------------------------------------------------- /lib/broadway/topology/producer_stage.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.ProducerStage do 2 | @moduledoc false 3 | use GenStage 4 | 5 | alias Broadway.Message 6 | alias Broadway.Topology.RateLimiter 7 | 8 | @spec start_link(term, non_neg_integer, GenServer.options()) :: GenServer.on_start() 9 | def start_link(args, index, opts \\ []) do 10 | GenStage.start_link(__MODULE__, {args, index}, opts) 11 | end 12 | 13 | @spec push_messages(GenServer.server(), [Message.t()]) :: :ok 14 | def push_messages(producer, messages) do 15 | GenStage.call(producer, {__MODULE__, :push_messages, messages}) 16 | end 17 | 18 | @spec drain(GenServer.server()) :: :ok 19 | def drain(producer) do 20 | GenStage.cast(producer, {__MODULE__, :prepare_for_draining}) 21 | GenStage.async_info(producer, {__MODULE__, :cancel_consumers}) 22 | end 23 | 24 | @impl true 25 | def init({args, index}) do 26 | {module, arg} = args[:module] 27 | transformer = args[:transformer] 28 | dispatcher = args[:dispatcher] 29 | rate_limiter = args[:rate_limiter] 30 | 31 | # Inject the topology index only if the args are a keyword list. 32 | arg = 33 | if Keyword.keyword?(arg) do 34 | Keyword.put(arg, :broadway, Keyword.put(args[:broadway], :index, index)) 35 | else 36 | arg 37 | end 38 | 39 | rate_limiting_state = 40 | if rate_limiter do 41 | rate_limiter_ref = RateLimiter.get_rate_limiter_ref(rate_limiter) 42 | 43 | %{ 44 | state: :open, 45 | draining?: false, 46 | rate_limiter: rate_limiter_ref, 47 | # A queue of "batches" of messages that we buffered. 48 | message_buffer: :queue.new(), 49 | # A queue of demands (integers) that we buffered. 
50 | demand_buffer: :queue.new() 51 | } 52 | else 53 | nil 54 | end 55 | 56 | state = %{ 57 | module: module, 58 | module_state: nil, 59 | transformer: transformer, 60 | consumers: [], 61 | rate_limiting: rate_limiting_state 62 | } 63 | 64 | case module.init(arg) do 65 | {:producer, module_state} -> 66 | {:producer, %{state | module_state: module_state}, dispatcher: dispatcher} 67 | 68 | {:producer, module_state, options} -> 69 | if options[:dispatcher] && options[:dispatcher] != dispatcher do 70 | raise "#{inspect(module)} is setting dispatcher to #{inspect(options[:dispatcher])}, " <> 71 | "which is different from dispatcher #{inspect(dispatcher)} expected by Broadway" 72 | end 73 | 74 | {:producer, %{state | module_state: module_state}, [dispatcher: dispatcher] ++ options} 75 | 76 | return_value -> 77 | {:stop, {:bad_return_value, return_value}} 78 | end 79 | end 80 | 81 | @impl true 82 | def handle_subscribe(:consumer, _, from, state) do 83 | {:automatic, update_in(state.consumers, &[from | &1])} 84 | end 85 | 86 | @impl true 87 | def handle_cancel(_, from, state) do 88 | {:noreply, [], update_in(state.consumers, &List.delete(&1, from))} 89 | end 90 | 91 | # If we're rate limited, we store the demand in the buffer instead of forwarding it. 92 | # We'll forward it once the rate limit is lifted. 93 | @impl true 94 | def handle_demand(demand, %{rate_limiting: %{state: :closed}} = state) do 95 | state = update_in(state.rate_limiting.demand_buffer, &:queue.in(demand, &1)) 96 | {:noreply, [], state} 97 | end 98 | 99 | def handle_demand(demand, state) do 100 | %{module: module, module_state: module_state} = state 101 | handle_no_reply(module.handle_demand(demand, module_state), state) 102 | end 103 | 104 | @impl true 105 | def handle_call({__MODULE__, :push_messages, messages}, _from, state) do 106 | {:reply, :ok, messages, state} 107 | end 108 | 109 | def handle_call(message, from, state) do 110 | %{module: module, module_state: module_state} = state 111 | 112 | message 113 | |> module.handle_call(from, module_state) 114 | |> case do 115 | {:reply, reply, events, new_module_state} -> 116 | messages = transform_events(events, state.transformer) 117 | {state, messages} = maybe_rate_limit_and_buffer_messages(state, messages) 118 | {:reply, reply, messages, %{state | module_state: new_module_state}} 119 | 120 | {:reply, reply, events, new_module_state, :hibernate} -> 121 | messages = transform_events(events, state.transformer) 122 | {state, messages} = maybe_rate_limit_and_buffer_messages(state, messages) 123 | {:reply, reply, messages, %{state | module_state: new_module_state}, :hibernate} 124 | 125 | {:stop, reason, reply, new_module_state} -> 126 | {:stop, reason, reply, %{state | module_state: new_module_state}} 127 | 128 | other -> 129 | handle_no_reply(other, state) 130 | end 131 | end 132 | 133 | @impl true 134 | def handle_cast({__MODULE__, :prepare_for_draining}, state) do 135 | %{module: module, module_state: module_state} = state 136 | 137 | if function_exported?(module, :prepare_for_draining, 1) do 138 | module_state 139 | |> module.prepare_for_draining() 140 | |> handle_no_reply(state) 141 | else 142 | {:noreply, [], state} 143 | end 144 | end 145 | 146 | def handle_cast(message, state) do 147 | %{module: module, module_state: module_state} = state 148 | 149 | message 150 | |> module.handle_cast(module_state) 151 | |> handle_no_reply(state) 152 | end 153 | 154 | @impl true 155 | def handle_info({__MODULE__, :cancel_consumers}, %{rate_limiting: %{} = rate_limiting} = state) do 156 | 
rate_limiting = %{rate_limiting | draining?: true} 157 | 158 | if :queue.is_empty(rate_limiting.message_buffer) do 159 | cancel_consumers(state) 160 | end 161 | 162 | {:noreply, [], %{state | rate_limiting: rate_limiting}} 163 | end 164 | 165 | def handle_info({__MODULE__, :cancel_consumers}, state) do 166 | cancel_consumers(state) 167 | {:noreply, [], state} 168 | end 169 | 170 | # Don't forward buffered demand when we're draining or when the rate limiting is closed. 171 | def handle_info( 172 | {__MODULE__, :handle_next_demand}, 173 | %{rate_limiting: %{draining?: draining?, state: rl_state}} = state 174 | ) 175 | when draining? or rl_state == :closed do 176 | {:noreply, [], state} 177 | end 178 | 179 | def handle_info({__MODULE__, :handle_next_demand}, state) do 180 | case get_and_update_in(state.rate_limiting.demand_buffer, &:queue.out/1) do 181 | {{:value, demand}, state} -> 182 | case handle_demand(demand, state) do 183 | {:noreply, messages, state} -> 184 | schedule_next_handle_demand_if_any(state) 185 | {:noreply, messages, state} 186 | 187 | {:noreply, messages, state, :hibernate} -> 188 | schedule_next_handle_demand_if_any(state) 189 | {:noreply, messages, state, :hibernate} 190 | 191 | {:stop, reason, state} -> 192 | {:stop, reason, state} 193 | end 194 | 195 | {:empty, state} -> 196 | {:noreply, [], state} 197 | end 198 | end 199 | 200 | # If the rate limit is lifted but our rate limiting state was "open", 201 | # we don't need to do anything since we don't have anything in the buffer. 202 | def handle_info({RateLimiter, :reset_rate_limiting}, %{rate_limiting: %{state: :open}} = state) do 203 | {:noreply, [], state} 204 | end 205 | 206 | def handle_info({RateLimiter, :reset_rate_limiting}, state) do 207 | state = put_in(state.rate_limiting.state, :open) 208 | 209 | {state, messages} = rate_limit_and_buffer_messages(state) 210 | 211 | # We'll schedule to handle the buffered demand regardless of 212 | # the state of rate limiting. We'll check if we can forward it 213 | # when handling the message. 
214 | schedule_next_handle_demand_if_any(state) 215 | 216 | {:noreply, messages, state} 217 | end 218 | 219 | def handle_info(message, state) do 220 | %{module: module, module_state: module_state} = state 221 | 222 | message 223 | |> module.handle_info(module_state) 224 | |> handle_no_reply(state) 225 | end 226 | 227 | @impl true 228 | def format_discarded(discarded, state) do 229 | %{module: module, module_state: module_state} = state 230 | 231 | if function_exported?(module, :format_discarded, 2) do 232 | module.format_discarded(discarded, module_state) 233 | else 234 | true 235 | end 236 | end 237 | 238 | @impl true 239 | def terminate(reason, %{module: module, module_state: module_state}) do 240 | if function_exported?(module, :terminate, 2) do 241 | module.terminate(reason, module_state) 242 | else 243 | :ok 244 | end 245 | end 246 | 247 | defp handle_no_reply(reply, %{transformer: transformer} = state) do 248 | case reply do 249 | {:noreply, events, new_module_state} when is_list(events) -> 250 | messages = transform_events(events, transformer) 251 | {state, messages} = maybe_rate_limit_and_buffer_messages(state, messages) 252 | {:noreply, messages, %{state | module_state: new_module_state}} 253 | 254 | {:noreply, events, new_module_state, :hibernate} -> 255 | messages = transform_events(events, transformer) 256 | {state, messages} = maybe_rate_limit_and_buffer_messages(state, messages) 257 | {:noreply, messages, %{state | module_state: new_module_state}, :hibernate} 258 | 259 | {:stop, reason, new_module_state} -> 260 | {:stop, reason, %{state | module_state: new_module_state}} 261 | end 262 | end 263 | 264 | defp transform_events(events, nil) do 265 | case events do 266 | [] -> :ok 267 | [message | _] -> validate_message(message) 268 | end 269 | 270 | events 271 | end 272 | 273 | defp transform_events(events, {m, f, opts}) do 274 | for event <- events do 275 | message = apply(m, f, [event, opts]) 276 | validate_message(message) 277 | end 278 | end 279 | 280 | defp validate_message(%Message{} = message) do 281 | message 282 | end 283 | 284 | defp validate_message(_message) do 285 | raise "the produced message is invalid. All messages must be a %Broadway.Message{} " <> 286 | "struct. In case you're using a standard GenStage producer, please set the " <> 287 | ":transformer option to transform produced events into message structs" 288 | end 289 | 290 | defp maybe_rate_limit_and_buffer_messages(state, messages) do 291 | if state.rate_limiting && messages != [] do 292 | state = update_in(state.rate_limiting.message_buffer, &enqueue_batch(&1, messages)) 293 | rate_limit_and_buffer_messages(state) 294 | else 295 | {state, messages} 296 | end 297 | end 298 | 299 | defp rate_limit_and_buffer_messages(%{rate_limiting: %{state: :closed}} = state) do 300 | {state, []} 301 | end 302 | 303 | defp rate_limit_and_buffer_messages(%{rate_limiting: rate_limiting} = state) do 304 | %{message_buffer: buffer, rate_limiter: rate_limiter, draining?: draining?} = rate_limiting 305 | 306 | {rate_limiting, messages_to_emit} = 307 | case RateLimiter.get_currently_allowed(rate_limiter) do 308 | # No point in trying to emit messages if no messages are allowed. In that case, 309 | # we close the rate limiting and don't emit anything. 
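# Note that `allowed` can even be negative here: `rate_limit/2` decrements the counter with `:atomics.sub_get/3`, which may take it below zero when a previous batch overshot the allowance.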
310 | allowed when allowed <= 0 -> 311 | {%{rate_limiting | state: :closed}, []} 312 | 313 | allowed -> 314 | {allowed_left, probably_emittable, buffer} = dequeue_many(buffer, allowed, []) 315 | 316 | {rate_limiting_state, messages_to_emit, messages_to_buffer} = 317 | rate_limit_messages( 318 | rate_limiter, 319 | probably_emittable, 320 | _probably_emittable_count = allowed - allowed_left 321 | ) 322 | 323 | new_buffer = enqueue_batch_r(buffer, messages_to_buffer) 324 | 325 | rate_limiting = %{ 326 | rate_limiting 327 | | message_buffer: new_buffer, 328 | state: rate_limiting_state 329 | } 330 | 331 | if draining? and :queue.is_empty(new_buffer) do 332 | cancel_consumers(state) 333 | end 334 | 335 | {rate_limiting, messages_to_emit} 336 | end 337 | 338 | {%{state | rate_limiting: rate_limiting}, messages_to_emit} 339 | end 340 | 341 | defp reverse_split_demand(rest, 0, acc) do 342 | {0, acc, rest} 343 | end 344 | 345 | defp reverse_split_demand([], demand, acc) do 346 | {demand, acc, []} 347 | end 348 | 349 | defp reverse_split_demand([head | tail], demand, acc) do 350 | reverse_split_demand(tail, demand - 1, [head | acc]) 351 | end 352 | 353 | defp dequeue_many(queue, demand, acc) do 354 | case :queue.out(queue) do 355 | {{:value, list}, queue} -> 356 | case reverse_split_demand(list, demand, acc) do 357 | {0, acc, []} -> 358 | {0, Enum.reverse(acc), queue} 359 | 360 | {0, acc, rest} -> 361 | {0, Enum.reverse(acc), :queue.in_r(rest, queue)} 362 | 363 | {demand, acc, []} -> 364 | dequeue_many(queue, demand, acc) 365 | end 366 | 367 | {:empty, queue} -> 368 | {demand, Enum.reverse(acc), queue} 369 | end 370 | end 371 | 372 | defp enqueue_batch(queue, _list = []), do: queue 373 | defp enqueue_batch(queue, list), do: :queue.in(list, queue) 374 | 375 | defp enqueue_batch_r(queue, _list = []), do: queue 376 | defp enqueue_batch_r(queue, list), do: :queue.in_r(list, queue) 377 | 378 | defp rate_limit_messages(_state, [], _count) do 379 | {:open, [], []} 380 | end 381 | 382 | defp rate_limit_messages(rate_limiter, messages, message_count) do 383 | case RateLimiter.rate_limit(rate_limiter, message_count) do 384 | # If no more messages are allowed, we're rate limited but we're able 385 | # to emit all messages that we have. 386 | 0 -> 387 | {:closed, messages, _to_buffer = []} 388 | 389 | # We were able to emit all messages and still more messages are allowed, 390 | # so the rate limiting is "open". 391 | left when left > 0 -> 392 | {:open, messages, _to_buffer = []} 393 | 394 | # We went over the rate limit, so we split (on negative index) the messages 395 | # we were able to emit and close the rate limiting. 
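# For example, if three messages were emittable and the limiter returns an overflow of -1, `Enum.split(messages, -1)` yields `{[m1, m2], [m3]}`: the first two are emitted and the last one goes back into the buffer.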
396 | overflow when overflow < 0 -> 397 | {emittable, to_buffer} = Enum.split(messages, overflow) 398 | {:closed, emittable, to_buffer} 399 | end 400 | end 401 | 402 | defp schedule_next_handle_demand_if_any(state) do 403 | if not :queue.is_empty(state.rate_limiting.demand_buffer) do 404 | send(self(), {__MODULE__, :handle_next_demand}) 405 | end 406 | end 407 | 408 | defp cancel_consumers(state) do 409 | for from <- state.consumers do 410 | send(self(), {:"$gen_producer", from, {:cancel, :shutdown}}) 411 | end 412 | end 413 | end 414 | -------------------------------------------------------------------------------- /lib/broadway/topology/rate_limiter.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.RateLimiter do 2 | @moduledoc false 3 | 4 | use GenServer 5 | 6 | @atomics_index 1 7 | 8 | def start_link(opts) do 9 | case Keyword.fetch!(opts, :rate_limiting) do 10 | # If we don't have rate limiting options, we don't even need to start this rate 11 | # limiter process. 12 | nil -> 13 | :ignore 14 | 15 | rate_limiting_opts -> 16 | name = Keyword.fetch!(opts, :name) 17 | producers_names = Keyword.fetch!(opts, :producers_names) 18 | args = {rate_limiting_opts, producers_names} 19 | GenServer.start_link(__MODULE__, args, name: name) 20 | end 21 | end 22 | 23 | def rate_limit(counter, amount) 24 | when is_reference(counter) and is_integer(amount) and amount > 0 do 25 | :atomics.sub_get(counter, @atomics_index, amount) 26 | end 27 | 28 | def get_currently_allowed(counter) when is_reference(counter) do 29 | :atomics.get(counter, @atomics_index) 30 | end 31 | 32 | def update_rate_limiting(rate_limiter, opts) do 33 | GenServer.call(rate_limiter, {:update_rate_limiting, opts}) 34 | end 35 | 36 | def get_rate_limiting(rate_limiter) do 37 | GenServer.call(rate_limiter, :get_rate_limiting) 38 | end 39 | 40 | def get_rate_limiter_ref(rate_limiter) do 41 | GenServer.call(rate_limiter, :get_rate_limiter_ref) 42 | end 43 | 44 | @impl true 45 | def init({rate_limiting_opts, producers_names}) do 46 | interval = Keyword.fetch!(rate_limiting_opts, :interval) 47 | allowed = Keyword.fetch!(rate_limiting_opts, :allowed_messages) 48 | 49 | counter = :atomics.new(@atomics_index, []) 50 | :atomics.put(counter, @atomics_index, allowed) 51 | 52 | timer = schedule_next_reset(interval) 53 | 54 | state = %{ 55 | interval: interval, 56 | allowed: allowed, 57 | producers_names: producers_names, 58 | counter: counter, 59 | reset_timer: timer 60 | } 61 | 62 | {:ok, state} 63 | end 64 | 65 | @impl true 66 | def handle_call({:update_rate_limiting, opts}, _from, state) do 67 | %{interval: interval, allowed: allowed, reset_timer: prev_timer} = state 68 | new_interval = Keyword.get(opts, :interval, interval) 69 | new_allowed = Keyword.get(opts, :allowed_messages, allowed) 70 | 71 | state = %{state | interval: new_interval, allowed: new_allowed} 72 | 73 | if Keyword.get(opts, :reset, false) do 74 | cancel_reset_limit_timer(prev_timer) 75 | timer = schedule_next_reset(0) 76 | {:reply, :ok, %{state | reset_timer: timer}} 77 | else 78 | {:reply, :ok, state} 79 | end 80 | end 81 | 82 | def handle_call(:get_rate_limiting, _from, state) do 83 | %{interval: interval, allowed: allowed} = state 84 | {:reply, %{interval: interval, allowed_messages: allowed}, state} 85 | end 86 | 87 | def handle_call(:get_rate_limiter_ref, _from, %{counter: counter} = state) do 88 | {:reply, counter, state} 89 | end 90 | 91 | @impl true 92 | def handle_info(:reset_limit, state) do 93 | 
%{producers_names: producers_names, interval: interval, allowed: allowed, counter: counter} =
94 |       state
95 |
96 |     :atomics.put(counter, @atomics_index, allowed)
97 |
98 |     for name <- producers_names,
99 |         pid = GenServer.whereis(name),
100 |         is_pid(pid),
101 |         do: send(pid, {__MODULE__, :reset_rate_limiting})
102 |
103 |     timer = schedule_next_reset(interval)
104 |
105 |     {:noreply, %{state | reset_timer: timer}}
106 |   end
107 |
108 |   defp schedule_next_reset(interval) do
109 |     Process.send_after(self(), :reset_limit, interval)
110 |   end
111 |
112 |   defp cancel_reset_limit_timer(timer) do
113 |     case Process.cancel_timer(timer) do
114 |       false ->
115 |         receive do
116 |           :reset_limit -> :ok
117 |         after
118 |           0 -> raise "unknown timer #{inspect(timer)}"
119 |         end
120 |
121 |       _ ->
122 |         :ok
123 |     end
124 |   end
125 | end
126 |
--------------------------------------------------------------------------------
/lib/broadway/topology/subscriber.ex:
--------------------------------------------------------------------------------
1 | defmodule Broadway.Topology.Subscriber do
2 |   # This module defines conveniences for subscribing to producers
3 |   # and how to resubscribe to them in case of crashes.
4 |   #
5 |   # In practice, only the first layer resubscribes in case of crashes,
6 |   # as the remaining ones are shut down via the supervision tree, which
7 |   # is set to one_for_all with max_restarts of 0 for the innermost
8 |   # supervisor, while the outermost one is rest_for_one. This guarantees
9 |   # that either all processes are running or none of them.
10 |   #
11 |   # For graceful shutdowns, we rely on cancellations with the help
12 |   # of the terminator.
13 |   @moduledoc false
14 |   @behaviour GenStage
15 |
16 |   def start_link(module, names, options, subscriptions_options, stage_options) do
17 |     GenStage.start_link(
18 |       __MODULE__,
19 |       {module, names, options, subscriptions_options},
20 |       stage_options
21 |     )
22 |   end
23 |
24 |   @impl true
25 |   def init({module, names, options, subscription_options}) do
26 |     {type, state, init_options} = module.init(options)
27 |
28 |     terminator = Keyword.fetch!(options, :terminator)
29 |     resubscribe = Keyword.fetch!(options, :resubscribe)
30 |     partition = Keyword.fetch!(options, :partition)
31 |
32 |     subscription_options =
33 |       subscription_options
34 |       |> Keyword.put(:partition, partition)
35 |       |> Keyword.put_new(:cancel, :temporary)
36 |
37 |     state =
38 |       Map.merge(state, %{
39 |         callback: module,
40 |         terminator: if(type == :consumer, do: terminator),
41 |         resubscribe: resubscribe,
42 |         producers: %{},
43 |         consumers: [],
44 |         subscription_options: subscription_options
45 |       })
46 |
47 |     Enum.each(names, &subscribe(&1, state))
48 |
49 |     extra_options = if type == :consumer, do: [], else: [buffer_size: :infinity]
50 |     {type, state, extra_options ++ init_options}
51 |   end
52 |
53 |   @impl true
54 |   def handle_events(events, from, %{callback: callback} = state) do
55 |     callback.handle_events(events, from, state)
56 |   end
57 |
58 |   @impl true
59 |   def handle_subscribe(:producer, opts, {_, ref}, state) do
60 |     process_name = Keyword.fetch!(opts, :name)
61 |     {:automatic, put_in(state.producers[ref], process_name)}
62 |   end
63 |
64 |   def handle_subscribe(:consumer, _, from, state) do
65 |     {:automatic, update_in(state.consumers, &[from | &1])}
66 |   end
67 |
68 |   @impl true
69 |   def handle_cancel(_, {_, ref} = from, state) do
70 |     case pop_in(state.producers[ref]) do
71 |       {nil, _} ->
72 |         {:noreply, [], update_in(state.consumers, &List.delete(&1, from))}
73 |
74 |       {process_name, state} ->
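        # A producer we were subscribed to cancelled or crashed: schedule a
        # resubscription attempt (when :resubscribe is an interval in ms) and,
        # if we will never resubscribe and no producers remain, start
        # cancelling our own consumers so the shutdown propagates downstream.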
75 | maybe_resubscribe(process_name, state) 76 | maybe_cancel(state) 77 | {:noreply, [], state} 78 | end 79 | end 80 | 81 | @impl true 82 | def handle_info(:will_terminate, state) do 83 | state = %{state | resubscribe: :never} 84 | maybe_cancel(state) 85 | {:noreply, [], state} 86 | end 87 | 88 | def handle_info(:cancel_consumers, %{terminator: terminator} = state) when terminator != nil do 89 | if pid = GenServer.whereis(terminator) do 90 | send(pid, {:done, self()}) 91 | end 92 | 93 | {:noreply, [], state} 94 | end 95 | 96 | def handle_info(:cancel_consumers, %{callback: callback} = state) do 97 | case callback.handle_info(:cancel_consumers, state) do 98 | # If there are no events to emit we are done 99 | {:noreply, [], state} -> 100 | for from <- state.consumers do 101 | send(self(), {:"$gen_producer", from, {:cancel, :shutdown}}) 102 | end 103 | 104 | {:noreply, [], state} 105 | 106 | # Otherwise we will try again later 107 | other -> 108 | GenStage.async_info(self(), :cancel_consumers) 109 | other 110 | end 111 | end 112 | 113 | def handle_info({:resubscribe, process_name}, state) do 114 | subscribe(process_name, state) 115 | {:noreply, [], state} 116 | end 117 | 118 | def handle_info(message, %{callback: callback} = state) do 119 | callback.handle_info(message, state) 120 | end 121 | 122 | ## Helpers 123 | 124 | defp subscribe(process_name, state) do 125 | if pid = GenServer.whereis(process_name) do 126 | opts = [to: pid, name: process_name] ++ state.subscription_options 127 | GenStage.async_subscribe(self(), opts) 128 | true 129 | else 130 | maybe_resubscribe(process_name, state) 131 | false 132 | end 133 | end 134 | 135 | defp maybe_resubscribe(process_name, %{resubscribe: integer}) when is_integer(integer) do 136 | Process.send_after(self(), {:resubscribe, process_name}, integer) 137 | true 138 | end 139 | 140 | defp maybe_resubscribe(_, _), do: false 141 | 142 | defp maybe_cancel(%{resubscribe: :never, producers: producers}) when producers == %{} do 143 | GenStage.async_info(self(), :cancel_consumers) 144 | true 145 | end 146 | 147 | defp maybe_cancel(_), do: false 148 | end 149 | -------------------------------------------------------------------------------- /lib/broadway/topology/terminator.ex: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.Terminator do 2 | @moduledoc false 3 | use GenServer 4 | 5 | @spec start_link(term, GenServer.options()) :: GenServer.on_start() 6 | def start_link(args, opts) do 7 | GenServer.start_link(__MODULE__, args, opts) 8 | end 9 | 10 | @spec trap_exit(GenServer.server()) :: :ok 11 | def trap_exit(terminator) do 12 | GenServer.cast(terminator, :trap_exit) 13 | end 14 | 15 | @impl true 16 | def init(args) do 17 | state = %{ 18 | producers: args[:producers], 19 | first: args[:first], 20 | last: args[:last] 21 | } 22 | 23 | {:ok, state} 24 | end 25 | 26 | @impl true 27 | def handle_cast(:trap_exit, state) do 28 | Process.flag(:trap_exit, true) 29 | {:noreply, state} 30 | end 31 | 32 | @impl true 33 | def handle_info(_, state) do 34 | {:noreply, state} 35 | end 36 | 37 | @impl true 38 | def terminate(_, state) do 39 | for name <- state.first, pid = GenServer.whereis(name) do 40 | send(pid, :will_terminate) 41 | end 42 | 43 | for name <- state.producers, pid = GenServer.whereis(name) do 44 | Broadway.Topology.ProducerStage.drain(pid) 45 | end 46 | 47 | for name <- state.last, pid = GenServer.whereis(name) do 48 | ref = Process.monitor(pid) 49 | 50 | receive do 51 | {:done, ^pid} -> :ok 52 
| {:DOWN, ^ref, _, _, _} -> :ok 53 | end 54 | end 55 | 56 | :ok 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.MixProject do 2 | use Mix.Project 3 | 4 | @version "1.2.1" 5 | @description "Build concurrent and multi-stage data ingestion and data processing pipelines" 6 | 7 | def project do 8 | [ 9 | app: :broadway, 10 | version: @version, 11 | elixir: "~> 1.7", 12 | name: "Broadway", 13 | description: @description, 14 | deps: deps(), 15 | docs: docs(), 16 | package: package(), 17 | test_coverage: [tool: ExCoveralls], 18 | preferred_cli_env: [docs: :docs] 19 | ] 20 | end 21 | 22 | def application do 23 | [ 24 | extra_applications: [:logger], 25 | env: [config_storage: :persistent_term], 26 | mod: {Broadway.Application, []} 27 | ] 28 | end 29 | 30 | defp deps do 31 | [ 32 | {:gen_stage, "~> 1.0"}, 33 | {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0"}, 34 | {:telemetry, "~> 0.4.3 or ~> 1.0"}, 35 | 36 | # Dev/test dependencies. 37 | {:castore, "~> 1.0", only: :test}, 38 | {:ex_doc, ">= 0.19.0", only: :docs}, 39 | {:excoveralls, "~> 0.18.0", only: :test} 40 | ] 41 | end 42 | 43 | defp docs do 44 | [ 45 | main: "introduction", 46 | source_ref: "v#{@version}", 47 | source_url: "https://github.com/dashbitco/broadway", 48 | extra_section: "Guides", 49 | extras: [ 50 | "guides/examples/introduction.md", 51 | "guides/examples/amazon-sqs.md", 52 | "guides/examples/apache-kafka.md", 53 | "guides/examples/google-cloud-pubsub.md", 54 | "guides/examples/rabbitmq.md", 55 | "guides/examples/custom-producers.md", 56 | "guides/internals/architecture.md" 57 | ], 58 | groups_for_extras: [ 59 | Examples: Path.wildcard("guides/examples/*.md"), 60 | Internals: Path.wildcard("guides/internals/*.md") 61 | ], 62 | groups_for_modules: [ 63 | # Ungrouped Modules: 64 | # 65 | # Broadway 66 | # Broadway.Message 67 | # Broadway.BatchInfo 68 | 69 | Acknowledgement: [ 70 | Broadway.Acknowledger, 71 | Broadway.CallerAcknowledger, 72 | Broadway.NoopAcknowledger 73 | ], 74 | Producers: [ 75 | Broadway.Producer, 76 | Broadway.DummyProducer 77 | ] 78 | ] 79 | ] 80 | end 81 | 82 | defp package do 83 | %{ 84 | licenses: ["Apache-2.0"], 85 | maintainers: ["Marlus Saraiva", "José Valim"], 86 | links: %{"GitHub" => "https://github.com/dashbitco/broadway"} 87 | } 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"}, 3 | "earmark_parser": {:hex, :earmark_parser, "1.4.43", "34b2f401fe473080e39ff2b90feb8ddfeef7639f8ee0bbf71bb41911831d77c5", [:mix], [], "hexpm", "970a3cd19503f5e8e527a190662be2cee5d98eed1ff72ed9b3d1a3d466692de8"}, 4 | "ex_doc": {:hex, :ex_doc, "0.37.1", "65ca30d242082b95aa852b3b73c9d9914279fff56db5dc7b3859be5504417980", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: 
:makeup_html, repo: "hexpm", optional: true]}], "hexpm", "6774f75477733ea88ce861476db031f9399c110640752ca2b400dbbb50491224"}, 5 | "excoveralls": {:hex, :excoveralls, "0.18.1", "a6f547570c6b24ec13f122a5634833a063aec49218f6fff27de9df693a15588c", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "d65f79db146bb20399f23046015974de0079668b9abb2f5aac074d078da60b8d"}, 6 | "gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"}, 7 | "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, 8 | "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, 9 | "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, 10 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"}, 11 | "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, 12 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, 13 | "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, 14 | } 15 | -------------------------------------------------------------------------------- /test/broadway/acknowledger_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.AcknowledgerTest do 2 | use ExUnit.Case 3 | 4 | describe "crash_reason/3" do 5 | test "exceptions" do 6 | {kind, reason, stack} = kind_reason_stack(fn -> raise "oops" end) 7 | 8 | assert {%RuntimeError{message: "oops"}, [_ | _]} = 9 | Broadway.Acknowledger.crash_reason(kind, reason, stack) 10 | end 11 | 12 | test "exits" do 13 | {kind, reason, stack} = kind_reason_stack(fn -> exit(:fatal_error) end) 14 | 15 | assert {:fatal_error, [_ | _]} = Broadway.Acknowledger.crash_reason(kind, reason, stack) 16 | end 17 | 18 | test "throws" do 19 | {kind, reason, stack} = kind_reason_stack(fn -> throw(:basketball) end) 20 | 21 | assert {{:nocatch, :basketball}, [_ | _]} = 22 | Broadway.Acknowledger.crash_reason(kind, reason, stack) 23 | end 24 | 25 | test "Erlang errors" do 26 | {kind, reason, stack} = 
kind_reason_stack(fn -> :erlang.error(:boom) end) 27 | 28 | assert {%ErlangError{original: :boom}, [_ | _]} = 29 | Broadway.Acknowledger.crash_reason(kind, reason, stack) 30 | end 31 | end 32 | 33 | defp kind_reason_stack(fun) do 34 | fun.() 35 | catch 36 | kind, reason -> 37 | {kind, reason, __STACKTRACE__} 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /test/broadway/config_storage_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.ConfigStorageTest do 2 | use ExUnit.Case, async: false 3 | 4 | alias Broadway.ConfigStorage.ETS 5 | 6 | setup do 7 | prev = Application.fetch_env!(:broadway, :config_storage) 8 | 9 | on_exit(fn -> 10 | Application.put_env(:broadway, :config_storage, prev) 11 | end) 12 | end 13 | 14 | test "ets default options" do 15 | Application.put_env(:broadway, :config_storage, :ets) 16 | ETS.setup() 17 | assert [] = ETS.list() 18 | 19 | assert ETS.put("some name", %Broadway.Topology{}) 20 | assert ["some name"] = ETS.list() 21 | assert %Broadway.Topology{} = ETS.get("some name") 22 | assert :ets.info(ETS.table(), :size) == 1 23 | 24 | ETS.delete("some name") 25 | assert :ets.info(ETS.table(), :size) == 0 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /test/broadway/dummy_producer_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.DummyProducerTest do 2 | use ExUnit.Case, async: true 3 | 4 | defmodule Handler do 5 | use Broadway 6 | 7 | def handle_message(_processor, message, _context) do 8 | message 9 | end 10 | end 11 | 12 | test "send message through", c do 13 | {:ok, _} = 14 | Broadway.start_link(Handler, 15 | name: c.test, 16 | producer: [ 17 | module: {Broadway.DummyProducer, []} 18 | ], 19 | processors: [ 20 | default: [ 21 | concurrency: 1 22 | ] 23 | ] 24 | ) 25 | 26 | ref = Broadway.test_batch(c.test, [1, 2]) 27 | assert_receive {:ack, ^ref, [%{status: :ok}, %{status: :ok}], []} 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /test/broadway/topology/batcher_stage_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.BatcherStageTest do 2 | use ExUnit.Case, async: true 3 | 4 | test "max_demand defaults to batch_size" do 5 | {:ok, pid} = 6 | Broadway.Topology.BatcherStage.start_link( 7 | [ 8 | module: __MODULE__, 9 | context: %{}, 10 | type: :producer_consumer, 11 | terminator: __MODULE__, 12 | resubscribe: :never, 13 | batcher: :default, 14 | processors: [:some_processor], 15 | batch_size: 123, 16 | batch_timeout: 1000, 17 | partition: 0 18 | ], 19 | [] 20 | ) 21 | 22 | %{state: state} = :sys.get_state(pid) 23 | assert state.subscription_options[:max_demand] == 123 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/broadway/topology/processor_stage_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.ProcessorStageTest do 2 | use ExUnit.Case, async: true 3 | 4 | test "set custom min and max demand" do 5 | {:ok, pid} = 6 | Broadway.Topology.ProcessorStage.start_link( 7 | [ 8 | module: __MODULE__, 9 | context: %{}, 10 | type: :producer_consumer, 11 | terminator: __MODULE__, 12 | resubscribe: :never, 13 | processor_config: [min_demand: 3, max_demand: 6], 14 | producers: [:sample], 
15 | partition: 0, 16 | dispatcher: GenStage.DemandDispatcher 17 | ], 18 | [] 19 | ) 20 | 21 | %{state: state} = :sys.get_state(pid) 22 | assert state.subscription_options[:min_demand] == 3 23 | assert state.subscription_options[:max_demand] == 6 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/broadway/topology/producer_stage_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Broadway.Topology.ProducerStageTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias Broadway.Message 5 | alias Broadway.Topology.ProducerStage 6 | 7 | defmodule FakeProducer do 8 | use GenStage 9 | 10 | def init(_), do: {:producer, nil} 11 | 12 | def handle_demand(demand, :return_no_reply) do 13 | {:noreply, [wrap_message(demand)], :new_module_state} 14 | end 15 | 16 | def handle_demand(demand, :return_no_reply_with_hibernate) do 17 | {:noreply, [wrap_message(demand)], :new_module_state, :hibernate} 18 | end 19 | 20 | def handle_demand(demand, :return_stop) do 21 | {:stop, "error_on_demand_#{demand}", :new_module_state} 22 | end 23 | 24 | def handle_demand(demand, :do_not_wrap_messages) do 25 | {:noreply, [demand], :new_module_state} 26 | end 27 | 28 | def handle_info(message, :return_no_reply) do 29 | {:noreply, [wrap_message(message)], :new_module_state} 30 | end 31 | 32 | def handle_info(message, :return_no_reply_with_hibernate) do 33 | {:noreply, [wrap_message(message)], :new_module_state, :hibernate} 34 | end 35 | 36 | def handle_info(message, :return_stop) do 37 | {:stop, "error_on_#{message}", :new_module_state} 38 | end 39 | 40 | def handle_info(message, :do_not_wrap_messages) do 41 | {:noreply, [message], :new_module_state} 42 | end 43 | 44 | def terminate(reason, state) do 45 | {reason, state} 46 | end 47 | 48 | def transformer(event, concat: text) do 49 | %Message{data: "#{event}#{text}", acknowledger: {__MODULE__, event}} 50 | end 51 | 52 | defp wrap_message(data) do 53 | %Message{data: data, acknowledger: {__MODULE__, data}} 54 | end 55 | end 56 | 57 | defmodule ProducerWithOutTerminate do 58 | use GenStage 59 | 60 | def init(_), do: {:producer, nil} 61 | end 62 | 63 | defmodule ProducerWithBadReturn do 64 | use GenStage 65 | 66 | def init(_), do: {:consumer, nil} 67 | end 68 | 69 | setup do 70 | %{ 71 | state: %{ 72 | module: FakeProducer, 73 | transformer: nil, 74 | module_state: nil, 75 | rate_limiting: nil 76 | } 77 | } 78 | end 79 | 80 | test "init with bad return" do 81 | args = %{module: {ProducerWithBadReturn, []}, broadway: []} 82 | 83 | assert ProducerStage.init({args, _index = 0}) == 84 | {:stop, {:bad_return_value, {:consumer, nil}}} 85 | end 86 | 87 | describe "wrap handle_demand" do 88 | test "returning {:noreply, [event], new_state}", %{state: state} do 89 | state = %{state | module_state: :return_no_reply} 90 | new_state = %{state | module_state: :new_module_state} 91 | 92 | assert {:noreply, [%Message{data: 10}], ^new_state} = ProducerStage.handle_demand(10, state) 93 | end 94 | 95 | test "returning {:noreply, [event], new_state, :hibernate}", %{state: state} do 96 | state = %{state | module_state: :return_no_reply_with_hibernate} 97 | new_state = %{state | module_state: :new_module_state} 98 | 99 | assert {:noreply, [%Message{data: 10}], ^new_state, :hibernate} = 100 | ProducerStage.handle_demand(10, state) 101 | end 102 | 103 | test "returning {:stop, reason, new_state}", %{state: state} do 104 | state = %{state | module_state: :return_stop} 105 | new_state = %{state | 
module_state: :new_module_state} 106 | 107 | assert ProducerStage.handle_demand(10, state) == {:stop, "error_on_demand_10", new_state} 108 | end 109 | 110 | test "raise an error if a message is not a %Message{}", %{state: state} do 111 | state = %{state | module_state: :do_not_wrap_messages} 112 | 113 | assert_raise RuntimeError, 114 | ~r/the produced message is invalid/, 115 | fn -> ProducerStage.handle_demand(10, state) end 116 | end 117 | 118 | test "transform events into %Message{} structs using a transformer", %{state: state} do 119 | transformer = {FakeProducer, :transformer, [concat: " ok"]} 120 | state = %{state | module_state: :do_not_wrap_messages, transformer: transformer} 121 | new_state = %{state | module_state: :new_module_state} 122 | 123 | assert {:noreply, [%Message{data: "10 ok"}], ^new_state} = 124 | ProducerStage.handle_demand(10, state) 125 | end 126 | end 127 | 128 | describe "wrap handle_info" do 129 | test "returning {:noreply, [event], new_state}", %{state: state} do 130 | state = %{state | module_state: :return_no_reply} 131 | new_state = %{state | module_state: :new_module_state} 132 | 133 | assert {:noreply, [%Message{data: :a_message}], ^new_state} = 134 | ProducerStage.handle_info(:a_message, state) 135 | end 136 | 137 | test "returning {:noreply, [event], new_state, :hibernate}", %{state: state} do 138 | state = %{state | module_state: :return_no_reply_with_hibernate} 139 | new_state = %{state | module_state: :new_module_state} 140 | 141 | assert {:noreply, [%Message{data: :a_message}], ^new_state, :hibernate} = 142 | ProducerStage.handle_info(:a_message, state) 143 | end 144 | 145 | test "returning {:stop, reason, new_state}", %{state: state} do 146 | state = %{state | module_state: :return_stop} 147 | new_state = %{state | module_state: :new_module_state} 148 | 149 | assert ProducerStage.handle_info(:a_message, state) == 150 | {:stop, "error_on_a_message", new_state} 151 | end 152 | 153 | test "raise an error if a message is not a %Message{}", %{state: state} do 154 | state = %{state | module_state: :do_not_wrap_messages} 155 | 156 | assert_raise RuntimeError, 157 | ~r/the produced message is invalid/, 158 | fn -> ProducerStage.handle_info(:not_a_message, state) end 159 | end 160 | 161 | test "transform events into %Message{} structs using a transformer", %{state: state} do 162 | transformer = {FakeProducer, :transformer, [concat: " ok"]} 163 | state = %{state | module_state: :do_not_wrap_messages, transformer: transformer} 164 | new_state = %{state | module_state: :new_module_state} 165 | 166 | assert {:noreply, [%Message{data: "10 ok"}], ^new_state} = 167 | ProducerStage.handle_info(10, state) 168 | end 169 | end 170 | 171 | describe "wrap terminate" do 172 | test "forward call to wrapped module" do 173 | state = %{module: FakeProducer, module_state: :module_state} 174 | 175 | assert ProducerStage.terminate(:normal, state) == {:normal, :module_state} 176 | 177 | assert ProducerStage.terminate({:shutdown, :a_term}, state) == 178 | {{:shutdown, :a_term}, :module_state} 179 | end 180 | 181 | test "returns :ok when the wrapped module doesn't define a terminate/2 callback" do 182 | state = %{module: ProducerWithOutTerminate, module_state: :module_state} 183 | 184 | assert ProducerStage.terminate(:normal, state) == :ok 185 | end 186 | end 187 | end 188 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | 
ExUnit.start(capture_log: true, assert_receive_timeout: 2000) 2 | Logger.remove_backend(:console) 3 | --------------------------------------------------------------------------------
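For context, here is a minimal, illustrative sketch of how the rate-limiting
internals above are exercised from user code. The module name MyApp.MyBroadway
is hypothetical; the producer, processor, and rate_limiting options follow
Broadway's documented configuration, and Broadway.DummyProducer is the test
producer shipped in this repository.

    defmodule MyApp.MyBroadway do
      use Broadway

      def start_link(_opts) do
        Broadway.start_link(__MODULE__,
          name: __MODULE__,
          producer: [
            module: {Broadway.DummyProducer, []},
            # Allow at most 100 messages every 5 seconds across all producers.
            rate_limiting: [allowed_messages: 100, interval: 5_000]
          ],
          processors: [default: [concurrency: 2]]
        )
      end

      @impl true
      def handle_message(_processor, message, _context) do
        message
      end
    end

    # Limits can be changed at runtime; :reset applies the new allowance
    # immediately instead of waiting for the current interval to elapse:
    # Broadway.update_rate_limiting(MyApp.MyBroadway, allowed_messages: 50, reset: true)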