├── .formatter.exs ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── README.md ├── lib ├── flow.ex └── flow │ ├── coordinator.ex │ ├── map_reducer.ex │ ├── materialize.ex │ ├── window.ex │ └── window │ ├── count.ex │ ├── fixed.ex │ ├── global.ex │ └── periodic.ex ├── mix.exs ├── mix.lock └── test ├── flow ├── window │ ├── count_test.exs │ ├── fixed_test.exs │ ├── global_test.exs │ └── periodic_test.exs └── window_test.exs ├── flow_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | [ 2 | inputs: ["{mix,.formatter}.exs", "{lib,test}/**/*.{ex,exs}"] 3 | ] 4 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-20.04 12 | env: 13 | MIX_ENV: test 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | include: 18 | - pair: 19 | elixir: "1.7" 20 | otp: "22" 21 | - pair: 22 | elixir: "1.18" 23 | otp: "27" 24 | lint: lint 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: erlef/setup-beam@v1 29 | with: 30 | otp-version: ${{matrix.pair.otp}} 31 | elixir-version: ${{matrix.pair.elixir}} 32 | 33 | - uses: actions/cache@v4 34 | with: 35 | path: | 36 | deps 37 | _build 38 | key: ${{ runner.os }}-mix-${{matrix.pair.elixir}}-${{matrix.pair.otp}}-${{ hashFiles('**/mix.lock') }} 39 | restore-keys: | 40 | ${{ runner.os }}-mix-${{matrix.pair.elixir}}-${{matrix.pair.otp}}- 41 | 42 | - run: mix deps.get --only test 43 | 44 | - run: mix format --check-formatted 45 | if: ${{ matrix.lint }} 46 | 47 | - run: mix deps.get && mix deps.unlock --check-unused 48 | if: ${{ matrix.lint }} 49 | 50 | - run: mix deps.compile 51 | 52 | - run: mix compile --warnings-as-errors 53 | if: ${{ matrix.lint }} 54 | 55 | - run: mix test 
56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /cover 3 | /deps 4 | /doc 5 | erl_crash.dump 6 | *.ez 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v1.2.4 (2023-03-21) 4 | 5 | * Bug fixes 6 | * Make sure flows are garbage collected when consumed as a stream which is halted 7 | 8 | ## v1.2.3 (2023-01-07) 9 | 10 | * Enhancements 11 | * Add `Flow.stream/2` and `Flow.run/2` which accept `link: false` 12 | 13 | ## v1.2.2 (2023-01-06) 14 | 15 | * Bug fix 16 | * Fix `Flow.merge/2` followed by consumer 17 | 18 | ## v1.2.1 (2022-12-29) 19 | 20 | * Enhancements 21 | * Allow stages to be ignored 22 | * Add `Flow.child_spec/1` 23 | 24 | ## v1.2.0 (2022-02-12) 25 | 26 | * Bug fixes 27 | * Fix bugs where shuffling would ignore partitions before/after 28 | * Ensure flow options effectively override producer dispatcher 29 | 30 | * Deprecations 31 | * Deprecate `Flow.map/2` and friends after `reduce` to avoid confusion regarding bookkeeping of state 32 | 33 | ## v1.1.0 (2020-12-09) 34 | 35 | * Enhancements 36 | * Add `Flow.map_batch/2` 37 | 38 | * Bug fixes 39 | * Do not leak flows while enumerating 40 | 41 | ## v1.0.0 (2020-02-03) 42 | 43 | * Enhancements 44 | * Require GenStage v1.0.0 45 | 46 | ## v0.15.0 (2019-10-28) 47 | 48 | * Enhancements 49 | * Add on_init callback to MapReducer 50 | * Deprecate Flow.each to avoid pitfalls 51 | * Remove previously deprecated code 52 | 53 | * Bug fixes 54 | * Set demand to accumulate before producer_consumer subscribe 55 | 56 | ## v0.14.3 (2018-10-25) 57 | 58 | * Bug fixes 59 | * Don't fuse mappers into enumerables (#62) 60 | * Trap exits to ensure event completion on shutdown 61 | * Fix `flat_map` followed by `emit_and_reduce` (#68) 62 | 
63 | ## v0.14.2 (2018-07-24) 64 | 65 | * Bug fixes 66 | * Make sure consumers added via `into_specs/3` restart the flow in case of failures 67 | 68 | ## v0.14.1 (2018-07-17) 69 | 70 | * Deprecations 71 | * `Flow.filter_map/3` is deprecated in favor of filter+map 72 | * `Flow.from_stage/2` is deprecated in favor of `Flow.from_stages/2` 73 | * `Flow.merge/2` is deprecated in favor of `Flow.partition/2` or `Flow.shuffle/2` 74 | 75 | * Enhancements 76 | * Add `Flow.shuffle/2` to shuffle the stages into new ones 77 | * Add `Flow.through_stages/3` for hooking `producer_consumer`s into the flow 78 | * Add `Flow.from_specs/2`, `Flow.through_specs/3` and `Flow.into_specs/3` to start stages in the same supervision tree as the flow 79 | 80 | ## v0.14.0 (2018-06-10) 81 | 82 | This release includes a redesign of how triggers and the reducing accumulator works. 83 | 84 | Prior to this version, the `Flow` module was responsible for traversing events in the mapper stage and to accumulate the state in reducing stages. When working with unbound data, the `Flow.Window` was used to control exactly when to emit data from the reducing stages and when to reset the partition state. 85 | 86 | This approach meant that understanding which data is emitted and when the state was reset was hard because the logic was spread in multiple places. To make matters worse, if you wanted to have your own rules for emitting events, such as user session or sliding windows, it was only possible to achieve it via custom window implementations. 87 | 88 | This design limitation caused many users to drop Flow and use GenStage, as GenStage provides the necessary abstractions for tackling those problems. However, since Flow is built on top of GenStage, why not expose it directly through Flow? That's what v0.14.0 does. 89 | 90 | v0.14.0 introduces two new functions: `emit_and_reduce/3` and `on_trigger/2` which gives developers explicit control of when to emit data. 
The `on_trigger/2` function also allows developers to fully control the state that is kept in the reducing stage after the trigger. 91 | 92 | Unfortunately this change is incompatible (or rather, fully replaces) the following functionalities: 93 | 94 | * `each_state/2` and `map_state/2` - those two functions were only invoked when there was a trigger and they have now been replaced by a more explicitly named `on_trigger/2` function 95 | 96 | * The `:keep` and `:reset` argument to windows and triggers have been removed as you control the behaviour on `on_trigger/2` 97 | 98 | For example, if you used `map_state/2` (or `each_state/2`) and a `:reset` trigger, like this: 99 | 100 | |> Flow.map_state(fn acc -> do_something(acc) end) 101 | 102 | You can now replace this code by: 103 | 104 | |> Flow.on_trigger(fn acc -> {do_something(acc), []} end) 105 | 106 | Where the first element of the tuple returned by `on_trigger` is the data to emit and the second element is the new accumulator of the reducer stage. Similarly, if you were using `map_state/2` (or `each_state/2`) and a `:keep` trigger, like this: 107 | 108 | |> Flow.map_state(fn acc -> do_something(acc) end) 109 | 110 | You can now replace this code by: 111 | 112 | |> Flow.on_trigger(fn acc -> {do_something(acc), acc} end) 113 | 114 | Note that `on_trigger/2` can only be called once per partition. In case you were calling `map_state/2` and `each_state/2` multiple times, you can simply inline all calls inside the same `on_trigger/2`. 115 | 116 | We believe `emit_and_reduce/3` and `on_trigger/2` provide a conceptually simpler module to reason about flows while being more powerful. 117 | 118 | This release also deprecates `Flow.Window.session/3` as developers can trivially roll their own with more customization power and flexibility using `emit_and_reduce/3` and `on_trigger/2`. 
119 | 120 | ### Notes 121 | 122 | * Enhancements 123 | * `use Flow` now defines a `child_spec/1` to be used under supervision 124 | * Added `emit_and_reduce/3` and `on_trigger/2` 125 | * Use `DemandDispatcher` when there is one stage in partition 126 | 127 | * Deprecations 128 | * Session windows are deprecated in favor of `Flow.emit_and_reduce/3` and `Flow.on_trigger/2` 129 | 130 | * Backwards incompatible changes 131 | * `Flow.map_state/2` was removed in favor of `Flow.on_trigger/2` 132 | * `Flow.each_state/2` was removed in favor of `Flow.on_trigger/2` 133 | * Passing `:keep` or `:reset` to triggers was removed in favor of explicit control via `Flow.on_trigger/2`. If you are passing or matching on those atoms, those entries can be removed 134 | 135 | ## v0.13.0 (2018-01-23) 136 | 137 | * Enhancements 138 | * Expose a timeout parameter for start_link and into_stages 139 | * Allow shutdown time for stages to be configured 140 | 141 | * Bug fixes 142 | * Ensure proper shutdown propagation on start_link, into_stages and friends (#40) 143 | * Ensure proper shutdown order in Flow (#35) 144 | 145 | ## v0.12.0 146 | 147 | * Enhancements 148 | * Allow late subscriptions to Flow returned by `Flow.into_stages` 149 | 150 | * Bug fixes 151 | * Cancel timer when termination is triggered on periodic window. This avoids invoking termination callbacks twice. 152 | 153 | ## v0.11.1 154 | 155 | * Enhancements 156 | * Add the ability to emit only certain events in a trigger 157 | 158 | * Bug fixes 159 | * Add `:gen_stage` to the applications list 160 | * Ensure we handle supervisor exits on flow coordinator 161 | * Ensure we do not unnecessarily partition when fusing producer+streams 162 | 163 | ## v0.11.0 164 | 165 | Extracted from GenStage. 
166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Flow [![Build Status](https://github.com/dashbitco/flow/workflows/CI/badge.svg)](https://github.com/dashbitco/flow/actions?query=workflow%3A%22CI%22) 2 | 3 | `Flow` allows developers to express computations on collections, similar to the `Enum` and `Stream` modules, although computations will be executed in parallel using multiple [`GenStage`](https://github.com/elixir-lang/gen_stage)s. 4 | 5 | Here is a quick example on how to count words in a document in parallel with Flow: 6 | 7 | ```elixir 8 | File.stream!("path/to/some/file") 9 | |> Flow.from_enumerable() 10 | |> Flow.flat_map(&String.split(&1, " ")) 11 | |> Flow.partition() 12 | |> Flow.reduce(fn -> %{} end, fn word, acc -> 13 | Map.update(acc, word, 1, & &1 + 1) 14 | end) 15 | |> Enum.to_list() 16 | ``` 17 | 18 | See documentation for [Flow](https://hexdocs.pm/flow) or [José Valim's keynote at ElixirConf 2016](https://youtu.be/srtMWzyqdp8?t=244) introducing the main concepts behind [GenStage](https://github.com/elixir-lang/gen_stage) and [Flow](https://hexdocs.pm/flow). 19 | 20 | ## Installation 21 | 22 | Flow requires Elixir v1.7+ and Erlang/OTP 22+. Add `:flow` to your list of dependencies in mix.exs: 23 | 24 | ```elixir 25 | def deps do 26 | [{:flow, "~> 1.0"}] 27 | end 28 | ``` 29 | 30 | ### Usage in Livebook 31 | 32 | Flow pipelines start several processes linked to the current process. This means that, if there is an error in your Flow, it will shut down the Livebook runtime. You can avoid this in your notebooks in two different ways: 33 | 34 | 1. Use `Flow.stream(flow, link: false)` to explicitly convert a Flow to a non-linked stream. 
You can then invoke `Enum` and `Stream` functions regularly: 35 | 36 | ```elixir 37 | Flow.from_enumerable([1, 2, 3]) 38 | |> Flow.map(& &1 * 2) 39 | |> Flow.stream(link: false) 40 | |> Enum.to_list() 41 | ``` 42 | 43 | 2. By trapping exits once before the Flow computation starts: 44 | 45 | ```elixir 46 | Process.flag(:trap_exit, true) 47 | ``` 48 | 49 | ## License 50 | 51 | Copyright 2017 Plataformatec \ 52 | Copyright 2020 Dashbit 53 | 54 | Licensed under the Apache License, Version 2.0 (the "License"); 55 | you may not use this file except in compliance with the License. 56 | You may obtain a copy of the License at 57 | 58 | http://www.apache.org/licenses/LICENSE-2.0 59 | 60 | Unless required by applicable law or agreed to in writing, software 61 | distributed under the License is distributed on an "AS IS" BASIS, 62 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 63 | See the License for the specific language governing permissions and 64 | limitations under the License. 
65 | 66 | -------------------------------------------------------------------------------- /lib/flow/coordinator.ex: -------------------------------------------------------------------------------- 1 | defmodule Flow.Coordinator do 2 | @moduledoc false 3 | use GenServer 4 | 5 | def start_link(flow, type, consumers, options) do 6 | filtered_options = 7 | Keyword.take(options, [:debug, :name, :timeout, :spawn_opt, :hibernate_after]) 8 | 9 | GenServer.start_link(__MODULE__, {flow, type, consumers, options}, filtered_options) 10 | end 11 | 12 | def start(flow, type, consumers, options) do 13 | filtered_options = 14 | Keyword.take(options, [:debug, :name, :timeout, :spawn_opt, :hibernate_after]) 15 | 16 | GenServer.start(__MODULE__, {flow, type, consumers, options}, filtered_options) 17 | end 18 | 19 | def stream(pid) do 20 | GenServer.call(pid, :stream, :infinity) 21 | end 22 | 23 | ## Callbacks 24 | 25 | def init({flow, type, {inner_or_outer, consumers}, options}) do 26 | Process.flag(:trap_exit, true) 27 | 28 | {:ok, supervisor} = start_supervisor() 29 | start_link = &start_child(supervisor, &1, restart: :temporary) 30 | demand = Keyword.get(options, :demand, :forward) 31 | dispatcher = Keyword.get(options, :dispatcher, GenStage.DemandDispatcher) 32 | 33 | {producers, intermediary} = 34 | Flow.Materialize.materialize(flow, demand, start_link, type, dispatcher) 35 | 36 | producers = for {pid, _} <- producers, pid != :undefined, do: pid 37 | 38 | if producers == [] do 39 | :ignore 40 | else 41 | timeout = Keyword.get(options, :subscribe_timeout, 5_000) 42 | consumers = consumers.(&start_child(supervisor, &1, [])) 43 | 44 | for {pid, _} <- intermediary, {consumer, opts} <- consumers do 45 | GenStage.sync_subscribe(consumer, [to: pid, cancel: :transient] ++ opts, timeout) 46 | end 47 | 48 | if demand == :forward do 49 | for producer <- producers, do: GenStage.demand(producer, demand) 50 | end 51 | 52 | to_ref = if inner_or_outer == :inner, do: consumers, else: 
intermediary 53 | refs = Enum.map(to_ref, fn {pid, _} -> Process.monitor(pid) end) 54 | 55 | state = %{ 56 | intermediary: intermediary, 57 | refs: refs, 58 | producers: producers, 59 | supervisor: supervisor 60 | } 61 | 62 | {:ok, state} 63 | end 64 | end 65 | 66 | # We have a supervisor for the whole flow. We always wait for an error 67 | # to propagate through the whole flow, and then we terminate. For this 68 | # to work all children are started as temporary, except the consumers 69 | # given via into_specs. Once those crash, they terminate the whole 70 | # flow according to their restart type. 71 | defp start_supervisor do 72 | Supervisor.start_link([], strategy: :one_for_one, max_restarts: 0) 73 | end 74 | 75 | defp start_child(supervisor, spec, opts) do 76 | spec = Supervisor.child_spec(spec, [id: make_ref()] ++ opts) 77 | Supervisor.start_child(supervisor, spec) 78 | end 79 | 80 | def handle_call(:stream, _from, %{producers: producers, intermediary: intermediary} = state) do 81 | {:reply, GenStage.stream(intermediary, producers: producers), state} 82 | end 83 | 84 | def handle_cast({:"$demand", demand}, %{producers: producers} = state) do 85 | for producer <- producers, do: GenStage.demand(producer, demand) 86 | {:noreply, state} 87 | end 88 | 89 | def handle_info({:"$gen_producer", {consumer, ref}, {:subscribe, _, opts}}, state) do 90 | for {pid, _} <- state.intermediary do 91 | GenStage.async_subscribe(consumer, [to: pid] ++ opts) 92 | end 93 | 94 | send(consumer, {:"$gen_consumer", {self(), ref}, {:cancel, :normal}}) 95 | {:noreply, state} 96 | end 97 | 98 | # Since consumers can send demand right after subscription, 99 | # we may still receive ask messages, which we promptly ignore. 
100 | def handle_info({:"$gen_producer", _from, {:ask, _}}, state) do 101 | {:noreply, state} 102 | end 103 | 104 | def handle_info({:DOWN, ref, _, _, reason}, %{refs: refs} = state) do 105 | if ref in refs do 106 | refs = List.delete(refs, ref) 107 | state = %{state | refs: refs} 108 | 109 | non_normal_shutdown? = 110 | case reason do 111 | :normal -> false 112 | :shutdown -> false 113 | {:shutdown, _} -> false 114 | _ -> true 115 | end 116 | 117 | cond do 118 | non_normal_shutdown? -> {:stop, :shutdown, state} 119 | refs == [] -> {:stop, :normal, state} 120 | true -> {:noreply, state} 121 | end 122 | else 123 | {:noreply, state} 124 | end 125 | end 126 | 127 | def handle_info(_, state) do 128 | {:noreply, state} 129 | end 130 | 131 | def terminate(_reason, %{supervisor: supervisor}) do 132 | ref = Process.monitor(supervisor) 133 | Process.exit(supervisor, :shutdown) 134 | 135 | receive do 136 | {:DOWN, ^ref, _, _, _} -> :ok 137 | end 138 | end 139 | end 140 | -------------------------------------------------------------------------------- /lib/flow/map_reducer.ex: -------------------------------------------------------------------------------- 1 | defmodule Flow.MapReducer do 2 | @moduledoc false 3 | use GenStage 4 | 5 | def init({type, opts, index, trigger, acc, reducer}) do 6 | Process.flag(:trap_exit, true) 7 | 8 | {on_init, opts} = Keyword.pop(opts, :on_init, & &1) 9 | on_init.(index) 10 | 11 | {type, {%{}, build_status(type, trigger), index, acc.(), reducer}, opts} 12 | end 13 | 14 | def handle_subscribe(:producer, opts, {pid, ref}, {producers, status, index, acc, reducer}) do 15 | opts[:tag] && Process.put(ref, opts[:tag]) 16 | status = producer_status(pid, ref, status) 17 | state = {Map.put(producers, ref, nil), status, index, acc, reducer} 18 | 19 | if status.done? 
do 20 | GenStage.cancel({pid, ref}, :normal, [:noconnect]) 21 | {:manual, state} 22 | else 23 | {:automatic, state} 24 | end 25 | end 26 | 27 | def handle_subscribe(:consumer, _opts, {pid, ref}, {producers, status, index, acc, reducer}) do 28 | status = consumer_status(pid, ref, status) 29 | {:automatic, {producers, status, index, acc, reducer}} 30 | end 31 | 32 | def handle_cancel(_reason, {_, ref}, {producers, status, index, acc, reducer}) do 33 | case producers do 34 | %{^ref => _} -> 35 | Process.delete(ref) 36 | {events, acc, status} = done_status(status, index, acc, ref) 37 | {:noreply, events, {Map.delete(producers, ref), status, index, acc, reducer}} 38 | 39 | _ -> 40 | consumers = Map.delete(status.consumers, ref) 41 | status = %{status | consumers: consumers} 42 | 43 | if consumers == %{} do 44 | {:stop, :normal, {producers, status, index, acc, reducer}} 45 | else 46 | {:noreply, [], {producers, status, index, acc, reducer}} 47 | end 48 | end 49 | end 50 | 51 | def handle_info({:trigger, name}, {producers, status, index, acc, reducer}) do 52 | %{trigger: trigger} = status 53 | {events, acc} = trigger.(acc, index, name) 54 | {:noreply, events, {producers, status, index, acc, reducer}} 55 | end 56 | 57 | def handle_info(:stop, state) do 58 | {:stop, :normal, state} 59 | end 60 | 61 | def handle_info(_msg, state) do 62 | {:noreply, [], state} 63 | end 64 | 65 | def handle_events(events, {_, ref}, {producers, status, index, acc, reducer}) 66 | when is_function(reducer, 4) do 67 | {events, acc} = reducer.(ref, events, acc, index) 68 | {:noreply, events, {producers, status, index, acc, reducer}} 69 | end 70 | 71 | def handle_events(events, {_, ref}, {producers, status, index, acc, reducer}) do 72 | {producers, events, acc} = reducer.(producers, ref, events, acc, index) 73 | {:noreply, events, {producers, status, index, acc, reducer}} 74 | end 75 | 76 | ## Helpers 77 | 78 | defp build_status(_type, trigger) do 79 | %{producers: %{}, consumers: %{}, done?: false, 
trigger: trigger} 80 | end 81 | 82 | defp producer_status(pid, ref, %{producers: producers} = status) do 83 | %{status | producers: Map.put(producers, ref, pid)} 84 | end 85 | 86 | defp consumer_status(pid, ref, %{consumers: consumers} = status) do 87 | %{status | consumers: Map.put(consumers, ref, pid)} 88 | end 89 | 90 | defp done_status(%{producers: map, done?: true} = status, _index, acc, _ref) when map == %{} do 91 | {[], acc, status} 92 | end 93 | 94 | defp done_status(%{done?: false} = status, index, acc, ref) do 95 | %{trigger: trigger, producers: producers} = status 96 | 97 | case Map.delete(producers, ref) do 98 | new_producers when new_producers == %{} and producers != %{} -> 99 | {events, acc} = trigger.(acc, index, :done) 100 | GenStage.async_info(self(), :stop) 101 | {events, acc, %{status | producers: %{}, done?: true}} 102 | 103 | producers -> 104 | {[], acc, %{status | producers: producers}} 105 | end 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /lib/flow/materialize.ex: -------------------------------------------------------------------------------- 1 | defmodule Flow.Materialize do 2 | @moduledoc false 3 | 4 | @compile :inline_list_funcs 5 | @map_reducer_opts [:buffer_keep, :buffer_size, :dispatcher, :on_init] 6 | @supervisor_opts [:shutdown] 7 | 8 | def materialize(%Flow{producers: nil}, _, _, _, _) do 9 | raise ArgumentError, 10 | "cannot execute a flow without producers, " <> 11 | "please call \"from_enumerable\", \"from_stages\" or \"from_specs\" accordingly" 12 | end 13 | 14 | def materialize(%Flow{} = flow, demand, start_link, type, dispatcher) do 15 | %{operations: operations, options: options, producers: producers, window: window} = flow 16 | {ops, batchers} = compile_operations(operations) 17 | 18 | {producers, consumers, ops, window} = 19 | start_producers(producers, ops, start_link, window, options, dispatcher) 20 | 21 | if demand == :accumulate do 22 | for {producer, _} <- 
producers, do: GenStage.demand(producer, demand) 23 | end 24 | 25 | options = 26 | case type do 27 | # The flow itself may have a dispatcher set as option, so we must erase it 28 | :consumer -> Keyword.delete(options, :dispatcher) 29 | # Otherwise the dispatcher given as argument always overrides the one in options. 30 | # However, in some cases, the dispatcher is taken from the options itself 31 | # (such as the root of the tree) 32 | _ -> Keyword.put(options, :dispatcher, dispatcher) 33 | end 34 | 35 | {producers, start_stages(ops, window, consumers, start_link, type, batchers, options)} 36 | end 37 | 38 | ## Helpers 39 | 40 | @doc """ 41 | Splits the flow operations into layers of stages. 42 | """ 43 | def compile_operations([]) do 44 | {:none, []} 45 | end 46 | 47 | def compile_operations(operations) do 48 | {batchers, operations} = 49 | operations 50 | |> :lists.reverse() 51 | |> Enum.split_while(&match?({:batch, _}, &1)) 52 | 53 | if Enum.all?(operations, &match?({:mapper, _, _}, &1)) do 54 | {mapper_ops(operations), batchers} 55 | else 56 | {reducer_ops(operations), batchers} 57 | end 58 | end 59 | 60 | defp batcher_ops([], reducer), do: reducer 61 | 62 | defp batcher_ops(batchers, reducer) do 63 | funs = Enum.map(batchers, fn {:batch, fun} -> fun end) 64 | 65 | fn ref, events, acc, index -> 66 | reducer.(ref, :lists.foldl(& &1.(&2), events, funs), acc, index) 67 | end 68 | end 69 | 70 | defp start_stages(:none, window, producers, _start_link, _type, _batchers, _options) do 71 | if window != Flow.Window.global() do 72 | raise ArgumentError, "a window was set but no computation is happening on this partition" 73 | end 74 | 75 | for {producer, producer_opts} <- producers do 76 | {producer, [cancel: :transient] ++ producer_opts} 77 | end 78 | end 79 | 80 | defp start_stages( 81 | compiled_ops, 82 | window, 83 | producers, 84 | start_link, 85 | type, 86 | batchers, 87 | opts 88 | ) do 89 | {acc, reducer, trigger} = window_ops(window, compiled_ops, opts) 90 | 
reducer = batcher_ops(batchers, reducer) 91 | 92 | {stages, opts} = Keyword.pop(opts, :stages) 93 | {supervisor_opts, opts} = Keyword.split(opts, @supervisor_opts) 94 | {init_opts, subscribe_opts} = Keyword.split(opts, @map_reducer_opts) 95 | 96 | for i <- 0..(stages - 1) do 97 | subscriptions = 98 | for {producer, producer_opts} <- producers do 99 | opts = Keyword.merge(subscribe_opts, producer_opts) 100 | {producer, [partition: i, cancel: :transient] ++ opts} 101 | end 102 | 103 | arg = {type, [subscribe_to: subscriptions] ++ init_opts, {i, stages}, trigger, acc, reducer} 104 | {:ok, pid} = start_link.(map_reducer_spec(arg, supervisor_opts)) 105 | {pid, [cancel: :transient]} 106 | end 107 | end 108 | 109 | defp map_reducer_spec(arg, supervisor_opts) do 110 | shutdown = Keyword.get(supervisor_opts, :shutdown, 5000) 111 | 112 | %{ 113 | id: Flow.MapReducer, 114 | start: {GenStage, :start_link, [Flow.MapReducer, arg, []]}, 115 | modules: [Flow.MapReducer], 116 | shutdown: shutdown 117 | } 118 | end 119 | 120 | ## Producers 121 | 122 | defp start_producers( 123 | {:join, kind, left, right, left_key, right_key, join}, 124 | ops, 125 | start_link, 126 | window, 127 | options, 128 | _dispatcher 129 | ) do 130 | partitions = Keyword.fetch!(options, :stages) 131 | {left_producers, left_consumers} = start_join(:left, left, left_key, partitions, start_link) 132 | 133 | {right_producers, right_consumers} = 134 | start_join(:right, right, right_key, partitions, start_link) 135 | 136 | {acc, fun, trigger} = ensure_ops(ops) 137 | 138 | window = 139 | case window do 140 | %{by: by} -> %{window | by: fn x -> by.(elem(x, 1)) end} 141 | %{} -> window 142 | end 143 | 144 | producers = left_producers ++ right_producers 145 | consumers = left_consumers ++ right_consumers 146 | 147 | {producers, consumers, join_ops(kind, join, acc, fun, trigger), window} 148 | end 149 | 150 | defp start_producers( 151 | {:departition, flow, acc_fun, merge_fun, done_fun}, 152 | ops, 153 | start_link, 
154 | window, 155 | _options, 156 | _dispatcher 157 | ) do 158 | {producers, consumers} = 159 | materialize(flow, :forward, start_link, :producer_consumer, GenStage.DemandDispatcher) 160 | 161 | {acc, fun, trigger} = ensure_ops(ops) 162 | 163 | stages = Keyword.fetch!(flow.options, :stages) 164 | partitions = Enum.to_list(0..(stages - 1)) 165 | 166 | {producers, consumers, 167 | departition_ops(acc, fun, trigger, partitions, acc_fun, merge_fun, done_fun), window} 168 | end 169 | 170 | defp start_producers({:flows, flows}, ops, start_link, window, options, dispatcher) do 171 | up_dispatcher = 172 | Keyword.get_lazy(options, :dispatcher, fn -> 173 | case Keyword.fetch!(options, :stages) do 174 | 1 -> 175 | GenStage.DemandDispatcher 176 | 177 | stages -> 178 | hash = options[:hash] || hash_by_key(options[:key], stages) 179 | dispatcher_opts = [partitions: 0..(stages - 1), hash: hash(hash)] 180 | {GenStage.PartitionDispatcher, dispatcher_opts} 181 | end 182 | end) 183 | 184 | {producers, consumers} = 185 | Enum.reduce(flows, {[], []}, fn flow, {producers_acc, consumers_acc} -> 186 | {producers, consumers} = 187 | materialize(flow, :forward, start_link, :producer_consumer, up_dispatcher) 188 | 189 | {producers ++ producers_acc, consumers ++ consumers_acc} 190 | end) 191 | 192 | {producers, consumers, ensure_ops(ops, up_dispatcher, dispatcher), window} 193 | end 194 | 195 | defp start_producers({:from_stages, producers}, ops, start_link, window, _options, dispatcher) do 196 | producers = producers.(start_link) 197 | {producers, producers, ensure_ops(ops, GenStage.DemandDispatcher, dispatcher), window} 198 | end 199 | 200 | defp start_producers( 201 | {:through_stages, flow, producers_consumers}, 202 | ops, 203 | start_link, 204 | window, 205 | options, 206 | dispatcher 207 | ) do 208 | up_dispatcher = options[:dispatcher] || GenStage.DemandDispatcher 209 | 210 | {producers, intermediary} = 211 | materialize(flow, :forward, start_link, :producer_consumer, up_dispatcher) 
212 | 213 | timeout = Keyword.get(options, :subscribe_timeout, 5_000) 214 | producers_consumers = producers_consumers.(start_link) 215 | 216 | for {pid, _} <- intermediary, {producer_consumer, subscribe_opts} <- producers_consumers do 217 | subscribe_opts = [to: pid, cancel: :transient] ++ subscribe_opts 218 | GenStage.sync_subscribe(producer_consumer, subscribe_opts, timeout) 219 | end 220 | 221 | producers_consumers = 222 | for {producer_consumer, _} <- producers_consumers, do: {producer_consumer, []} 223 | 224 | # We need to ensure ops so we get proper map reducer consumers. 225 | {producers, producers_consumers, ensure_ops(ops, up_dispatcher, dispatcher), window} 226 | end 227 | 228 | defp start_producers({:enumerables, enumerables}, ops, start_link, window, options, dispatcher) do 229 | # If there are no ops, just start the enumerables with the options. 230 | # Otherwise it is a regular producer consumer with demand dispatcher. 231 | # In this case, options is used by subsequent mapper/reducer stages. 
232 | streamer_opts = if ops == :none, do: Keyword.put(options, :dispatcher, dispatcher), else: [] 233 | 234 | producers = start_enumerables(enumerables, streamer_opts, start_link) 235 | {producers, producers, ops, window} 236 | end 237 | 238 | defp start_enumerables(enumerables, opts, start_link) do 239 | supervisor_opts = Keyword.take(opts, @supervisor_opts) 240 | opts = [demand: :accumulate] ++ Keyword.take(opts, @map_reducer_opts) 241 | 242 | for enumerable <- enumerables do 243 | {:ok, pid} = start_link.(streamer_spec(enumerable, opts, supervisor_opts)) 244 | {pid, []} 245 | end 246 | end 247 | 248 | defp streamer_spec(stream, opts, supervisor_opts) do 249 | shutdown = Keyword.get(supervisor_opts, :shutdown, 5000) 250 | 251 | %{ 252 | id: GenStage.Streamer, 253 | start: {GenStage, :from_enumerable, [stream, [on_cancel: :stop] ++ opts]}, 254 | shutdown: shutdown, 255 | modules: [GenStage.Streamer] 256 | } 257 | end 258 | 259 | defp hash(fun) when is_function(fun, 1) do 260 | fun 261 | end 262 | 263 | defp hash(other) do 264 | raise ArgumentError, 265 | "expected :hash to be a function that receives an event and " <> 266 | "returns a tuple with the event and its partition, got: #{inspect(other)}" 267 | end 268 | 269 | defp hash_by_key(nil, stages) do 270 | &{&1, :erlang.phash2(&1, stages)} 271 | end 272 | 273 | defp hash_by_key({:elem, pos}, stages) when pos >= 0 do 274 | pos = pos + 1 275 | &{&1, :erlang.phash2(:erlang.element(pos, &1), stages)} 276 | end 277 | 278 | defp hash_by_key({:key, key}, stages) do 279 | &{&1, :erlang.phash2(Map.fetch!(&1, key), stages)} 280 | end 281 | 282 | defp hash_by_key(fun, stages) when is_function(fun, 1) do 283 | &{&1, :erlang.phash2(fun.(&1), stages)} 284 | end 285 | 286 | defp hash_by_key(other, _) do 287 | raise ArgumentError, """ 288 | expected :key to be one of: 289 | 290 | * a function expecting an event and returning a key 291 | * {:elem, pos} when pos >= 0 292 | * {:key, key} 293 | 294 | instead got: #{inspect(other)} 
295 | """ 296 | end 297 | 298 | # If the upstream dispatcher and the current dispatcher are the same, 299 | # we don't need to ensure ops and we can skip a layer of stages 300 | defp ensure_ops(ops, dispatcher, dispatcher), do: ops 301 | defp ensure_ops(ops, _up_dispatcher, _dispatcher), do: ensure_ops(ops) 302 | 303 | defp ensure_ops(:none), do: mapper_ops([]) 304 | defp ensure_ops(ops), do: ops 305 | 306 | ## Departition 307 | 308 | defp departition_ops( 309 | reducer_acc, 310 | reducer_fun, 311 | reducer_trigger, 312 | partitions, 313 | acc_fun, 314 | merge_fun, 315 | done_fun 316 | ) do 317 | acc = fn -> {reducer_acc.(), %{}} end 318 | 319 | events = fn ref, events, {acc, windows}, index -> 320 | {events, windows} = 321 | dispatch_departition(events, windows, partitions, acc_fun, merge_fun, done_fun) 322 | 323 | {events, acc} = reducer_fun.(ref, :lists.reverse(events), acc, index) 324 | {events, {acc, windows}} 325 | end 326 | 327 | trigger = fn 328 | {acc, windows}, index, {_, _, :done} = name -> 329 | done = 330 | for {window, {_partitions, acc}} <- :lists.sort(:maps.to_list(windows)) do 331 | done_fun.(acc, window) 332 | end 333 | 334 | {events, _} = reducer_trigger.(acc, index, name) 335 | {done ++ events, {reducer_acc.(), %{}}} 336 | 337 | {acc, windows}, index, name -> 338 | {events, acc} = reducer_trigger.(acc, index, name) 339 | {events, {acc, windows}} 340 | end 341 | 342 | {acc, events, trigger} 343 | end 344 | 345 | defp dispatch_departition(events, windows, partitions, acc_fun, merge_fun, done_fun) do 346 | fold_fun = fn {state, partition, {_, window, name}}, {events, windows} -> 347 | {partitions, acc} = get_window_data(windows, window, partitions, acc_fun) 348 | partitions = remove_partition_on_done(name, partitions, partition) 349 | acc = merge_fun.(state, acc) 350 | 351 | case partitions do 352 | [] -> 353 | {[done_fun.(acc, window) | events], Map.delete(windows, window)} 354 | 355 | _ -> 356 | {events, Map.put(windows, window, {partitions, 
acc})} 357 | end 358 | end 359 | 360 | :lists.foldl(fold_fun, {[], windows}, events) 361 | end 362 | 363 | defp remove_partition_on_done(:done, partitions, partition) do 364 | List.delete(partitions, partition) 365 | end 366 | 367 | defp remove_partition_on_done(_, partitions, _) do 368 | partitions 369 | end 370 | 371 | defp get_window_data(windows, window, partitions, acc_fun) do 372 | case windows do 373 | %{^window => value} -> value 374 | %{} -> {partitions, acc_fun.()} 375 | end 376 | end 377 | 378 | ## Joins 379 | 380 | defp start_join(side, flow, key_fun, stages, start_link) do 381 | hash = fn event -> 382 | key = key_fun.(event) 383 | {{key, event}, :erlang.phash2(key, stages)} 384 | end 385 | 386 | dispatcher = {GenStage.PartitionDispatcher, partitions: 0..(stages - 1), hash: hash} 387 | 388 | {producers, consumers} = 389 | materialize(flow, :forward, start_link, :producer_consumer, dispatcher) 390 | 391 | consumers = 392 | for {consumer, consumer_opts} <- consumers do 393 | {consumer, [tag: side] ++ consumer_opts} 394 | end 395 | 396 | {producers, consumers} 397 | end 398 | 399 | defp join_ops(kind, join, acc, fun, trigger) do 400 | acc = fn -> {%{}, %{}, acc.()} end 401 | 402 | events = fn ref, events, {left, right, acc}, index -> 403 | {events, left, right} = dispatch_join(events, Process.get(ref), left, right, join, []) 404 | {events, acc} = fun.(ref, events, acc, index) 405 | {events, {left, right, acc}} 406 | end 407 | 408 | ref = make_ref() 409 | 410 | trigger = fn 411 | {left, right, acc}, index, {_, _, :done} = name -> 412 | {kind_events, acc} = 413 | case kind do 414 | :inner -> 415 | {[], acc} 416 | 417 | :left_outer -> 418 | fun.(ref, left_events(Map.keys(left), Map.keys(right), left, join), acc, index) 419 | 420 | :right_outer -> 421 | fun.(ref, right_events(Map.keys(right), Map.keys(left), right, join), acc, index) 422 | 423 | :full_outer -> 424 | left_keys = Map.keys(left) 425 | right_keys = Map.keys(right) 426 | 427 | {left_events, acc} = 
428 | fun.(ref, left_events(left_keys, right_keys, left, join), acc, index) 429 | 430 | {right_events, acc} = 431 | fun.(ref, right_events(right_keys, left_keys, right, join), acc, index) 432 | 433 | {left_events ++ right_events, acc} 434 | end 435 | 436 | {trigger_events, acc} = trigger.(acc, index, name) 437 | {kind_events ++ trigger_events, {left, right, acc}} 438 | 439 | {left, right, acc}, index, name -> 440 | {events, acc} = trigger.(acc, index, name) 441 | {events, {left, right, acc}} 442 | end 443 | 444 | {acc, events, trigger} 445 | end 446 | 447 | defp left_events(left, right, source, join) do 448 | for key <- left -- right, 449 | entry <- Map.fetch!(source, key), 450 | do: join.(entry, nil) 451 | end 452 | 453 | defp right_events(right, left, source, join) do 454 | for key <- right -- left, 455 | entry <- Map.fetch!(source, key), 456 | do: join.(nil, entry) 457 | end 458 | 459 | defp dispatch_join([{key, left} | rest], :left, left_acc, right_acc, join, acc) do 460 | acc = 461 | case right_acc do 462 | %{^key => rights} -> 463 | :lists.foldl(fn right, acc -> [join.(left, right) | acc] end, acc, rights) 464 | 465 | %{} -> 466 | acc 467 | end 468 | 469 | left_acc = Map.update(left_acc, key, [left], &[left | &1]) 470 | dispatch_join(rest, :left, left_acc, right_acc, join, acc) 471 | end 472 | 473 | defp dispatch_join([{key, right} | rest], :right, left_acc, right_acc, join, acc) do 474 | acc = 475 | case left_acc do 476 | %{^key => lefties} -> 477 | :lists.foldl(fn left, acc -> [join.(left, right) | acc] end, acc, lefties) 478 | 479 | %{} -> 480 | acc 481 | end 482 | 483 | right_acc = Map.update(right_acc, key, [right], &[right | &1]) 484 | dispatch_join(rest, :right, left_acc, right_acc, join, acc) 485 | end 486 | 487 | defp dispatch_join([], _, left_acc, right_acc, _join, acc) do 488 | {:lists.reverse(acc), left_acc, right_acc} 489 | end 490 | 491 | ## Windows 492 | 493 | defp window_ops( 494 | %{trigger: trigger, periodically: periodically} = window, 495 
| {reducer_acc, reducer_fun, reducer_trigger}, 496 | options 497 | ) do 498 | {window_acc, window_fun, window_trigger} = 499 | window_trigger(trigger, reducer_acc, reducer_fun, reducer_trigger) 500 | 501 | {type_acc, type_fun, type_trigger} = 502 | window.__struct__.materialize(window, window_acc, window_fun, window_trigger, options) 503 | 504 | {window_periodically(type_acc, periodically), type_fun, type_trigger} 505 | end 506 | 507 | defp window_trigger(nil, reducer_acc, reducer_fun, reducer_trigger) do 508 | {reducer_acc, reducer_fun, reducer_trigger} 509 | end 510 | 511 | defp window_trigger( 512 | {punctuation_acc, punctuation_fun}, 513 | reducer_acc, 514 | reducer_fun, 515 | reducer_trigger 516 | ) do 517 | {fn -> {punctuation_acc.(), reducer_acc.()} end, 518 | build_punctuated_reducer(punctuation_fun, reducer_fun, reducer_trigger), 519 | build_punctuated_trigger(reducer_trigger)} 520 | end 521 | 522 | defp build_punctuated_reducer(punctuation_fun, red_fun, trigger) do 523 | fn ref, events, {pun_acc, red_acc}, index, name -> 524 | maybe_punctuate( 525 | ref, 526 | events, 527 | punctuation_fun, 528 | pun_acc, 529 | red_acc, 530 | red_fun, 531 | index, 532 | name, 533 | trigger, 534 | [] 535 | ) 536 | end 537 | end 538 | 539 | defp build_punctuated_trigger(trigger) do 540 | fn {trigger_acc, red_acc}, index, name -> 541 | {events, red_acc} = trigger.(red_acc, index, name) 542 | {events, {trigger_acc, red_acc}} 543 | end 544 | end 545 | 546 | defp maybe_punctuate( 547 | ref, 548 | events, 549 | punctuation_fun, 550 | pun_acc, 551 | red_acc, 552 | red_fun, 553 | index, 554 | name, 555 | trigger, 556 | collected 557 | ) do 558 | case punctuation_fun.(events, pun_acc) do 559 | {:trigger, trigger_name, pre, pos, pun_acc} -> 560 | {red_events, red_acc} = red_fun.(ref, pre, red_acc, index) 561 | {trigger_events, red_acc} = trigger.(red_acc, index, put_elem(name, 2, trigger_name)) 562 | 563 | maybe_punctuate( 564 | ref, 565 | pos, 566 | punctuation_fun, 567 | pun_acc, 
568 | red_acc, 569 | red_fun, 570 | index, 571 | name, 572 | trigger, 573 | collected ++ trigger_events ++ red_events 574 | ) 575 | 576 | {:cont, [], pun_acc} -> 577 | {collected, {pun_acc, red_acc}} 578 | 579 | {:cont, emitted_events, pun_acc} -> 580 | {red_events, red_acc} = red_fun.(ref, emitted_events, red_acc, index) 581 | {collected ++ red_events, {pun_acc, red_acc}} 582 | 583 | {:cont, pun_acc} -> 584 | {red_events, red_acc} = red_fun.(ref, events, red_acc, index) 585 | {collected ++ red_events, {pun_acc, red_acc}} 586 | end 587 | end 588 | 589 | defp window_periodically(window_acc, []) do 590 | window_acc 591 | end 592 | 593 | defp window_periodically(window_acc, periodically) do 594 | fn -> 595 | for {time, name} <- periodically do 596 | {:ok, _} = :timer.send_interval(time, self(), {:trigger, name}) 597 | end 598 | 599 | window_acc.() 600 | end 601 | end 602 | 603 | ## Reducers 604 | 605 | defp reducer_ops(ops) do 606 | case take_mappers(ops, []) do 607 | {mappers, [{:emit_and_reduce, reducer_acc, reducer_fun} | ops]} -> 608 | {reducer_acc, build_emit_and_reducer(mappers, reducer_fun), build_trigger(ops)} 609 | 610 | {mappers, [{:reduce, reducer_acc, reducer_fun} | ops]} -> 611 | {reducer_acc, build_reducer(mappers, reducer_fun), build_trigger(ops)} 612 | 613 | {mappers, [{:uniq, uniq_by} | ops]} -> 614 | {acc, reducer, trigger} = reducer_ops(ops) 615 | uniq_reducer = build_uniq_reducer(mappers, reducer, uniq_by) 616 | uniq_trigger = build_uniq_trigger(trigger) 617 | {fn -> {%{}, acc.()} end, uniq_reducer, uniq_trigger} 618 | 619 | {mappers, ops} -> 620 | {fn -> [] end, build_reducer(mappers, &[&1 | &2]), build_trigger(ops)} 621 | end 622 | end 623 | 624 | defp build_emit_and_reducer(mappers, fun) do 625 | reducer = reducer_from_mappers(mappers) 626 | 627 | emit_and_reducer = fn event, {events, acc} -> 628 | :lists.foldl( 629 | fn x, {events, acc} -> 630 | case fun.(x, acc) do 631 | {[], acc} -> {events, acc} 632 | {current, acc} -> {[current | events], 
acc} 633 | end 634 | end, 635 | {events, acc}, 636 | reducer.(event, []) 637 | ) 638 | end 639 | 640 | fn _ref, events, acc, _index -> 641 | {events, acc} = :lists.foldl(emit_and_reducer, {[], acc}, events) 642 | {events |> :lists.reverse() |> :lists.append(), acc} 643 | end 644 | end 645 | 646 | defp build_reducer(mappers, fun) do 647 | reducer = reducer_from_mappers(mappers, fun) 648 | 649 | fn _ref, events, acc, _index -> 650 | {[], :lists.foldl(reducer, acc, events)} 651 | end 652 | end 653 | 654 | @protocol_undefined "Flow attempted to convert the stage accumulator into events but failed, " <> 655 | "to explicit convert your current state into events use on_trigger/2" 656 | 657 | defp build_trigger(ops) do 658 | case take_mappers(ops, []) do 659 | {[], [{:on_trigger, fun}]} -> 660 | fun 661 | 662 | {mappers, [{:on_trigger, fun}]} -> 663 | reducer = reducer_from_mappers(mappers) 664 | 665 | fn acc, index, trigger -> 666 | acc |> Enum.reduce([], reducer) |> Enum.reverse() |> fun.(index, trigger) 667 | end 668 | 669 | {[], []} -> 670 | fn acc, _, _ -> 671 | try do 672 | Enum.to_list(acc) 673 | rescue 674 | e in Protocol.UndefinedError -> 675 | msg = @protocol_undefined 676 | 677 | e = 678 | update_in(e.description, fn 679 | "" -> msg 680 | dc -> dc <> " (#{msg})" 681 | end) 682 | 683 | reraise e, __STACKTRACE__ 684 | else 685 | events -> {events, acc} 686 | end 687 | end 688 | 689 | {mappers, []} -> 690 | reducer = reducer_from_mappers(mappers) 691 | fn acc, _, _ -> {acc |> Enum.reduce([], reducer) |> Enum.reverse(), acc} end 692 | end 693 | end 694 | 695 | defp build_uniq_reducer(mappers, reducer, uniq_by) do 696 | uniq_by = reducer_from_mappers(mappers, uniq_by_reducer(uniq_by)) 697 | 698 | fn ref, events, {set, acc}, index -> 699 | {set, events} = :lists.foldl(uniq_by, {set, []}, events) 700 | {events, acc} = reducer.(ref, :lists.reverse(events), acc, index) 701 | {events, {set, acc}} 702 | end 703 | end 704 | 705 | defp uniq_by_reducer(uniq_by) do 706 | fn 
event, {set, acc} -> 707 | key = uniq_by.(event) 708 | 709 | case set do 710 | %{^key => true} -> {set, acc} 711 | %{} -> {Map.put(set, key, true), [event | acc]} 712 | end 713 | end 714 | end 715 | 716 | defp build_uniq_trigger(trigger) do 717 | fn {set, acc}, index, name -> 718 | {events, acc} = trigger.(acc, index, name) 719 | {events, {set, acc}} 720 | end 721 | end 722 | 723 | ## Mappers 724 | 725 | defp mapper_ops(ops) do 726 | reducer = reducer_from_mappers(ops) 727 | 728 | {fn -> [] end, 729 | fn _ref, events, [], _index -> {:lists.reverse(:lists.foldl(reducer, [], events)), []} end, 730 | fn _acc, _index, _trigger -> {[], []} end} 731 | end 732 | 733 | defp reducer_from_mappers(mappers, reducer \\ &[&1 | &2]) do 734 | :lists.foldr(&mapper/2, reducer, mappers) 735 | end 736 | 737 | defp mapper({:mapper, :each, [each]}, fun) do 738 | fn x, acc -> 739 | each.(x) 740 | fun.(x, acc) 741 | end 742 | end 743 | 744 | defp mapper({:mapper, :filter, [filter]}, fun) do 745 | fn x, acc -> 746 | if filter.(x) do 747 | fun.(x, acc) 748 | else 749 | acc 750 | end 751 | end 752 | end 753 | 754 | defp mapper({:mapper, :flat_map, [flat_mapper]}, fun) do 755 | fn x, acc -> 756 | Enum.reduce(flat_mapper.(x), acc, fun) 757 | end 758 | end 759 | 760 | defp mapper({:mapper, :map, [mapper]}, fun) do 761 | fn x, acc -> fun.(mapper.(x), acc) end 762 | end 763 | 764 | defp mapper({:mapper, :reject, [filter]}, fun) do 765 | fn x, acc -> 766 | if filter.(x) do 767 | acc 768 | else 769 | fun.(x, acc) 770 | end 771 | end 772 | end 773 | 774 | defp take_mappers([{:mapper, _, _} = mapper | ops], acc), do: take_mappers(ops, [mapper | acc]) 775 | defp take_mappers(ops, acc), do: {:lists.reverse(acc), ops} 776 | end 777 | -------------------------------------------------------------------------------- /lib/flow/window.ex: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window do 2 | @moduledoc """ 3 | Splits a flow into windows that are 
materialized at certain triggers. 4 | 5 | Windows allow developers to split data so we can understand incoming 6 | data as time progresses. Once a window is created, we can specify 7 | triggers that allow us to customize when the data accumulated on every 8 | window is materialized. 9 | 10 | Windows must be created by calling one of the window type functions. 11 | The supported window types are as follows: 12 | 13 | * Global windows - that's the default window which means all data 14 | belongs to one single window. In other words, the data is not 15 | split in any way. The window finishes when all producers notify 16 | there is no more data. 17 | 18 | * Fixed windows - splits incoming events into periodic, non- 19 | overlapping windows based on event times. In other words, a given 20 | event belongs to a single window. If data arrives late, a configured 21 | lateness can be specified. 22 | 23 | * Periodic windows - splits incoming events into periodic, non- 24 | overlapping windows based on processing times. Similar to fixed 25 | windows, a given event belongs to a single window. 26 | 27 | * Count windows - splits incoming events based on a count. 28 | Similar to fixed windows, a given event belongs to a single 29 | window. 30 | 31 | Other common window types can be expressed with Flow functions: 32 | 33 | * Session windows - splits incoming events into unique windows 34 | in which events are grouped until there is a configured gap between event 35 | times. Sessions are useful for data that is irregularly 36 | distributed with respect to time. 37 | 38 | We discuss all types and include examples below. In the first section, 39 | "Global windows", we build the basic intuition about windows and triggers 40 | as well as discuss the distinction between "Event time and processing time". 41 | Then we explore "Fixed windows" and the concept of lateness before moving 42 | on to other window types. 43 | 44 | ## Global windows 45 | 46 | By default, all events belong to the global window. 
The global window 47 | is automatically attached to a partition if no window is specified. 48 | The flow below: 49 | 50 | Flow.from_stages([some_producer]) 51 | |> Flow.partition() 52 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 53 | 54 | is equivalent to: 55 | 56 | Flow.from_stages([some_producer]) 57 | |> Flow.partition(window: Flow.Window.global()) 58 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 59 | 60 | Even though the global window does not split the data in any way, it 61 | already provides conveniences for working with both bounded (finite) 62 | and unbounded (infinite) data via triggers. 63 | 64 | For example, the flow below uses a global window with a count-based 65 | trigger to emit the values being summed as we sum them: 66 | 67 | iex> window = Flow.Window.global() |> Flow.Window.trigger_every(10) 68 | iex> flow = Flow.from_enumerable(1..100) |> Flow.partition(window: window, stages: 1) 69 | iex> flow |> Flow.reduce(fn -> 0 end, &(&1 + &2)) |> Flow.emit(:state) |> Enum.to_list() 70 | [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 71 | 72 | Let's explore the types of triggers available next. 73 | 74 | ### Triggers 75 | 76 | Triggers allow us to checkpoint the data processed so far. There 77 | are different triggers we can use: 78 | 79 | * Event count triggers - compute state operations every X events 80 | 81 | * Processing time triggers - compute state operations every X time 82 | units for every stage 83 | 84 | * Punctuation - hand-written triggers based on the data 85 | 86 | Flow supports the triggers above via the `trigger_every/2`, 87 | `trigger_periodically/3` and `trigger/3` functions, respectively. 88 | 89 | Once a trigger is emitted, the `Flow.reduce/3` step halts and invokes 90 | the `Flow.on_trigger/2` callback, allowing you to emit events and change 91 | the reducer accumulator. 
92 | 93 | ### Event time and processing time 94 | 95 | Before we move to other window types, it is important to discuss 96 | the distinction between event time and processing time. In particular, 97 | triggers created with the `trigger_periodically/3` function are 98 | intrinsically inaccurate and therefore should not be used to split the 99 | data. For example, if you are measuring the frequency at which events arrive, 100 | using the event time will always yield the same result, while processing 101 | time will be vulnerable to fluctuations if, for instance, an external 102 | factor causes events to be processed slower or faster than usual. 103 | 104 | Furthermore, periodic triggers are established per partition and are 105 | message-based, which means partitions will emit the triggers at different 106 | times and possibly with delays based on the partition message queue size. 107 | However, it is exactly this lack of precision which makes them efficient 108 | for checkpointing data. 109 | 110 | Flow provides other window types, such as fixed windows, exactly to address 111 | the issues with processing time. Such windows use the event time which is 112 | based on the data itself. When working with event time, we can assign the 113 | data into proper windows even when late or out of order. Such windows can 114 | be used to gather time-based insight from the data (for example, the most 115 | popular hashtags in the last 10 minutes) as well as for checkpointing data. 116 | 117 | ## Fixed windows (event time) 118 | 119 | Fixed windows group the data based on the event times. Regardless of whether 120 | the data is bounded or not, fixed windows give us time-based insight 121 | about the data. 
122 | 123 | Fixed windows are created via the `fixed/3` function which specifies 124 | the duration of the window and a function that retrieves the event time 125 | from each event: 126 | 127 | Flow.Window.fixed(1, :hour, fn {word, timestamp} -> timestamp end) 128 | 129 | Let's see an example that will use the window above to count the frequency 130 | of words based on windows that are 1 hour long. The timestamps used by 131 | Flow are integers in milliseconds. For now, we will also set the concurrency 132 | down to 1 and max demand down to 5 as it is simpler to reason about the results: 133 | 134 | iex> data = [{"elixir", 0}, {"elixir", 1_000}, {"erlang", 60_000}, 135 | ...> {"concurrency", 3_200_000}, {"elixir", 4_000_000}, 136 | ...> {"erlang", 5_000_000}, {"erlang", 6_000_000}] 137 | iex> window = Flow.Window.fixed(1, :hour, fn {_word, timestamp} -> timestamp end) 138 | iex> flow = Flow.from_enumerable(data, max_demand: 5, stages: 1) 139 | iex> flow = Flow.partition(flow, window: window, stages: 1) 140 | iex> flow = Flow.reduce(flow, fn -> %{} end, fn {word, _}, acc -> 141 | ...> Map.update(acc, word, 1, & &1 + 1) 142 | ...> end) 143 | iex> flow |> Flow.emit(:state) |> Enum.to_list 144 | [%{"elixir" => 2, "erlang" => 1, "concurrency" => 1}, 145 | %{"elixir" => 1, "erlang" => 2}] 146 | 147 | Since the data has been broken into two windows, the first four events belong 148 | to the same window while the last three belong to the second one. Notice that 149 | `Flow.reduce/3` is executed per window and that each event belongs to a single 150 | window exclusively. 151 | 152 | Similar to global windows, fixed windows can also have triggers, allowing 153 | us to checkpoint the data as the computation happens. 154 | 155 | ### Data ordering, watermarks and lateness 156 | 157 | When working with event time, Flow assumes by default that events are time 158 | ordered. 
This means that, when we move from one window to another, like 159 | when we receive the entry `{"elixir", 4_000_000}` in the example above, 160 | we assume the previous window has been completed. 161 | 162 | Let's change the events above to be out of order and move the first event 163 | to the end of the dataset and see what happens: 164 | 165 | iex> data = [{"elixir", 1_000}, {"erlang", 60_000}, 166 | ...> {"concurrency", 3_200_000}, {"elixir", 4_000_000}, 167 | ...> {"erlang", 5_000_000}, {"erlang", 6_000_000}, {"elixir", 0}] 168 | iex> window = Flow.Window.fixed(1, :hour, fn {_word, timestamp} -> timestamp end) 169 | iex> flow = Flow.from_enumerable(data) |> Flow.partition(window: window, stages: 1, max_demand: 5) 170 | iex> flow = Flow.reduce(flow, fn -> %{} end, fn {word, _}, acc -> 171 | ...> Map.update(acc, word, 1, & &1 + 1) 172 | ...> end) 173 | iex> flow |> Flow.emit(:state) |> Enum.to_list 174 | [%{"elixir" => 1, "erlang" => 1, "concurrency" => 1}, 175 | %{"elixir" => 1, "erlang" => 2}] 176 | 177 | Notice that now the first map did not count the "elixir" word twice. 178 | Since the event arrived late, it was marked as lost. However, in many 179 | flows we actually expect data to arrive late or out of order, especially 180 | when talking about concurrent data processing. 181 | 182 | Luckily, event time windows include the concept of lateness, which is a 183 | processing-time-based period during which we wait to receive late events. 
184 | Let's change the example above once more but now change the window 185 | to also call `allowed_lateness/3`: 186 | 187 | iex> data = [{"elixir", 1_000}, {"erlang", 60_000}, 188 | ...> {"concurrency", 3_200_000}, {"elixir", 4_000_000}, 189 | ...> {"erlang", 5_000_000}, {"erlang", 6_000_000}, {"elixir", 0}] 190 | iex> window = Flow.Window.fixed(1, :hour, fn {_word, timestamp} -> timestamp end) 191 | iex> window = Flow.Window.allowed_lateness(window, 5, :minute) 192 | iex> flow = Flow.from_enumerable(data) |> Flow.partition(window: window, stages: 1, max_demand: 5) 193 | iex> flow = Flow.reduce(flow, fn -> %{} end, fn {word, _}, acc -> 194 | ...> Map.update(acc, word, 1, & &1 + 1) 195 | ...> end) 196 | iex> flow |> Flow.emit(:state) |> Enum.to_list 197 | [%{"concurrency" => 1, "elixir" => 1, "erlang" => 1}, 198 | %{"concurrency" => 1, "elixir" => 2, "erlang" => 1}, 199 | %{"elixir" => 1, "erlang" => 2}] 200 | 201 | Now that we allow late events, we can see the first window emitted 202 | twice. Instead of the window being marked as done when 1 hour passes, 203 | we say it emits a **watermark trigger**. The window will be effectively 204 | done only after the allowed lateness period. If desired, we can use 205 | `Flow.on_trigger/2` to get more information about each particular window 206 | and its trigger. Replace the last line above by the following: 207 | 208 | flow 209 | |> Flow.on_trigger(fn state, _index, trigger -> {[{state, trigger}], state} end) 210 | |> Enum.to_list() 211 | 212 | The trigger parameter will include the type of window, the current 213 | window and what caused the window to be emitted (`:watermark` or 214 | `:done`). 215 | 216 | Note that all stages must receive an event that is outside of a specific 217 | window before that window is considered complete. 
In other words, if there are 218 | multiple stages in the partition preceding a reduce operation that has 219 | a window, the reduce step won't release a window until it has seen an event 220 | that is outside of that window from all processes that it receives data from. 221 | This could have an effect on how long events are delayed in the reduce step. 222 | 223 | ## Periodic windows (processing time) 224 | 225 | Periodic windows are similar to fixed windows except triggers are 226 | emitted based on processing time instead of event time. Remember that 227 | relying on periodic windows or triggers is intrinsically inaccurate and 228 | should not be used to split the data, only as a checkpointing device. 229 | 230 | Periodic windows are also similar to global windows that use 231 | `trigger_periodically/3` to emit events periodically. The difference is 232 | that periodic windows emit a window in a given interval while a trigger 233 | emits a trigger. This behaviour may affect functions such as `Flow.departition/4`, 234 | which calls the `merge` callback per trigger but the `done` callback per 235 | window. Unless you are relying on functions such as `Flow.departition/4`, 236 | there is no distinction between periodic windows and global windows with 237 | periodic triggers. 238 | 239 | ## Count windows (event count) 240 | 241 | Count windows are simpler versions of fixed windows where windows are split 242 | apart by event count. Since they are not time-based, they do not provide the 243 | concept of lateness. 244 | 245 | iex> window = Flow.Window.count(10) 246 | iex> flow = Flow.from_enumerable(1..100) |> Flow.partition(window: window, stages: 1) 247 | iex> flow |> Flow.reduce(fn -> 0 end, &(&1 + &2)) |> Flow.emit(:state) |> Enum.to_list() 248 | [55, 155, 255, 355, 455, 555, 655, 755, 855, 955, 0] 249 | 250 | Count windows are also similar to global windows that use `trigger_every/2` 251 | to emit events per count. 
The difference is that count windows emit a 252 | window per event count while a trigger belongs to a window. This behaviour 253 | may affect functions such as `Flow.departition/4`, which calls the `merge` 254 | callback per trigger but the `done` callback per window. Unless you are 255 | relying on functions such as `Flow.departition/4`, there is no distinction 256 | between count windows and global windows with count triggers. 257 | 258 | ## Session windows (gap between events) 259 | 260 | Session windows allow events to accumulate until a configured time gap 261 | between events occurs. This allows for grouping events that occurred close to 262 | each other, while allowing the length of the window to vary. Flow does not 263 | provide a dedicated Session window type, but it can be constructed using 264 | `emit_and_reduce/3` and `on_trigger/2`. 265 | 266 | iex> data = [ 267 | ...> {"elixir", 2_000_000}, 268 | ...> {"erlang", 3_100_000}, 269 | ...> {"elixir", 3_200_000}, 270 | ...> {"erlang", 4_000_000}, 271 | ...> {"elixir", 4_100_000}, 272 | ...> {"erlang", 4_150_000} 273 | ...> ] 274 | iex> max_gap_between_events = 1_000_000 275 | iex> flow = Flow.from_enumerable(data) |> Flow.partition(key: fn {k, _} -> k end, stages: 1) 276 | iex> flow = 277 | ...> Flow.emit_and_reduce(flow, fn -> %{} end, fn {word, time}, acc -> 278 | ...> {count, previous_time} = Map.get(acc, word, {1, time}) 279 | ...> 280 | ...> if time - previous_time > max_gap_between_events do 281 | ...> {[{word, {count, previous_time}}], Map.put(acc, word, {1, time})} 282 | ...> else 283 | ...> {[], Map.update(acc, word, {1, time}, fn {count, _} -> {count + 1, time} end)} 284 | ...> end 285 | ...> end) 286 | iex> flow = Flow.on_trigger(flow, fn acc -> {Enum.to_list(acc), :unused} end) 287 | iex> Enum.to_list(flow) 288 | [{"elixir", {1, 2000000}}, {"elixir", {2, 4100000}}, {"erlang", {3, 4150000}}] 289 | """ 290 | 291 | @type t :: %{ 292 | required(:trigger) => {fun(), fun()} | nil, 293 | 
required(:periodically) => [trigger], 294 | optional(atom()) => term() 295 | } 296 | 297 | @typedoc "The supported window types." 298 | @type type :: :global | :fixed | :periodic | :count | any() 299 | 300 | @typedoc """ 301 | A function that returns the event time to window by. 302 | 303 | It must return an integer representing the time in milliseconds. 304 | Flow does not care if the integer is using the UNIX epoch, 305 | Gregorian epoch or any other as long as it is consistent. 306 | """ 307 | @type by :: (term -> non_neg_integer) 308 | 309 | @typedoc """ 310 | The window identifier. 311 | 312 | It is `:global` for `:global` windows or an integer for fixed windows. 313 | """ 314 | @type id :: :global | non_neg_integer() 315 | 316 | @typedoc """ 317 | The supported time units for fixed and periodic windows. 318 | """ 319 | @type time_unit :: :millisecond | :second | :minute | :hour 320 | 321 | @typedoc "The name of the trigger." 322 | @type trigger :: term 323 | 324 | @doc """ 325 | Returns a global window. 326 | 327 | Global window triggers have the shape of `{:global, :global, trigger_name}`. 328 | 329 | See the section on "Global windows" in the module documentation for examples. 330 | """ 331 | @spec global :: t 332 | def global do 333 | %Flow.Window.Global{} 334 | end 335 | 336 | @doc """ 337 | Returns a count-based window of every `count` elements. 338 | 339 | `count` must be a positive integer. 340 | 341 | Count window triggers have the shape of `{:count, window, trigger_name}`, 342 | where `window` is an incrementing integer identifying the window. 343 | 344 | See the section on "Count windows" in the module documentation for examples. 345 | """ 346 | @spec count(pos_integer) :: t 347 | def count(count) when is_integer(count) and count > 0 do 348 | %Flow.Window.Count{count: count} 349 | end 350 | 351 | @doc """ 352 | Returns a period-based window of every `count` `unit`. 
353 | 354 | `count` is a positive integer and `unit` is one of `:millisecond`, 355 | `:second`, `:minute`, or `:hour`. Remember periodic triggers are established 356 | per partition and are message-based, which means partitions will emit the 357 | triggers at different times and possibly with delays based on the partition 358 | message queue size. 359 | 360 | Periodic window triggers have the shape of `{:periodic, window, trigger_name}`, 361 | where `window` is an incrementing integer identifying the window. 362 | 363 | See the section on "Periodic windows" in the module documentation for examples. 364 | """ 365 | @spec periodic(pos_integer, time_unit) :: t 366 | def periodic(count, unit) when is_integer(count) and count > 0 do 367 | %Flow.Window.Periodic{duration: to_ms(count, unit)} 368 | end 369 | 370 | @doc """ 371 | Returns a fixed window of duration `count` `unit` where the 372 | event time is calculated by the given function `by`. 373 | 374 | `count` is a positive integer and `unit` is one of `:millisecond`, 375 | `:second`, `:minute`, or `:hour`. 376 | 377 | Fixed window triggers have the shape of `{:fixed, window, trigger_name}`, 378 | where `window` is an integer that represents the beginning timestamp 379 | for the current window. 380 | 381 | If `allowed_lateness/3` is used with fixed windows, the window will 382 | first emit a `{:fixed, window, :watermark}` trigger when the window 383 | terminates and emit `{:fixed, window, :done}` only after the 384 | `allowed_lateness/3` duration has passed. 385 | 386 | See the section on "Fixed windows" in the module documentation for examples. 387 | """ 388 | @spec fixed(pos_integer, time_unit, (t -> pos_integer)) :: t 389 | def fixed(count, unit, by) when is_integer(count) and count > 0 and is_function(by, 1) do 390 | %Flow.Window.Fixed{duration: to_ms(count, unit), by: by} 391 | end 392 | 393 | @doc """ 394 | Sets a duration, in processing time, of how long we will 395 | wait for late events for a given window. 
396 | 397 | If allowed lateness is configured, once the window is finished, 398 | it won't trigger a `:done` event but instead emit a `:watermark`. 399 | The window will be done only when the allowed lateness time expires, 400 | effectively emitting the `:done` trigger. 401 | 402 | `count` is a positive number. The `unit` may be a time unit 403 | (`:millisecond`, `:second`, `:minute`, or `:hour`). 404 | """ 405 | @spec allowed_lateness(t, pos_integer, time_unit) :: t 406 | def allowed_lateness(window, count, unit) 407 | 408 | def allowed_lateness(%{lateness: _} = window, count, unit) do 409 | %{window | lateness: to_ms(count, unit)} 410 | end 411 | 412 | def allowed_lateness(window, _, _) do 413 | raise ArgumentError, "allowed_lateness/3 not supported for window type #{inspect(window)}" 414 | end 415 | 416 | @doc """ 417 | Calculates when to emit a trigger. 418 | 419 | Triggers are calculated per window and are used to temporarily 420 | halt the window accumulation, typically done with `Flow.reduce/3`, 421 | allowing the next operations to execute before accumulation is 422 | resumed. 423 | 424 | This function expects the trigger accumulator function, which will 425 | be invoked at the beginning of every window, and a trigger function 426 | that receives the current batch of events and its own accumulator. 427 | The trigger function must return one of the three values: 428 | 429 | * `{:cont, acc}` - the reduce operation should continue as usual. 430 | `acc` is the trigger state. 431 | 432 | * `{:cont, events, acc}` - the reduce operation should continue, but 433 | only with the events you want to emit as part of the next state. 434 | `acc` is the trigger state. 435 | 436 | * `{:trigger, name, pre, pos, acc}` - where `name` is the trigger `name`, 437 | `pre` are the events to be consumed before the trigger, `pos` controls 438 | events to be processed after the trigger with the `acc` as the new trigger 439 | accumulator. 
440 | 441 | We recommend looking at the implementation of `trigger_every/2` as 442 | an example of a custom trigger. 443 | """ 444 | @spec trigger(t, (-> acc), trigger_fun) :: t 445 | when trigger_fun: ([event], acc -> trigger_fun_return), 446 | trigger_fun_return: cont_tuple | cont_tuple_with_emitted_events | trigger_tuple, 447 | cont_tuple: {:cont, acc}, 448 | cont_tuple_with_emitted_events: {:cont, [event], acc}, 449 | trigger_tuple: {:trigger, trigger(), pre, pos, acc}, 450 | pre: [event], 451 | pos: [event], 452 | acc: term(), 453 | event: term() 454 | def trigger(window, acc_fun, trigger_fun) do 455 | if is_function(acc_fun, 0) do 456 | add_trigger(window, {acc_fun, trigger_fun}) 457 | else 458 | raise ArgumentError, 459 | "Flow.Window.trigger/3 expects the accumulator to be given as a function" 460 | end 461 | end 462 | 463 | @doc """ 464 | A trigger emitted every `count` elements in a window. 465 | 466 | The trigger will be named `{:every, count}`. 467 | 468 | ## Examples 469 | 470 | Below is an example that checkpoints the sum from 1 to 100, emitting 471 | a trigger with the state every 10 items. The extra 5050 value at the 472 | end is the trigger emitted because processing is done. 
473 | 474 | iex> window = Flow.Window.global() |> Flow.Window.trigger_every(10) 475 | iex> flow = Flow.from_enumerable(1..100) |> Flow.partition(window: window, stages: 1) 476 | iex> flow |> Flow.reduce(fn -> 0 end, &(&1 + &2)) |> Flow.emit(:state) |> Enum.to_list() 477 | [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 478 | 479 | """ 480 | @spec trigger_every(t, pos_integer) :: t 481 | def trigger_every(window, count) when is_integer(count) and count > 0 do 482 | name = {:every, count} 483 | 484 | trigger(window, fn -> count end, fn events, acc -> 485 | length = length(events) 486 | 487 | if length >= acc do 488 | {pre, pos} = Enum.split(events, acc) 489 | {:trigger, name, pre, pos, count} 490 | else 491 | {:cont, acc - length} 492 | end 493 | end) 494 | end 495 | 496 | @doc """ 497 | Emits a trigger periodically every `count` `unit`. 498 | 499 | Such trigger will apply to every window that has changed since the last 500 | periodic trigger. 501 | 502 | `count` is a positive integer and `unit` is one of `:millisecond`, 503 | `:second`, `:minute`, or `:hour`. Remember periodic triggers are established 504 | per partition and are message-based, which means partitions will emit the 505 | triggers at different times and possibly with delays based on the partition 506 | message queue size. 507 | 508 | The trigger will be named `{:periodically, count, unit}`. 509 | 510 | ## Message-based triggers (timers) 511 | 512 | It is also possible to dispatch a trigger by sending a message to 513 | `self()` with the format of `{:trigger, name}`. This is useful for 514 | custom triggers and timers. One example is to send the message when 515 | building the accumulator for `Flow.reduce/3`. 516 | 517 | Similar to periodic triggers, message-based triggers will also be 518 | invoked to all windows that have changed since the last trigger. 
519 | """ 520 | @spec trigger_periodically(t, pos_integer, time_unit) :: t 521 | def trigger_periodically(%{periodically: periodically} = window, count, unit) 522 | when is_integer(count) and count > 0 do 523 | trigger = {to_ms(count, unit), {:periodically, count, unit}} 524 | %{window | periodically: [trigger | periodically]} 525 | end 526 | 527 | @spec to_ms(pos_integer(), time_unit()) :: pos_integer 528 | defp to_ms(count, :millisecond), do: count 529 | defp to_ms(count, :second), do: count * 1000 530 | defp to_ms(count, :minute), do: count * 1000 * 60 531 | defp to_ms(count, :hour), do: count * 1000 * 60 * 60 532 | 533 | defp to_ms(_count, unit) do 534 | raise ArgumentError, 535 | "unknown unit #{inspect(unit)} (expected :millisecond, :second, :minute or :hour)" 536 | end 537 | 538 | defp add_trigger(%{trigger: nil} = window, trigger) do 539 | %{window | trigger: trigger} 540 | end 541 | 542 | defp add_trigger(%{}, _trigger) do 543 | raise ArgumentError, 544 | "Flow.Window.trigger/3 or Flow.Window.trigger_every/2 " <> 545 | "can only be called once per window" 546 | end 547 | end 548 | -------------------------------------------------------------------------------- /lib/flow/window/count.ex: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window.Count do 2 | @moduledoc false 3 | 4 | @enforce_keys [:count] 5 | defstruct [:count, :trigger, periodically: []] 6 | 7 | def materialize(%{count: max}, reducer_acc, reducer_fun, reducer_trigger, _options) do 8 | acc = fn -> {0, max, reducer_acc.()} end 9 | 10 | fun = fn ref, events, {window, count, acc}, index -> 11 | dispatch( 12 | events, 13 | window, 14 | count, 15 | [], 16 | acc, 17 | ref, 18 | index, 19 | max, 20 | reducer_acc, 21 | reducer_fun, 22 | reducer_trigger 23 | ) 24 | end 25 | 26 | trigger = fn {window, count, acc}, index, name -> 27 | {emit, acc} = reducer_trigger.(acc, index, {:count, window, name}) 28 | {emit, {window, count, acc}} 29 | end 30 | 31 | 
{acc, fun, trigger} 32 | end 33 | 34 | defp dispatch( 35 | [], 36 | window, 37 | count, 38 | emit, 39 | acc, 40 | _ref, 41 | _index, 42 | _max, 43 | _reducer_acc, 44 | _reducer_fun, 45 | _reducer_trigger 46 | ) do 47 | {emit, {window, count, acc}} 48 | end 49 | 50 | defp dispatch( 51 | events, 52 | window, 53 | count, 54 | emit, 55 | acc, 56 | ref, 57 | index, 58 | max, 59 | reducer_acc, 60 | reducer_fun, 61 | reducer_trigger 62 | ) do 63 | {count, events, rest} = collect(events, count, []) 64 | {reducer_emit, acc} = maybe_dispatch(events, acc, ref, index, window, reducer_fun) 65 | 66 | {trigger_emit, acc, window, count} = 67 | maybe_trigger(window, count, acc, index, max, reducer_acc, reducer_trigger) 68 | 69 | dispatch( 70 | rest, 71 | window, 72 | count, 73 | emit ++ reducer_emit ++ trigger_emit, 74 | acc, 75 | ref, 76 | index, 77 | max, 78 | reducer_acc, 79 | reducer_fun, 80 | reducer_trigger 81 | ) 82 | end 83 | 84 | defp maybe_trigger(window, 0, acc, index, max, reducer_acc, reducer_trigger) do 85 | {trigger_emit, _} = reducer_trigger.(acc, index, {:count, window, :done}) 86 | {trigger_emit, reducer_acc.(), window + 1, max} 87 | end 88 | 89 | defp maybe_trigger(window, count, acc, _index, _max, _reducer_acc, _reducer_trigger) do 90 | {[], acc, window, count} 91 | end 92 | 93 | defp maybe_dispatch([], acc, _ref, _index, _window, _reducer_fun) do 94 | {[], acc} 95 | end 96 | 97 | defp maybe_dispatch(events, acc, ref, index, window, reducer_fun) do 98 | if is_function(reducer_fun, 4) do 99 | reducer_fun.(ref, events, acc, index) 100 | else 101 | reducer_fun.(ref, events, acc, index, {:count, window, :placeholder}) 102 | end 103 | end 104 | 105 | defp collect([], count, acc), do: {count, :lists.reverse(acc), []} 106 | defp collect(events, 0, acc), do: {0, :lists.reverse(acc), events} 107 | defp collect([event | events], count, acc), do: collect(events, count - 1, [event | acc]) 108 | end 109 | 
--------------------------------------------------------------------------------
/lib/flow/window/fixed.ex:
--------------------------------------------------------------------------------
defmodule Flow.Window.Fixed do
  @moduledoc false

  @enforce_keys [:by, :duration]
  defstruct [:by, :duration, :trigger, lateness: 0, periodically: []]

  # Builds the `{acc, fun, trigger}` triple for a fixed window.
  #
  # The state is `{all, windows}`: `all` is the window every producer has
  # caught up to (`nil` until all producers reported), and `windows` maps a
  # window index (`div(timestamp, duration)`) to its reducer accumulator.
  def materialize(
        %{by: by, duration: duration, lateness: lateness},
        reducer_acc,
        reducer_fun,
        reducer_trigger,
        _options
      ) do
    timer_ref = make_ref()
    close_window = lateness_fun(lateness, duration, timer_ref, reducer_acc, reducer_trigger)
    static = %{by: by, duration: duration, reducer_acc: reducer_acc, reducer_fun: reducer_fun}

    initial_state = fn -> {nil, %{}} end

    # The reducing function works in three stages.
    #
    # 1. Events are grouped by window and reduced; this also yields the most
    #    recent window seen for the producer that sent them.
    #
    # 2. That most recent window is stored for the producer and the minimum
    #    and maximum over all producers are computed. A producer that has
    #    not reported yet holds `nil`, which sorts above every integer.
    #
    # 3. Windows already passed by every producer (below the minimum) are
    #    closed, unless some producer is still missing (maximum is `nil`).
    reduce = fn producers, producer_ref, events, {all, windows}, index ->
      last_seen = Map.fetch!(producers, producer_ref)

      {reducer_emit, recent, windows} =
        split_events(events, producer_ref, [], nil, last_seen, windows, index, [], static)

      producers = Map.put(producers, producer_ref, recent)
      bounds = Enum.min_max(Map.values(producers))

      {trigger_emit, state} = emit_trigger_messages(all, bounds, windows, index, close_window)
      {producers, reducer_emit ++ trigger_emit, state}
    end

    on_trigger = fn state, index, name ->
      handle_trigger(timer_ref, duration, state, index, name, reducer_acc, reducer_trigger)
    end

    {initial_state, reduce, on_trigger}
  end

  ## Reducer

  # Walks the events, buffering consecutive events of the same window and
  # reducing the buffer whenever the window changes.
  defp split_events([event | rest], ref, buffer, current, recent, windows, index, emit, static) do
    window = div(by!(static.by, event), static.duration)

    if current != nil and window !== current do
      {emit, recent, windows} =
        reduce_events(ref, buffer, current, recent, windows, index, emit, static)

      split_events(rest, ref, [event], window, recent, windows, index, emit, static)
    else
      split_events(rest, ref, [event | buffer], window, recent, windows, index, emit, static)
    end
  end

  defp split_events([], ref, buffer, window, recent, windows, index, emit, static) do
    reduce_events(ref, buffer, window, recent, windows, index, emit, static)
  end

  # Reduces one buffered run of events into its window. Events for a window
  # that is unknown and older than the producer's most recent one are dropped.
  defp reduce_events(_ref, [], _window, recent, windows, _index, emit, _static) do
    {emit, recent, windows}
  end

  defp reduce_events(ref, buffer, window, recent, windows, index, emit, static) do
    with {:ok, window_acc, recent} <- recent_window(window, recent, windows, static) do
      events = Enum.reverse(buffer)
      {new_emit, window_acc} = apply_reducer(static, ref, events, window_acc, index, window)
      {emit ++ new_emit, recent, Map.put(windows, window, window_acc)}
    else
      :error -> {emit, recent, windows}
    end
  end

  # Reducers of arity 4 do not receive the window name.
  defp apply_reducer(%{reducer_fun: reducer_fun}, ref, events, window_acc, index, _window)
       when is_function(reducer_fun, 4) do
    reducer_fun.(ref, events, window_acc, index)
  end

  defp apply_reducer(static, ref, events, window_acc, index, window) do
    name = {:fixed, window * static.duration, :placeholder}
    static.reducer_fun.(ref, events, window_acc, index, name)
  end

  # Returns `{:ok, window_acc, new_recent}` when events for `window` can be
  # accepted, or `:error` when the window is gone and behind `recent`.
  defp recent_window(window, nil, windows, static) do
    case Map.fetch(windows, window) do
      {:ok, window_acc} -> {:ok, window_acc, window}
      :error -> {:ok, static.reducer_acc.(), window}
    end
  end

  defp recent_window(window, recent, windows, static) do
    case Map.fetch(windows, window) do
      {:ok, window_acc} -> {:ok, window_acc, max(window, recent)}
      :error when window >= recent -> {:ok, static.reducer_acc.(), window}
      :error -> :error
    end
  end

  defp by!(by, event) do
    timestamp = by.(event)

    if is_integer(timestamp) do
      timestamp
    else
      raise "Flow.Window.fixed/3 expects `by` function to return an integer, " <>
              "got #{inspect(timestamp)} from #{inspect(by)}"
    end
  end

  ## Trigger emission

  defp emit_trigger_messages(all, {min, max}, windows, index, close_window) do
    cond do
      # Some producer has not sent anything yet, nothing can be closed.
      max == nil ->
        {[], {all, windows}}

      # First time every producer has reported: start from the oldest window.
      all == nil ->
        oldest = windows |> Map.keys() |> Enum.min()
        close_windows(oldest, min, windows, index, close_window, [])

      # Catch up from the previous position to the new minimum.
      true ->
        close_windows(all, min, windows, index, close_window, [])
    end
  end

  # Closes every window from `from` up to (excluding) `upto`.
  # NOTE(review): terminates only when `from` reaches `upto` by increments,
  # i.e. it assumes `from <= upto` - confirm the minimum can never move back.
  defp close_windows(upto, upto, windows, _index, _close_window, emit) do
    {emit, {upto, windows}}
  end

  defp close_windows(from, upto, windows, index, close_window, emit) do
    {new_emit, windows} = close_window.(from, windows, index)
    close_windows(from + 1, upto, windows, index, close_window, emit ++ new_emit)
  end

  # Without lateness a finished window emits `:done` and is forgotten.
  defp lateness_fun(0, duration, _ref, reducer_acc, reducer_trigger) do
    fn window, windows, index ->
      window_acc = Map.get_lazy(windows, window, reducer_acc)
      {emit, _} = reducer_trigger.(window_acc, index, {:fixed, window * duration, :done})
      {emit, Map.delete(windows, window)}
    end
  end

  # With lateness a finished window emits `:watermark`, stays around, and a
  # timer message is scheduled to terminate it later.
  defp lateness_fun(lateness, duration, ref, reducer_acc, reducer_trigger) do
    fn window, windows, index ->
      window_acc = Map.get_lazy(windows, window, reducer_acc)
      Process.send_after(self(), {:trigger, {ref, window}}, lateness)

      {emit, window_acc} =
        reducer_trigger.(window_acc, index, {:fixed, window * duration, :watermark})

      {emit, Map.put(windows, window, window_acc)}
    end
  end

  ## Trigger handling

  # Lateness termination: the timer scheduled for `window` has fired.
  def handle_trigger(ref, duration, {current, windows}, index, {ref, window}, _acc, trigger) do
    case Map.fetch(windows, window) do
      {:ok, window_acc} ->
        {emit, _window_acc} = trigger.(window_acc, index, {:fixed, window * duration, :done})
        {emit, {current, Map.delete(windows, window)}}

      :error ->
        {[], {current, windows}}
    end
  end

  # Any other trigger with no open windows is a no-op.
  def handle_trigger(_ref, _duration, {current, windows}, _index, _name, _acc, _trigger)
      when map_size(windows) == 0 do
    {[], {current, windows}}
  end

  # Otherwise trigger every window between the oldest and the newest one,
  # creating accumulators for the gaps in between.
  def handle_trigger(_ref, duration, {current, windows}, index, name, acc, trigger) do
    {oldest, newest} = windows |> Map.keys() |> Enum.min_max()

    {emit, windows} =
      Enum.reduce(oldest..newest, {[], windows}, fn window, {emit, windows} ->
        window_acc = Map.get_lazy(windows, window, acc)
        {new_emit, window_acc} = trigger.(window_acc, index, {:fixed, window * duration, name})
        {emit ++ new_emit, Map.put(windows, window, window_acc)}
      end)

    {emit, {current, windows}}
  end
end

--------------------------------------------------------------------------------
/lib/flow/window/global.ex:
--------------------------------------------------------------------------------
defmodule Flow.Window.Global do
  @moduledoc false

  @enforce_keys []
  defstruct [:trigger, periodically: []]

  # The global window keeps no state of its own: the reducer accumulator is
  # used as is and every trigger is named `{:global, :global, name}`.
  def materialize(_window, reducer_acc, reducer_fun, reducer_trigger, _options) do
    reduce =
      case reducer_fun do
        arity_four when is_function(arity_four, 4) ->
          arity_four

        with_name ->
          fn ref, events, acc, index ->
            with_name.(ref, events, acc, index, {:global, :global, :placeholder})
          end
      end

    on_trigger = fn acc, index, name ->
      reducer_trigger.(acc, index, {:global, :global, name})
    end

    {reducer_acc, reduce, on_trigger}
  end
end

--------------------------------------------------------------------------------
/lib/flow/window/periodic.ex:
--------------------------------------------------------------------------------
defmodule Flow.Window.Periodic do
  @moduledoc false

  @enforce_keys [:duration]
  defstruct [:duration, :trigger, periodically: []]

  # Builds the `{acc, fun, trigger}` triple for a periodic window.
  #
  # The state is `{window, timer, window_acc}`: the index of the current
  # window, the timer that will end it, and the reducer accumulator.
  def materialize(%{duration: duration}, reducer_acc, reducer_fun, reducer_trigger, _options) do
    # Identifies the timer messages that belong to this window.
    timer_ref = make_ref()

    initial_state = fn ->
      timer = send_after(timer_ref, duration)
      {0, timer, reducer_acc.()}
    end

    # Chosen once: reducers of arity 4 do not receive the window name.
    call_reducer =
      if is_function(reducer_fun, 4) do
        fn producer_ref, events, window_acc, index, _window ->
          reducer_fun.(producer_ref, events, window_acc, index)
        end
      else
        fn producer_ref, events, window_acc, index, window ->
          reducer_fun.(producer_ref, events, window_acc, index, {:periodic, window, :placeholder})
        end
      end

    reduce = fn producer_ref, events, {window, timer, window_acc}, index ->
      {emit, window_acc} = call_reducer.(producer_ref, events, window_acc, index, window)
      {emit, {window, timer, window_acc}}
    end

    on_trigger = fn
      # Our own timer fired: finish the window and start the next one.
      {window, _timer, window_acc}, index, ^timer_ref ->
        {emit, _} = reducer_trigger.(window_acc, index, {:periodic, window, :done})
        next_timer = send_after(timer_ref, duration)
        {emit, {window + 1, next_timer, reducer_acc.()}}

      # Any other trigger is forwarded; on `:done` the pending timer is
      # cancelled first.
      {window, timer, window_acc}, index, name ->
        if name == :done, do: cancel_after(timer_ref, timer)
        {emit, window_acc} = reducer_trigger.(window_acc, index, {:periodic, window, name})
        {emit, {window, timer, window_acc}}
    end

    {initial_state, reduce, on_trigger}
  end

  defp send_after(ref, duration) do
    Process.send_after(self(), {:trigger, ref}, duration)
  end

  # Cancels the timer; when it could not be cancelled, removes the trigger
  # message from the mailbox if it is already there.
  defp cancel_after(ref, timer) do
    if Process.cancel_timer(timer) == false do
      receive do
        {:trigger, ^ref} -> :ok
      after
        0 -> :ok
      end
    else
      :ok
    end
  end
end

--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
defmodule Flow.Mixfile do
  use Mix.Project

  @version "1.2.4"

  def project do
    [
      app: :flow,
      version:
@version, 10 | elixir: "~> 1.7", 11 | package: package(), 12 | description: "Computational parallel flows for Elixir", 13 | start_permanent: Mix.env() == :prod, 14 | deps: deps(), 15 | name: "Flow", 16 | docs: [ 17 | main: "Flow", 18 | source_ref: "v#{@version}", 19 | source_url: "https://github.com/dashbitco/flow" 20 | ] 21 | ] 22 | end 23 | 24 | def application do 25 | [extra_applications: [:logger]] 26 | end 27 | 28 | defp deps do 29 | [ 30 | {:gen_stage, "~> 1.0"}, 31 | {:ex_doc, "~> 0.19", only: :docs} 32 | ] 33 | end 34 | 35 | defp package do 36 | %{ 37 | licenses: ["Apache-2.0"], 38 | maintainers: ["José Valim", "James Fish"], 39 | links: %{"GitHub" => "https://github.com/dashbitco/flow"} 40 | } 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "earmark_parser": {:hex, :earmark_parser, "1.4.31", "a93921cdc6b9b869f519213d5bc79d9e218ba768d7270d46fdcf1c01bacff9e2", [:mix], [], "hexpm", "317d367ee0335ef037a87e46c91a2269fef6306413f731e8ec11fc45a7efd059"}, 3 | "ex_doc": {:hex, :ex_doc, "0.29.3", "f07444bcafb302db86e4f02d8bbcd82f2e881a0dcf4f3e4740e4b8128b9353f7", [:mix], [{:earmark_parser, "~> 1.4.31", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3dc6787d7b08801ec3b51e9bd26be5e8826fbf1a17e92d1ebc252e1a1c75bfe1"}, 4 | "gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"}, 5 | "makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], 
"hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"}, 6 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"}, 7 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, 8 | "nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"}, 9 | } 10 | -------------------------------------------------------------------------------- /test/flow/window/count_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window.CountTest do 2 | use ExUnit.Case, async: true 3 | 4 | defp single_window do 5 | Flow.Window.count(1000) 6 | end 7 | 8 | describe "single window" do 9 | test "with multiple mappers and reducers" do 10 | assert Flow.from_enumerable(1..100, stages: 4, max_demand: 5) 11 | |> Flow.map(& &1) 12 | |> Flow.partition(window: single_window(), stages: 4) 13 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 14 | |> Flow.emit(:state) 15 | |> Enum.sum() == 5050 16 | end 17 | 18 | test "trigger keep with large demand" do 19 | partition_opts = [window: single_window() |> Flow.Window.trigger_every(10), stages: 1] 20 | 21 | assert Flow.from_enumerable(1..100) 22 | |> Flow.partition(partition_opts) 23 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 24 | |> Flow.emit(:state) 25 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 
5050] 26 | end 27 | 28 | test "trigger keep with small demand" do 29 | partition_opts = [ 30 | window: single_window() |> Flow.Window.trigger_every(10), 31 | stages: 1, 32 | max_demand: 5 33 | ] 34 | 35 | assert Flow.from_enumerable(1..100) 36 | |> Flow.partition(partition_opts) 37 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 38 | |> Flow.emit(:state) 39 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 40 | end 41 | 42 | test "trigger discard with large demand" do 43 | partition_opts = [ 44 | window: single_window() |> Flow.Window.trigger_every(10), 45 | stages: 1 46 | ] 47 | 48 | assert Flow.from_enumerable(1..100) 49 | |> Flow.partition(partition_opts) 50 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 51 | |> Flow.on_trigger(&{[&1], 0}) 52 | |> Enum.to_list() == [55, 155, 255, 355, 455, 555, 655, 755, 855, 955, 0] 53 | end 54 | 55 | test "trigger discard with small demand" do 56 | partition_opts = [ 57 | window: single_window() |> Flow.Window.trigger_every(10), 58 | stages: 1, 59 | max_demand: 5 60 | ] 61 | 62 | assert Flow.from_enumerable(1..100) 63 | |> Flow.partition(partition_opts) 64 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 65 | |> Flow.on_trigger(&{[&1], 0}) 66 | |> Enum.to_list() == [55, 155, 255, 355, 455, 555, 655, 755, 855, 955, 0] 67 | end 68 | 69 | test "trigger ordering" do 70 | window = 71 | Flow.Window.trigger(single_window(), fn -> true end, fn events, true -> 72 | {:cont, Enum.all?(events, &(rem(&1, 2) == 0))} 73 | end) 74 | 75 | assert Flow.from_enumerable(1..10) 76 | |> Flow.partition(window: window, stages: 1) 77 | |> Flow.map(&(&1 + 1)) 78 | |> Flow.map(&(&1 * 2)) 79 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 80 | |> Flow.emit(:state) 81 | |> Enum.sort() == [130] 82 | end 83 | 84 | test "trigger names" do 85 | partition_opts = [ 86 | window: single_window() |> Flow.Window.trigger_every(10), 87 | stages: 1 88 | ] 89 | 90 | events = 91 | Flow.from_enumerable(1..100) 92 | |> Flow.partition(partition_opts) 93 | |> 
Flow.reduce(fn -> 0 end, &(&1 + &2)) 94 | |> Flow.on_trigger(fn state, _, {:count, 0, trigger} -> {[{trigger, state}], 0} end) 95 | |> Enum.sort() 96 | 97 | assert events == [ 98 | {:done, 0}, 99 | {{:every, 10}, 55}, 100 | {{:every, 10}, 155}, 101 | {{:every, 10}, 255}, 102 | {{:every, 10}, 355}, 103 | {{:every, 10}, 455}, 104 | {{:every, 10}, 555}, 105 | {{:every, 10}, 655}, 106 | {{:every, 10}, 755}, 107 | {{:every, 10}, 855}, 108 | {{:every, 10}, 955} 109 | ] 110 | end 111 | 112 | test "trigger based on intervals" do 113 | partition_opts = [ 114 | window: single_window() |> Flow.Window.trigger_periodically(100, :millisecond), 115 | stages: 1, 116 | max_demand: 10 117 | ] 118 | 119 | assert Stream.concat(1..10, Stream.timer(60_000)) 120 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 121 | |> Flow.partition(partition_opts) 122 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 123 | |> Flow.on_trigger(&{[&1 * 2], &1}) 124 | |> Enum.take(1) == [110] 125 | end 126 | 127 | test "trigger based on timers" do 128 | reduce_fun = fn -> 129 | Process.send_after(self(), {:trigger, :sample}, 200) 130 | 0 131 | end 132 | 133 | assert Stream.concat(1..10, Stream.timer(60_000)) 134 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 135 | |> Flow.partition(stages: 1, max_demand: 10, window: single_window()) 136 | |> Flow.reduce(reduce_fun, &(&1 + &2)) 137 | |> Flow.on_trigger(&{[{&1 * 2, &2, &3}], reduce_fun.()}) 138 | |> Enum.take(1) == [{110, {0, 1}, {:count, 0, :sample}}] 139 | end 140 | end 141 | 142 | defp double_ordered_window do 143 | Flow.Window.count(50) 144 | end 145 | 146 | describe "double ordered windows" do 147 | test "reduces per window with large demand" do 148 | assert Flow.from_enumerable(1..100, stages: 1) 149 | |> Flow.partition(window: double_ordered_window(), stages: 1) 150 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 151 | |> Flow.emit(:state) 152 | |> Enum.to_list() == [1275, 3775, 0] 153 | end 154 | 155 | test "triggers per window with large demand" do 156 | 
partition_opts = [ 157 | window: double_ordered_window() |> Flow.Window.trigger_every(12), 158 | stages: 1 159 | ] 160 | 161 | events = 162 | Flow.from_enumerable(1..100, stages: 1) 163 | |> Flow.partition(partition_opts) 164 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 165 | |> Flow.on_trigger(fn state, _, {:count, count, trigger} -> 166 | {[{state, count, trigger}], state} 167 | end) 168 | |> Enum.to_list() 169 | 170 | assert events == [ 171 | {78, 0, {:every, 12}}, 172 | {300, 0, {:every, 12}}, 173 | {666, 0, {:every, 12}}, 174 | {1176, 0, {:every, 12}}, 175 | {1275, 0, :done}, 176 | {678, 1, {:every, 12}}, 177 | {1500, 1, {:every, 12}}, 178 | {2466, 1, {:every, 12}}, 179 | {3576, 1, {:every, 12}}, 180 | {3775, 1, :done}, 181 | {0, 2, :done} 182 | ] 183 | end 184 | 185 | test "reduces per window with small demand" do 186 | partition_opts = [window: double_ordered_window(), stages: 1, max_demand: 5, min_demand: 0] 187 | 188 | assert Flow.from_enumerable(1..100, stages: 1) 189 | |> Flow.partition(partition_opts) 190 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 191 | |> Flow.emit(:state) 192 | |> Enum.to_list() == [1275, 3775, 0] 193 | end 194 | 195 | test "triggers per window with small demand" do 196 | partition_opts = [ 197 | window: double_ordered_window() |> Flow.Window.trigger_every(12), 198 | stages: 1, 199 | max_demand: 5, 200 | min_demand: 0 201 | ] 202 | 203 | events = 204 | Flow.from_enumerable(1..100, stages: 1) 205 | |> Flow.partition(partition_opts) 206 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 207 | |> Flow.on_trigger(fn state, _, {:count, count, trigger} -> 208 | {[{state, count, trigger}], state} 209 | end) 210 | |> Enum.to_list() 211 | 212 | assert events == [ 213 | {78, 0, {:every, 12}}, 214 | {300, 0, {:every, 12}}, 215 | {666, 0, {:every, 12}}, 216 | {1176, 0, {:every, 12}}, 217 | {1275, 0, :done}, 218 | {678, 1, {:every, 12}}, 219 | {1500, 1, {:every, 12}}, 220 | {2466, 1, {:every, 12}}, 221 | {3576, 1, {:every, 12}}, 222 | {3775, 1, :done}, 223 | 
{0, 2, :done} 224 | ] 225 | end 226 | end 227 | end 228 | -------------------------------------------------------------------------------- /test/flow/window/fixed_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window.FixedTest do 2 | use ExUnit.Case, async: true 3 | 4 | defp single_window do 5 | Flow.Window.fixed(1, :second, fn _ -> 0 end) 6 | end 7 | 8 | describe "single window" do 9 | test "with multiple mappers and reducers" do 10 | assert Flow.from_enumerable(1..100, stages: 4, max_demand: 5) 11 | |> Flow.map(& &1) 12 | |> Flow.partition(window: single_window(), stages: 4) 13 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 14 | |> Flow.emit(:state) 15 | |> Enum.sum() == 5050 16 | end 17 | 18 | test "trigger with large demand" do 19 | partition_opts = [window: single_window() |> Flow.Window.trigger_every(10), stages: 1] 20 | 21 | assert Flow.from_enumerable(1..100) 22 | |> Flow.partition(partition_opts) 23 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 24 | |> Flow.emit(:state) 25 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 26 | end 27 | 28 | test "trigger with small demand" do 29 | partition_opts = [ 30 | window: single_window() |> Flow.Window.trigger_every(10), 31 | stages: 1, 32 | max_demand: 5 33 | ] 34 | 35 | assert Flow.from_enumerable(1..100) 36 | |> Flow.partition(partition_opts) 37 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 38 | |> Flow.emit(:state) 39 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 40 | end 41 | 42 | test "trigger ordering" do 43 | window = 44 | Flow.Window.trigger(single_window(), fn -> true end, fn events, true -> 45 | {:cont, Enum.all?(events, &(rem(&1, 2) == 0))} 46 | end) 47 | 48 | assert Flow.from_enumerable(1..10) 49 | |> Flow.partition(window: window, stages: 1) 50 | |> Flow.map(&(&1 + 1)) 51 | |> Flow.map(&(&1 * 2)) 52 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 53 | |> Flow.emit(:state) 54 | |> 
Enum.sort() == [130] 55 | end 56 | 57 | test "trigger names" do 58 | partition_opts = [ 59 | window: single_window() |> Flow.Window.trigger_every(10), 60 | stages: 1 61 | ] 62 | 63 | result = 64 | Flow.from_enumerable(1..100) 65 | |> Flow.partition(partition_opts) 66 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 67 | |> Flow.on_trigger(fn state, _, {:fixed, 0, trigger} -> {[{trigger, state}], 0} end) 68 | |> Enum.sort() 69 | 70 | assert result == [ 71 | {:done, 0}, 72 | {{:every, 10}, 55}, 73 | {{:every, 10}, 155}, 74 | {{:every, 10}, 255}, 75 | {{:every, 10}, 355}, 76 | {{:every, 10}, 455}, 77 | {{:every, 10}, 555}, 78 | {{:every, 10}, 655}, 79 | {{:every, 10}, 755}, 80 | {{:every, 10}, 855}, 81 | {{:every, 10}, 955} 82 | ] 83 | end 84 | 85 | test "trigger based on intervals" do 86 | partition_opts = [ 87 | window: single_window() |> Flow.Window.trigger_periodically(100, :millisecond), 88 | stages: 1, 89 | max_demand: 10 90 | ] 91 | 92 | assert Stream.concat(1..10, Stream.timer(60_000)) 93 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 94 | |> Flow.partition(partition_opts) 95 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 96 | |> Flow.on_trigger(&{[&1 * 2], &1}) 97 | |> Enum.take(1) == [110] 98 | end 99 | 100 | test "trigger based on timers" do 101 | reduce_fun = fn -> 102 | Process.send_after(self(), {:trigger, :sample}, 200) 103 | 0 104 | end 105 | 106 | assert Stream.concat(1..10, Stream.timer(60_000)) 107 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 108 | |> Flow.partition(stages: 1, max_demand: 10, window: single_window()) 109 | |> Flow.reduce(reduce_fun, &(&1 + &2)) 110 | |> Flow.on_trigger(&{[{&1 * 2, &2, &3}], reduce_fun.()}) 111 | |> Enum.take(1) == [{110, {0, 1}, {:fixed, 0, :sample}}] 112 | end 113 | end 114 | 115 | defp double_ordered_window do 116 | Flow.Window.fixed(1, :second, fn 117 | x when x <= 50 -> 0 + x 118 | x when x <= 100 -> 1_000 + x 119 | end) 120 | end 121 | 122 | describe "double ordered windows" do 123 | test "reduces per window 
with large demand" do 124 | assert Flow.from_enumerable(1..100, stages: 1) 125 | |> Flow.partition(window: double_ordered_window(), stages: 1) 126 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 127 | |> Flow.emit(:state) 128 | |> Enum.to_list() == [1275, 3775] 129 | end 130 | 131 | test "triggers per window with large demand" do 132 | partition_opts = [ 133 | window: double_ordered_window() |> Flow.Window.trigger_every(12), 134 | stages: 1 135 | ] 136 | 137 | result = 138 | Flow.from_enumerable(1..100, stages: 1) 139 | |> Flow.partition(partition_opts) 140 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 141 | |> Flow.on_trigger(fn state, _, {:fixed, fixed, trigger} -> 142 | {[{state, fixed, trigger}], state} 143 | end) 144 | |> Enum.to_list() 145 | 146 | assert result == [ 147 | {78, 0, {:every, 12}}, 148 | {300, 0, {:every, 12}}, 149 | {666, 0, {:every, 12}}, 150 | {1176, 0, {:every, 12}}, 151 | {678, 1000, {:every, 12}}, 152 | {1500, 1000, {:every, 12}}, 153 | {2466, 1000, {:every, 12}}, 154 | {3576, 1000, {:every, 12}}, 155 | {1275, 0, :done}, 156 | {3775, 1000, :done} 157 | ] 158 | end 159 | 160 | test "reduces per window with small demand" do 161 | partition_opts = [window: double_ordered_window(), stages: 1, max_demand: 5, min_demand: 0] 162 | 163 | assert Flow.from_enumerable(1..100, stages: 1) 164 | |> Flow.partition(partition_opts) 165 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 166 | |> Flow.emit(:state) 167 | |> Enum.to_list() == [1275, 3775] 168 | end 169 | 170 | test "triggers per window with small demand" do 171 | partition_opts = [ 172 | window: double_ordered_window() |> Flow.Window.trigger_every(12), 173 | stages: 1, 174 | max_demand: 5, 175 | min_demand: 0 176 | ] 177 | 178 | result = 179 | Flow.from_enumerable(1..100, stages: 1) 180 | |> Flow.partition(partition_opts) 181 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 182 | |> Flow.on_trigger(fn state, _, {:fixed, fixed, trigger} -> 183 | {[{state, fixed, trigger}], state} 184 | end) 185 | |> Enum.to_list() 186 | 
187 | assert result == [ 188 | {78, 0, {:every, 12}}, 189 | {300, 0, {:every, 12}}, 190 | {666, 0, {:every, 12}}, 191 | {1176, 0, {:every, 12}}, 192 | {1275, 0, :done}, 193 | {678, 1000, {:every, 12}}, 194 | {1500, 1000, {:every, 12}}, 195 | {2466, 1000, {:every, 12}}, 196 | {3576, 1000, {:every, 12}}, 197 | {3775, 1000, :done} 198 | ] 199 | end 200 | 201 | test "triggers for all windows" do 202 | partition_opts = [ 203 | window: double_ordered_window() |> Flow.Window.trigger_periodically(100, :millisecond), 204 | stages: 1, 205 | max_demand: 5, 206 | min_demand: 0 207 | ] 208 | 209 | result = 210 | Stream.concat(1..100, Stream.timer(60_000)) 211 | |> Flow.from_enumerable(max_demand: 5, stages: 1) 212 | |> Flow.partition(partition_opts) 213 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 214 | |> Flow.on_trigger(fn state, _, {:fixed, fixed, trigger} -> 215 | {[{state, fixed, trigger}], state} 216 | end) 217 | |> Enum.take(2) 218 | 219 | assert result == [ 220 | {1275, 0, :done}, 221 | {3775, 1000, {:periodically, 100, :millisecond}} 222 | ] 223 | end 224 | end 225 | 226 | defp double_unordered_window_without_lateness do 227 | Flow.Window.fixed(1, :second, fn 228 | x when x <= 40 -> 229 | 0 230 | 231 | x when x <= 80 -> 232 | 2_000 233 | 234 | # Those events will be lost 235 | x when x <= 100 -> 236 | 0 237 | end) 238 | end 239 | 240 | # With one stage, termination happens when one stage is done. 
describe "double unordered windows without lateness with one stage" do
  # The window function maps events 1..40 to window 0, 41..80 to window 2000
  # and 81..100 back to window 0. No lateness is allowed, so whether the last
  # group still counts towards window 0 differs between the large-demand and
  # small-demand expectations below (2630 = 820 + 1810 versus 820).

  test "reduces per window with large demand" do
    window = double_unordered_window_without_lateness()

    states =
      1..100
      |> Flow.from_enumerable(stages: 1)
      |> Flow.partition(window: window, stages: 1)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.emit(:state)
      |> Enum.to_list()

    assert states == [2630, 0, 2420]
  end

  test "triggers per window with large demand" do
    window = Flow.Window.trigger_every(double_unordered_window_without_lateness(), 12)
    opts = [window: window, stages: 1]

    # Emit the running sum together with the window and the trigger name,
    # keeping the accumulated state untouched.
    tag_with_window = fn sum, _, {:fixed, fixed, trigger} ->
      {[{sum, fixed, trigger}], sum}
    end

    emitted =
      1..100
      |> Flow.from_enumerable(stages: 1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.on_trigger(tag_with_window)
      |> Enum.to_list()

    expected = [
      {78, 0, {:every, 12}},
      {300, 0, {:every, 12}},
      {666, 0, {:every, 12}},
      {558, 2000, {:every, 12}},
      {1260, 2000, {:every, 12}},
      {2106, 2000, {:every, 12}},
      {1496, 0, {:every, 12}},
      {2630, 0, {:every, 12}},
      {2630, 0, :done},
      {0, 1000, :done},
      {2420, 2000, :done}
    ]

    assert emitted == expected
  end

  test "reduces per window with small demand" do
    opts = [
      window: double_unordered_window_without_lateness(),
      stages: 1,
      max_demand: 5,
      min_demand: 0
    ]

    states =
      1..100
      |> Flow.from_enumerable(stages: 1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.emit(:state)
      |> Enum.to_list()

    # Events 81..100 are not counted here: window 0 closes with 820.
    assert states == [820, 0, 2420]
  end

  test "triggers per window with small demand" do
    window = Flow.Window.trigger_every(double_unordered_window_without_lateness(), 12)
    opts = [window: window, stages: 1, max_demand: 5, min_demand: 0]

    tag_with_window = fn sum, _, {:fixed, fixed, trigger} ->
      {[{sum, fixed, trigger}], sum}
    end

    emitted =
      1..100
      |> Flow.from_enumerable(stages: 1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.on_trigger(tag_with_window)
      |> Enum.to_list()

    expected = [
      {78, 0, {:every, 12}},
      {300, 0, {:every, 12}},
      {666, 0, {:every, 12}},
      {820, 0, :done},
      {0, 1000, :done},
      {558, 2000, {:every, 12}},
      {1260, 2000, {:every, 12}},
      {2106, 2000, {:every, 12}},
      {2420, 2000, :done}
    ]

    assert emitted == expected
  end

  test "triggers for all windows" do
    window =
      Flow.Window.trigger_periodically(
        double_unordered_window_without_lateness(),
        100,
        :millisecond
      )

    opts = [window: window, stages: 1, max_demand: 5, min_demand: 0]

    tag_with_window = fn sum, _, {:fixed, fixed, trigger} ->
      {[{sum, fixed, trigger}], sum}
    end

    # The trailing 60s timer keeps the producer open, so only the first
    # three emissions are taken instead of waiting for the flow to finish.
    emitted =
      1..100
      |> Stream.concat(Stream.timer(60_000))
      |> Flow.from_enumerable(max_demand: 5, stages: 1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.on_trigger(tag_with_window)
      |> Enum.take(3)

    expected = [
      {820, 0, :done},
      {0, 1000, :done},
      {2420, 2000, {:periodically, 100, :millisecond}}
    ]

    assert emitted == expected
  end
end

# With two stages, termination is only guaranteed once both stages are done.
describe "double unordered windows without lateness with two stages" do
  test "reduces per window with large demand" do
    window = double_unordered_window_without_lateness()

    states =
      1..100
      |> Flow.from_enumerable(stages: 2)
      |> Flow.partition(window: window, stages: 1)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.emit(:state)
      |> Enum.to_list()

    assert states == [2630, 0, 2420]
  end

  test "triggers per window with large demand" do
    window = Flow.Window.trigger_every(double_unordered_window_without_lateness(), 12)
    opts = [window: window, stages: 1]

    # Emit the running sum together with the window and the trigger name,
    # keeping the accumulated state untouched.
    tag_with_window = fn sum, _, {:fixed, fixed, trigger} ->
      {[{sum, fixed, trigger}], sum}
    end

    emitted =
      1..100
      |> Flow.from_enumerable(stages: 2)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.on_trigger(tag_with_window)
      |> Enum.to_list()

    expected = [
      {78, 0, {:every, 12}},
      {300, 0, {:every, 12}},
      {666, 0, {:every, 12}},
      {558, 2000, {:every, 12}},
      {1260, 2000, {:every, 12}},
      {2106, 2000, {:every, 12}},
      {1496, 0, {:every, 12}},
      {2630, 0, {:every, 12}},
      {2630, 0, :done},
      {0, 1000, :done},
      {2420, 2000, :done}
    ]

    assert emitted == expected
  end

  test "reduces per window with small demand" do
    opts = [
      window: double_unordered_window_without_lateness(),
      stages: 1,
      max_demand: 100
    ]

    # We were not supposed to receive all the data. However, because
    # there are two stages, we are only done once both of them are done,
    # so we may end up consuming late events while the other producer
    # is still open.
    states =
      1..100
      |> Flow.from_enumerable(stages: 2)
      |> Flow.map(& &1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.emit(:state)
      |> Enum.to_list()

    # Outcome when late events are consumed while terminating.
    with_late_events = [2630, 0, 2420]
    # Outcome when termination is fast (no late events consumed).
    without_late_events = [820, 0, 2420]

    assert states == with_late_events or states == without_late_events
  end

  test "triggers per window with small demand" do
    window = Flow.Window.trigger_every(double_unordered_window_without_lateness(), 12)
    opts = [window: window, stages: 1, max_demand: 5, min_demand: 0]

    tag_with_window = fn sum, _, {:fixed, fixed, trigger} ->
      {[{sum, fixed, trigger}], sum}
    end

    emitted =
      1..100
      |> Flow.from_enumerable(stages: 2)
      |> Flow.map(& &1)
      |> Flow.partition(opts)
      |> Flow.reduce(fn -> 0 end, &+/2)
      |> Flow.on_trigger(tag_with_window)
      |> Enum.to_list()

    expected = [
      {78, 0, {:every, 12}},
      {300, 0, {:every, 12}},
      {666, 0, {:every, 12}},
      {820, 0, :done},
      {0, 1000, :done},
      {558, 2000, {:every, 12}},
      {1260, 2000, {:every, 12}},
      {2106, 2000, {:every, 12}},
      {2420, 2000, :done}
    ]

    assert emitted == expected
  end
end

# Builds a one-second fixed window whose timestamp function sends events
# 1..40 to window 0, 41..80 to window 2000 and 81..100 back to window 0,
# with one hour of allowed lateness.
defp double_unordered_window_with_lateness() do
  timestamp_for = fn
    event when event <= 40 ->
      0

    event when event <= 80 ->
      2_000

    # Those events won't be lost due to lateness
    event when event <= 100 ->
      0
  end

  window = Flow.Window.fixed(1, :second, timestamp_for)
  Flow.Window.allowed_lateness(window, 1, :hour)
end

# With one stage, termination happens when one stage is done.
465 | describe "double unordered windows with lateness with one stage" do 466 | test "reduces per window with large demand" do 467 | assert Flow.from_enumerable(1..100, stages: 1) 468 | |> Flow.partition(window: double_unordered_window_with_lateness(), stages: 1) 469 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 470 | |> Flow.emit(:state) 471 | |> Enum.to_list() == [2630, 0, 2630, 0, 2420] 472 | end 473 | 474 | test "triggers per window with large demand" do 475 | partition_opts = [ 476 | window: double_unordered_window_with_lateness() |> Flow.Window.trigger_every(12), 477 | stages: 1 478 | ] 479 | 480 | result = 481 | Flow.from_enumerable(1..100, stages: 1) 482 | |> Flow.partition(partition_opts) 483 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 484 | |> Flow.on_trigger(fn state, _, {:fixed, fixed, trigger} -> 485 | {[{state, fixed, trigger}], state} 486 | end) 487 | |> Enum.to_list() 488 | 489 | assert result == [ 490 | {78, 0, {:every, 12}}, 491 | {300, 0, {:every, 12}}, 492 | {666, 0, {:every, 12}}, 493 | {558, 2000, {:every, 12}}, 494 | {1260, 2000, {:every, 12}}, 495 | {2106, 2000, {:every, 12}}, 496 | {1496, 0, {:every, 12}}, 497 | {2630, 0, {:every, 12}}, 498 | {2630, 0, :watermark}, 499 | {0, 1000, :watermark}, 500 | {2630, 0, :done}, 501 | {0, 1000, :done}, 502 | {2420, 2000, :done} 503 | ] 504 | end 505 | 506 | test "reduces per window with small demand" do 507 | partition_opts = [ 508 | window: double_unordered_window_with_lateness(), 509 | stages: 1, 510 | max_demand: 5, 511 | min_demand: 0 512 | ] 513 | 514 | assert Flow.from_enumerable(1..100, stages: 1) 515 | |> Flow.partition(partition_opts) 516 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 517 | |> Flow.emit(:state) 518 | |> Enum.to_list() == [820, 0, 2630, 0, 2420] 519 | end 520 | 521 | test "triggers per window with small demand" do 522 | partition_opts = [ 523 | window: double_unordered_window_with_lateness() |> Flow.Window.trigger_every(12), 524 | stages: 1, 525 | max_demand: 5, 526 | min_demand: 0 527 | 
] 528 | 529 | result = 530 | Flow.from_enumerable(1..100, stages: 1) 531 | |> Flow.partition(partition_opts) 532 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 533 | |> Flow.on_trigger(fn state, _, {:fixed, fixed, trigger} -> 534 | {[{state, fixed, trigger}], state} 535 | end) 536 | |> Enum.to_list() 537 | 538 | assert result == [ 539 | {78, 0, {:every, 12}}, 540 | {300, 0, {:every, 12}}, 541 | {666, 0, {:every, 12}}, 542 | {820, 0, :watermark}, 543 | {0, 1000, :watermark}, 544 | {558, 2000, {:every, 12}}, 545 | {1260, 2000, {:every, 12}}, 546 | {2106, 2000, {:every, 12}}, 547 | {1496, 0, {:every, 12}}, 548 | {2630, 0, {:every, 12}}, 549 | {2630, 0, :done}, 550 | {0, 1000, :done}, 551 | {2420, 2000, :done} 552 | ] 553 | end 554 | end 555 | end 556 | -------------------------------------------------------------------------------- /test/flow/window/global_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window.GlobalTest do 2 | use ExUnit.Case, async: true 3 | 4 | test "trigger keep with large demand" do 5 | partition_opts = [window: Flow.Window.global() |> Flow.Window.trigger_every(10), stages: 1] 6 | 7 | assert Flow.from_enumerable(1..100) 8 | |> Flow.partition(partition_opts) 9 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 10 | |> Flow.emit(:state) 11 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 12 | end 13 | 14 | test "trigger keep with small demand" do 15 | partition_opts = [ 16 | window: Flow.Window.global() |> Flow.Window.trigger_every(10), 17 | stages: 1, 18 | max_demand: 5 19 | ] 20 | 21 | assert Flow.from_enumerable(1..100) 22 | |> Flow.partition(partition_opts) 23 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 24 | |> Flow.emit(:state) 25 | |> Enum.to_list() == [55, 210, 465, 820, 1275, 1830, 2485, 3240, 4095, 5050, 5050] 26 | end 27 | 28 | test "trigger discard with large demand" do 29 | partition_opts = [ 30 | window: Flow.Window.global() |> 
Flow.Window.trigger_every(10), 31 | stages: 1 32 | ] 33 | 34 | assert Flow.from_enumerable(1..100) 35 | |> Flow.partition(partition_opts) 36 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 37 | |> Flow.on_trigger(&{[&1], 0}) 38 | |> Enum.to_list() == [55, 155, 255, 355, 455, 555, 655, 755, 855, 955, 0] 39 | end 40 | 41 | test "trigger discard with small demand" do 42 | partition_opts = [ 43 | window: Flow.Window.global() |> Flow.Window.trigger_every(10), 44 | stages: 1, 45 | max_demand: 5 46 | ] 47 | 48 | assert Flow.from_enumerable(1..100) 49 | |> Flow.partition(partition_opts) 50 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 51 | |> Flow.on_trigger(&{[&1], 0}) 52 | |> Enum.to_list() == [55, 155, 255, 355, 455, 555, 655, 755, 855, 955, 0] 53 | end 54 | 55 | test "trigger ordering" do 56 | window = 57 | Flow.Window.trigger(Flow.Window.global(), fn -> true end, fn events, true -> 58 | {:cont, Enum.all?(events, &(rem(&1, 2) == 0))} 59 | end) 60 | 61 | assert Flow.from_enumerable(1..10) 62 | |> Flow.partition(window: window, stages: 1) 63 | |> Flow.map(&(&1 + 1)) 64 | |> Flow.map(&(&1 * 2)) 65 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 66 | |> Flow.emit(:state) 67 | |> Enum.sort() == [130] 68 | end 69 | 70 | test "trigger names" do 71 | partition_opts = [ 72 | window: Flow.Window.global() |> Flow.Window.trigger_every(10), 73 | stages: 1 74 | ] 75 | 76 | events = 77 | Flow.from_enumerable(1..100) 78 | |> Flow.partition(partition_opts) 79 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 80 | |> Flow.on_trigger(fn state, _, {:global, :global, trigger} -> {[{trigger, state}], 0} end) 81 | |> Enum.sort() 82 | 83 | assert events == [ 84 | {:done, 0}, 85 | {{:every, 10}, 55}, 86 | {{:every, 10}, 155}, 87 | {{:every, 10}, 255}, 88 | {{:every, 10}, 355}, 89 | {{:every, 10}, 455}, 90 | {{:every, 10}, 555}, 91 | {{:every, 10}, 655}, 92 | {{:every, 10}, 755}, 93 | {{:every, 10}, 855}, 94 | {{:every, 10}, 955} 95 | ] 96 | end 97 | 98 | test "trigger based on intervals" do 99 | partition_opts = [ 
100 | window: Flow.Window.global() |> Flow.Window.trigger_periodically(100, :millisecond), 101 | stages: 1, 102 | max_demand: 10 103 | ] 104 | 105 | assert Flow.from_enumerable(Stream.concat(1..10, Stream.timer(60_000)), max_demand: 5) 106 | |> Flow.partition(partition_opts) 107 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 108 | |> Flow.on_trigger(&{[&1 * 2], &1}) 109 | |> Enum.take(1) == [110] 110 | end 111 | 112 | test "trigger based on timers" do 113 | reduce_fun = fn -> 114 | Process.send_after(self(), {:trigger, :sample}, 200) 115 | 0 116 | end 117 | 118 | assert Stream.concat(1..10, Stream.timer(60_000)) 119 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 120 | |> Flow.partition(stages: 1, max_demand: 10) 121 | |> Flow.reduce(reduce_fun, &(&1 + &2)) 122 | |> Flow.on_trigger(&{[{&1 * 2, &2, &3}], reduce_fun.()}) 123 | |> Enum.take(1) == [{110, {0, 1}, {:global, :global, :sample}}] 124 | end 125 | end 126 | -------------------------------------------------------------------------------- /test/flow/window/periodic_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Flow.Window.PeriodicTest do 2 | use ExUnit.Case, async: true 3 | 4 | defp single_window do 5 | Flow.Window.periodic(100, :millisecond) 6 | end 7 | 8 | test "emits based on intervals" do 9 | result = 10 | Stream.concat(1..10, Stream.timer(60_000)) 11 | |> Flow.from_enumerable(max_demand: 5) 12 | |> Flow.partition(window: single_window(), stages: 1, max_demand: 10) 13 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 14 | |> Flow.on_trigger(fn state, index, {:periodic, window, :done} -> 15 | {[{state, index, window}], state} 16 | end) 17 | |> Enum.take(2) 18 | 19 | assert result == [{55, {0, 1}, 0}, {0, {0, 1}, 1}] 20 | end 21 | 22 | test "emits based on intervals with count triggers" do 23 | partition_opts = [ 24 | window: single_window() |> Flow.Window.trigger_every(5), 25 | stages: 1, 26 | max_demand: 10 27 | ] 28 | 29 | result = 30 | Stream.concat(1..10, 
Stream.timer(60_000)) 31 | |> Flow.from_enumerable(max_demand: 5, stages: 2) 32 | |> Flow.partition(partition_opts) 33 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 34 | |> Flow.on_trigger(fn state, _, {:periodic, window, trigger} -> 35 | {[{state, window, trigger}], state} 36 | end) 37 | |> Enum.take(3) 38 | 39 | assert result == [{15, 0, {:every, 5}}, {55, 0, {:every, 5}}, {55, 0, :done}] 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /test/flow/window_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Flow.WindowTest do 2 | use ExUnit.Case, async: true 3 | doctest Flow.Window 4 | 5 | test "periodic triggers" do 6 | assert Flow.Window.global() 7 | |> Flow.Window.trigger_periodically(10, :second) 8 | |> Map.fetch!(:periodically) == [{10000, {:periodically, 10, :second}}] 9 | 10 | assert Flow.Window.global() 11 | |> Flow.Window.trigger_periodically(10, :minute) 12 | |> Map.fetch!(:periodically) == [{600_000, {:periodically, 10, :minute}}] 13 | 14 | assert Flow.Window.global() 15 | |> Flow.Window.trigger_periodically(10, :hour) 16 | |> Map.fetch!(:periodically) == [{36_000_000, {:periodically, 10, :hour}}] 17 | end 18 | 19 | describe "custom trigger w/ :cont, emitted events and no emitted events" do 20 | setup do 21 | flow = fn window, parent -> 22 | Flow.from_enumerables([[:a, :b, :c], [:a, :b, :c]], stages: 1) 23 | |> Flow.partition(window: window, stages: 1) 24 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 25 | |> Flow.on_trigger(fn state -> 26 | send(parent, Enum.sort(state)) 27 | {[], []} 28 | end) 29 | |> Flow.start_link() 30 | end 31 | 32 | moving_event_trigger = fn window, count, emitted -> 33 | name = {:moving_event_trigger, count} 34 | 35 | Flow.Window.trigger(window, fn -> [] end, fn events, acc -> 36 | new_acc = acc ++ events 37 | 38 | if length(new_acc) >= count do 39 | pre = Enum.take(new_acc, count) 40 | pos = Enum.drop(new_acc, 1) 41 | {:trigger, 
name, pre, pos, []} 42 | else 43 | {:cont, emitted, new_acc} 44 | end 45 | end) 46 | end 47 | 48 | [flow: flow, trigger: moving_event_trigger] 49 | end 50 | 51 | test "skip on :cont w/ reset", context do 52 | window = Flow.Window.global() |> context[:trigger].(2, []) 53 | {:ok, pid} = context[:flow].(window, self()) 54 | 55 | assert_receive [:a, :b] 56 | assert_receive [:b, :c] 57 | assert_receive [:a, :c] 58 | assert_receive [:a, :b] 59 | assert_receive [:b, :c] 60 | refute_received [:a] 61 | refute_received [:c] 62 | 63 | ref = Process.monitor(pid) 64 | assert_receive {:DOWN, ^ref, _, _, _} 65 | end 66 | 67 | test "emit on :cont w/ reset", context do 68 | window = Flow.Window.global() |> context[:trigger].(2, [:elixir]) 69 | {:ok, pid} = context[:flow].(window, self()) 70 | 71 | assert_receive [:a, :b] 72 | assert_receive [:b, :c] 73 | assert_receive [:a, :c, :elixir] 74 | assert_receive [:a, :b] 75 | assert_receive [:b, :c] 76 | refute_received [:a, :c] 77 | refute_received [:a] 78 | refute_received [:c] 79 | 80 | ref = Process.monitor(pid) 81 | assert_receive {:DOWN, ^ref, _, _, _} 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /test/flow_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FlowTest do 2 | use ExUnit.Case, async: true 3 | 4 | doctest Flow 5 | 6 | defmodule Counter do 7 | use GenStage 8 | 9 | def start_link(counter) do 10 | GenStage.start_link(__MODULE__, counter) 11 | end 12 | 13 | def init(counter) do 14 | {:producer, counter} 15 | end 16 | 17 | def handle_demand(demand, counter) when demand > 0 do 18 | # If the counter is 3 and we ask for 2 items, we will 19 | # emit the items 3 and 4, and set the state to 5. 
20 | events = Enum.to_list(counter..(counter + demand - 1)) 21 | {:noreply, events, counter + demand} 22 | end 23 | end 24 | 25 | defmodule Copier do 26 | use GenStage 27 | 28 | def start_link(parent) do 29 | GenStage.start_link(__MODULE__, parent) 30 | end 31 | 32 | def init(parent) do 33 | {:producer_consumer, parent} 34 | end 35 | 36 | def handle_events(events, _from, parent) do 37 | send(parent, {:producer_consumed, events}) 38 | {:noreply, events, parent} 39 | end 40 | end 41 | 42 | defmodule Forwarder do 43 | use GenStage 44 | 45 | def start_link(parent) do 46 | GenStage.start_link(__MODULE__, parent) 47 | end 48 | 49 | def init(parent) do 50 | {:consumer, parent} 51 | end 52 | 53 | def handle_events(events, _from, parent) do 54 | send(parent, {:consumed, events}) 55 | {:noreply, [], parent} 56 | end 57 | end 58 | 59 | defmodule Sleeper do 60 | use GenStage 61 | 62 | def start_link(parent) do 63 | GenStage.start_link(__MODULE__, parent, name: Sleeper) 64 | end 65 | 66 | def init(parent) do 67 | {:consumer, parent} 68 | end 69 | 70 | def handle_events(events, _from, parent) do 71 | send(parent, {:consumed, self(), events}) 72 | Process.sleep(:infinity) 73 | end 74 | end 75 | 76 | defmodule NonStarter do 77 | use GenStage 78 | 79 | def start_link(_) do 80 | GenStage.start_link(__MODULE__, []) 81 | end 82 | 83 | def init(_) do 84 | :ignore 85 | end 86 | end 87 | 88 | describe "on use" do 89 | test "defines a child_spec/2 function" do 90 | defmodule MyFlow do 91 | use Flow, shutdown: 1000 92 | end 93 | 94 | assert MyFlow.child_spec(:ok) == %{ 95 | id: FlowTest.MyFlow, 96 | shutdown: 1000, 97 | start: {FlowTest.MyFlow, :start_link, [:ok]} 98 | } 99 | end 100 | end 101 | 102 | test "child_spec/2" do 103 | parent = self() 104 | 105 | start_supervised!( 106 | {Flow, 107 | Flow.from_enumerables([[1, 2, 3], [4, 5, 6]], stages: 2) 108 | |> Flow.filter(&(rem(&1, 2) == 0)) 109 | |> Flow.map(&send(parent, &1))} 110 | ) 111 | 112 | assert_receive 2 113 | assert_receive 4 
114 | assert_receive 6 115 | refute_received 1 116 | end 117 | 118 | describe "errors" do 119 | test "on multiple reduce calls" do 120 | message = ~r"cannot call group_by/reduce/emit_and_reduce on a flow after another" 121 | 122 | assert_raise ArgumentError, message, fn -> 123 | Flow.from_enumerable([1, 2, 3]) 124 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 125 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 126 | |> Enum.to_list() 127 | end 128 | end 129 | 130 | test "on_trigger/2 without reduce" do 131 | message = ~r"on_trigger/2 must be called after a group_by/reduce/emit_and_reduce operation" 132 | 133 | assert_raise ArgumentError, message, fn -> 134 | Flow.from_enumerable([1, 2, 3]) 135 | |> Flow.on_trigger(fn x -> x end) 136 | |> Enum.to_list() 137 | end 138 | end 139 | 140 | test "on map_batch/2" do 141 | message = ~r"map_batch/2 can only be called at the beginning of the stage/partition" 142 | 143 | assert_raise ArgumentError, message, fn -> 144 | Flow.from_enumerable([1, 2, 3]) 145 | |> Flow.reduce(fn -> 0 end, &+/2) 146 | |> Flow.map_batch(& &1) 147 | end 148 | 149 | message = ~r"map_batch/2 can only be called at the beginning of the stage/partition" 150 | 151 | assert_raise ArgumentError, message, fn -> 152 | Flow.from_enumerable([1, 2, 3]) 153 | |> Flow.map(& &1) 154 | |> Flow.map_batch(& &1) 155 | end 156 | end 157 | 158 | test "on mapper after emit/1" do 159 | message = ~r"map/2 cannot be called after group_by/reduce/emit_and_reduce operation" 160 | 161 | assert_raise ArgumentError, message, fn -> 162 | Flow.from_enumerable([1, 2, 3]) 163 | |> Flow.reduce(fn -> 0 end, &+/2) 164 | |> Flow.emit(:state) 165 | |> Flow.map(& &1) 166 | |> Enum.to_list() 167 | end 168 | end 169 | 170 | @tag :capture_log 171 | test "on window without computation" do 172 | Process.flag(:trap_exit, true) 173 | 174 | assert catch_exit( 175 | [1, 2, 3] 176 | |> Flow.from_enumerable(window: Flow.Window.fixed(1, :second, & &1)) 177 | |> Enum.to_list() 178 | ) 179 | end 180 | 181 | @tag 
:capture_log 182 | test "on error in producer started via run" do 183 | Process.flag(:trap_exit, true) 184 | 185 | assert catch_exit( 186 | :start 187 | |> Stream.iterate(fn _ -> raise "oops" end) 188 | |> Flow.from_enumerable(stages: 1, max_demand: 1) 189 | |> Flow.run() 190 | ) 191 | end 192 | 193 | @tag :capture_log 194 | test "on error in producer started via non-linked stream" do 195 | assert catch_exit( 196 | :start 197 | |> Stream.iterate(fn _ -> raise "oops" end) 198 | |> Flow.from_enumerable(stages: 1, max_demand: 1) 199 | |> Flow.stream(link: false) 200 | |> Enum.to_list() 201 | ) 202 | end 203 | 204 | @tag :capture_log 205 | test "on error in producer started via start_link" do 206 | Process.flag(:trap_exit, true) 207 | 208 | {:ok, pid} = 209 | [] 210 | |> Stream.take(0) 211 | |> Flow.from_enumerable(stages: 1, max_demand: 1) 212 | |> Flow.start_link() 213 | 214 | assert_receive {:EXIT, ^pid, :normal} 215 | 216 | {:ok, pid} = 217 | :start 218 | |> Stream.iterate(fn _ -> raise "oops" end) 219 | |> Flow.from_enumerable(stages: 1, max_demand: 1) 220 | |> Flow.start_link() 221 | 222 | assert_receive {:EXIT, ^pid, :shutdown} 223 | end 224 | end 225 | 226 | describe "run/1" do 227 | test "does not leave lingering messages nor monitors" do 228 | Flow.from_enumerable(1..100, stages: 4) 229 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 230 | |> Flow.on_trigger(&{[&1], &1}) 231 | |> Enum.to_list() 232 | 233 | refute_received _ 234 | assert Process.info(self(), :monitors) == {:monitors, []} 235 | end 236 | 237 | test "terminates flow if parent process terminates" do 238 | parent = self() 239 | 240 | {:ok, task_pid} = 241 | Task.start(fn -> 242 | Flow.from_enumerable(Stream.concat([0], Stream.cycle(1..100)), stages: 4) 243 | |> Flow.map(fn 244 | 0 -> send(parent, {:running, self()}) 245 | n -> n 246 | end) 247 | |> Enum.to_list() 248 | end) 249 | 250 | assert_receive {:running, stage_pid} 251 | stage_ref = Process.monitor(stage_pid) 252 | Process.exit(task_pid, :kill) 
253 | assert_receive {:DOWN, ^stage_ref, _, _, _} 254 | end 255 | end 256 | 257 | describe "enumerable-stream" do 258 | @flow Flow.from_enumerables([[1, 2, 3], [4, 5, 6]], stages: 2) 259 | 260 | test "only sources" do 261 | assert @flow |> Enum.sort() == [1, 2, 3, 4, 5, 6] 262 | end 263 | 264 | @tag :capture_log 265 | test "raises locally" do 266 | Process.flag(:trap_exit, true) 267 | assert catch_exit(@flow |> Flow.map(fn _ -> raise "oops" end) |> Enum.to_list()) 268 | end 269 | 270 | test "filter/2" do 271 | assert @flow |> Flow.filter(&(rem(&1, 2) == 0)) |> Enum.sort() == [2, 4, 6] 272 | end 273 | 274 | test "flat_map/2" do 275 | assert @flow |> Flow.flat_map(&[&1, &1]) |> Enum.sort() == 276 | [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6] 277 | end 278 | 279 | test "map_batch/2" do 280 | assert @flow |> Flow.map_batch(fn [x, y, z] -> [x + y + z] end) |> Enum.sort() == [6, 15] 281 | 282 | assert @flow 283 | |> Flow.map_batch(fn [x, y, z] -> [x + y + z] end) 284 | |> Flow.map(&(&1 * 2)) 285 | |> Enum.sort() == [12, 30] 286 | end 287 | 288 | test "map/2" do 289 | assert @flow |> Flow.map(&(&1 * 2)) |> Enum.sort() == [2, 4, 6, 8, 10, 12] 290 | end 291 | 292 | test "reject/2" do 293 | assert @flow |> Flow.reject(&(rem(&1, 2) == 0)) |> Enum.sort() == [1, 3, 5] 294 | end 295 | 296 | test "uniq_by/2" do 297 | result = @flow |> Flow.uniq_by(&rem(&1, 2)) |> Enum.sort() 298 | assert length(result) == 2 299 | end 300 | 301 | test "keeps ordering" do 302 | flow = 303 | @flow 304 | |> Flow.filter(&(rem(&1, 2) == 0)) 305 | |> Flow.map(fn x -> x + 1 end) 306 | |> Flow.map(fn x -> x * 2 end) 307 | 308 | assert Enum.sort(flow) == [6, 10, 14] 309 | end 310 | 311 | test "reduce/3" do 312 | assert @flow 313 | |> Flow.reduce(fn -> 0 end, &+/2) 314 | |> Flow.on_trigger(&{[&1], &1}) 315 | |> Enum.sum() == 21 316 | end 317 | 318 | test "emit_and_reduce/3" do 319 | assert @flow 320 | |> Flow.emit_and_reduce(fn -> 0 end, &{[&1], &1 + &2}) 321 | |> Flow.on_trigger(&{[&1], &1}) 322 | |> Enum.sum() 
== 42 323 | end 324 | 325 | test "flat_map/2 + emit_and_reduce/3" do 326 | assert @flow 327 | |> Flow.flat_map(&[&1, &1]) 328 | |> Flow.emit_and_reduce(fn -> 0 end, &{[&1], &1 + &2}) 329 | |> Flow.on_trigger(&{[&1], &1}) 330 | |> Enum.sum() == 84 331 | end 332 | 333 | test "start_link/2" do 334 | parent = self() 335 | 336 | {:ok, pid} = 337 | @flow 338 | |> Flow.filter(&(rem(&1, 2) == 0)) 339 | |> Flow.map(&send(parent, &1)) 340 | |> Flow.start_link() 341 | 342 | assert_receive 2 343 | assert_receive 4 344 | assert_receive 6 345 | refute_received 1 346 | 347 | ref = Process.monitor(pid) 348 | assert_receive {:DOWN, ^ref, _, _, _} 349 | end 350 | 351 | test "start_link/2 with :name", config do 352 | {:ok, pid} = 353 | @flow 354 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 355 | |> Flow.start_link(name: config.test) 356 | 357 | assert Process.whereis(config.test) == pid 358 | end 359 | 360 | test "start_link/2 with merged flow" do 361 | parent = self() 362 | 363 | Flow.merge([Flow.from_enumerable([1])], GenStage.DemandDispatcher) 364 | |> Flow.map(&send(parent, &1)) 365 | |> Flow.start_link() 366 | 367 | assert_receive 1 368 | end 369 | 370 | test "into_stages/3" do 371 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 372 | 373 | {:ok, pid} = 374 | @flow 375 | |> Flow.filter(&(rem(&1, 2) == 0)) 376 | |> Flow.into_stages([forwarder]) 377 | 378 | assert_receive {:consumed, [2]} 379 | assert_receive {:consumed, [4, 6]} 380 | 381 | ref = Process.monitor(pid) 382 | assert_receive {:DOWN, ^ref, _, _, _} 383 | end 384 | 385 | test "into_stages/3 with :name", config do 386 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 387 | 388 | {:ok, pid} = 389 | @flow 390 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 391 | |> Flow.into_stages([forwarder], name: config.test) 392 | 393 | assert Process.whereis(config.test) == pid 394 | end 395 | 396 | test "through_stages/3" do 397 | {:ok, printer1} = GenStage.start_link(Copier, self()) 398 | {:ok, 
printer2} = GenStage.start_link(Copier, self()) 399 | 400 | assert @flow 401 | |> Flow.through_stages([printer1]) 402 | |> Flow.filter(&(rem(&1, 2) == 0)) 403 | |> Flow.through_stages([printer2]) 404 | |> Flow.map(& &1) 405 | |> Flow.start_link() 406 | 407 | assert_receive {:producer_consumed, [1, 2, 3]} 408 | assert_receive {:producer_consumed, [4, 5, 6]} 409 | assert_receive {:producer_consumed, [2]} 410 | assert_receive {:producer_consumed, [4, 6]} 411 | end 412 | 413 | test "through_stages/3 + into_stages/3" do 414 | {:ok, printer1} = GenStage.start_link(Copier, self()) 415 | {:ok, printer2} = GenStage.start_link(Copier, self()) 416 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 417 | 418 | assert @flow 419 | |> Flow.through_stages([printer1]) 420 | |> Flow.filter(&(rem(&1, 2) == 0)) 421 | |> Flow.through_stages([printer2]) 422 | |> Flow.into_stages([forwarder]) 423 | 424 | assert_receive {:producer_consumed, [1, 2, 3]} 425 | assert_receive {:producer_consumed, [4, 5, 6]} 426 | assert_receive {:producer_consumed, [2]} 427 | assert_receive {:producer_consumed, [4, 6]} 428 | assert_receive {:consumed, [2]} 429 | assert_receive {:consumed, [4, 6]} 430 | end 431 | 432 | test "into_specs/3" do 433 | {:ok, _} = 434 | @flow 435 | |> Flow.filter(&(rem(&1, 2) == 0)) 436 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 437 | 438 | assert_receive {:consumed, [2]} 439 | assert_receive {:consumed, [4, 6]} 440 | end 441 | 442 | test "into_specs/3 with :name", config do 443 | {:ok, pid} = 444 | @flow 445 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 446 | |> Flow.into_specs([{{Forwarder, self()}, []}], name: config.test) 447 | 448 | assert Process.whereis(config.test) == pid 449 | end 450 | 451 | test "through_specs/3" do 452 | assert @flow 453 | |> Flow.through_specs([{{Copier, self()}, []}]) 454 | |> Flow.filter(&(rem(&1, 2) == 0)) 455 | |> Flow.through_specs([{{Copier, self()}, []}]) 456 | |> Flow.map(& &1) 457 | |> Flow.start_link() 458 | 459 | 
assert_receive {:producer_consumed, [1, 2, 3]} 460 | assert_receive {:producer_consumed, [4, 5, 6]} 461 | assert_receive {:producer_consumed, [2]} 462 | assert_receive {:producer_consumed, [4, 6]} 463 | end 464 | 465 | test "through_specs/3 + into_specs/3" do 466 | assert @flow 467 | |> Flow.through_specs([{{Copier, self()}, []}]) 468 | |> Flow.filter(&(rem(&1, 2) == 0)) 469 | |> Flow.through_specs([{{Copier, self()}, []}]) 470 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 471 | 472 | assert_receive {:producer_consumed, [1, 2, 3]} 473 | assert_receive {:producer_consumed, [4, 5, 6]} 474 | assert_receive {:producer_consumed, [2]} 475 | assert_receive {:producer_consumed, [4, 6]} 476 | assert_receive {:consumed, [2]} 477 | assert_receive {:consumed, [4, 6]} 478 | end 479 | 480 | test "on halt without intermediary" do 481 | {:links, [coordinator]} = 482 | Stream.cycle([1, 2, 3]) 483 | |> Flow.from_enumerable() 484 | |> Stream.take(10) 485 | |> Enum.reduce(nil, fn _, acc -> 486 | acc || Process.info(self(), :links) 487 | end) 488 | 489 | ref = Process.monitor(coordinator) 490 | assert_receive {:DOWN, ^ref, _, _, _} 491 | end 492 | 493 | test "on halt with intermediary" do 494 | {:links, [coordinator]} = 495 | Stream.cycle([1, 2, 3]) 496 | |> Flow.from_enumerable() 497 | |> Flow.map(& &1) 498 | |> Stream.take(10) 499 | |> Enum.reduce(nil, fn _, acc -> 500 | acc || Process.info(self(), :links) 501 | end) 502 | 503 | ref = Process.monitor(coordinator) 504 | assert_receive {:DOWN, ^ref, _, _, _} 505 | end 506 | end 507 | 508 | describe "enumerable-unpartioned-stream" do 509 | @flow Flow.from_enumerables([[1, 2, 3], [4, 5, 6]], stages: 4) 510 | 511 | test "only sources" do 512 | assert @flow |> Enum.sort() == [1, 2, 3, 4, 5, 6] 513 | end 514 | 515 | @tag :capture_log 516 | test "raises locally" do 517 | Process.flag(:trap_exit, true) 518 | assert catch_exit(@flow |> Flow.map(fn _ -> raise "oops" end) |> Enum.to_list()) 519 | end 520 | 521 | test "filter/2" do 522 | 
assert @flow |> Flow.filter(&(rem(&1, 2) == 0)) |> Enum.sort() == [2, 4, 6] 523 | end 524 | 525 | test "flat_map/2" do 526 | assert @flow |> Flow.flat_map(&[&1, &1]) |> Enum.sort() == 527 | [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6] 528 | end 529 | 530 | test "map_batch/2" do 531 | assert @flow |> Flow.map_batch(fn [x, y, z] -> [x + y + z] end) |> Enum.sort() == [6, 15] 532 | end 533 | 534 | test "map/2" do 535 | assert @flow |> Flow.map(&(&1 * 2)) |> Enum.sort() == [2, 4, 6, 8, 10, 12] 536 | end 537 | 538 | test "reject/2" do 539 | assert @flow |> Flow.reject(&(rem(&1, 2) == 0)) |> Enum.sort() == [1, 3, 5] 540 | end 541 | 542 | test "reduce/3" do 543 | assert @flow 544 | |> Flow.reduce(fn -> 0 end, &+/2) 545 | |> Flow.on_trigger(&{[&1], &1}) 546 | |> Enum.sum() == 21 547 | end 548 | 549 | test "emit_and_reduce/3" do 550 | assert @flow 551 | |> Flow.emit_and_reduce(fn -> 0 end, &{[&1], &1 + &2}) 552 | |> Flow.on_trigger(&{[&1], &1}) 553 | |> Enum.sum() == 42 554 | end 555 | 556 | test "uniq_by/2" do 557 | result = @flow |> Flow.uniq_by(&rem(&1, 2)) |> Enum.sort() 558 | assert length(result) == 2 559 | end 560 | 561 | test "keeps ordering" do 562 | flow = 563 | @flow 564 | |> Flow.filter(&(rem(&1, 2) == 0)) 565 | |> Flow.map(fn x -> x + 1 end) 566 | |> Flow.map(fn x -> x * 2 end) 567 | 568 | assert Enum.sort(flow) == [6, 10, 14] 569 | end 570 | 571 | test "allows custom windowing" do 572 | window = 573 | Flow.Window.fixed(1, :second, fn 574 | x when x <= 50 -> 0 575 | x when x <= 100 -> 1_000 576 | end) 577 | 578 | windows = 579 | Flow.from_enumerable(1..100, window: window, stages: 4, max_demand: 5) 580 | |> Flow.reduce(fn -> 0 end, fn n, acc -> 581 | # slowing down the flow a bit in order to make it more deterministic 582 | Process.sleep(1) 583 | n + acc 584 | end) 585 | |> Flow.on_trigger(&{[&1], &1}) 586 | |> Enum.to_list() 587 | 588 | assert length(windows) == 8 589 | assert Enum.sum(windows) == 5050 590 | end 591 | 592 | test "start_link/2" do 593 | parent = self() 
594 | 595 | {:ok, pid} = 596 | @flow 597 | |> Flow.filter(&(rem(&1, 2) == 0)) 598 | |> Flow.map(&send(parent, &1)) 599 | |> Flow.start_link() 600 | 601 | assert_receive 2 602 | assert_receive 4 603 | assert_receive 6 604 | refute_received 1 605 | 606 | ref = Process.monitor(pid) 607 | assert_receive {:DOWN, ^ref, _, _, _} 608 | end 609 | 610 | test "start_link/2 with :name", config do 611 | {:ok, pid} = 612 | @flow 613 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 614 | |> Flow.start_link(name: config.test) 615 | 616 | assert Process.whereis(config.test) == pid 617 | end 618 | 619 | test "into_stages/3" do 620 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 621 | 622 | {:ok, _} = 623 | @flow 624 | |> Flow.filter(&(rem(&1, 2) == 0)) 625 | |> Flow.into_stages([forwarder]) 626 | 627 | assert_receive {:consumed, [2]} 628 | assert_receive {:consumed, [4, 6]} 629 | end 630 | 631 | test "into_stages/3 with :name", config do 632 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 633 | 634 | {:ok, pid} = 635 | @flow 636 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 637 | |> Flow.into_stages([forwarder], name: config.test) 638 | 639 | assert Process.whereis(config.test) == pid 640 | end 641 | 642 | test "through_stages/3" do 643 | {:ok, printer1} = GenStage.start_link(Copier, self()) 644 | {:ok, printer2} = GenStage.start_link(Copier, self()) 645 | 646 | assert @flow 647 | |> Flow.through_stages([printer1]) 648 | |> Flow.filter(&(rem(&1, 2) == 0)) 649 | |> Flow.through_stages([printer2]) 650 | |> Flow.map(& &1) 651 | |> Flow.start_link() 652 | 653 | assert_receive {:producer_consumed, [1, 2, 3]} 654 | assert_receive {:producer_consumed, [4, 5, 6]} 655 | assert_receive {:producer_consumed, [2]} 656 | assert_receive {:producer_consumed, [4, 6]} 657 | end 658 | 659 | test "through_stages/3 + into_stages/3" do 660 | {:ok, printer1} = GenStage.start_link(Copier, self()) 661 | {:ok, printer2} = GenStage.start_link(Copier, self()) 662 | {:ok, 
forwarder} = GenStage.start_link(Forwarder, self()) 663 | 664 | assert @flow 665 | |> Flow.through_stages([printer1]) 666 | |> Flow.filter(&(rem(&1, 2) == 0)) 667 | |> Flow.through_stages([printer2]) 668 | |> Flow.into_stages([forwarder]) 669 | 670 | assert_receive {:producer_consumed, [1, 2, 3]} 671 | assert_receive {:producer_consumed, [4, 5, 6]} 672 | assert_receive {:producer_consumed, [2]} 673 | assert_receive {:producer_consumed, [4, 6]} 674 | assert_receive {:consumed, [2]} 675 | assert_receive {:consumed, [4, 6]} 676 | end 677 | 678 | test "into_specs/3" do 679 | {:ok, _} = 680 | @flow 681 | |> Flow.filter(&(rem(&1, 2) == 0)) 682 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 683 | 684 | assert_receive {:consumed, [2]} 685 | assert_receive {:consumed, [4, 6]} 686 | end 687 | 688 | test "into_specs/3 with :name", config do 689 | {:ok, pid} = 690 | @flow 691 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 692 | |> Flow.into_specs([{{Forwarder, self()}, []}], name: config.test) 693 | 694 | assert Process.whereis(config.test) == pid 695 | end 696 | 697 | test "through_specs/3" do 698 | assert @flow 699 | |> Flow.through_specs([{{Copier, self()}, []}]) 700 | |> Flow.filter(&(rem(&1, 2) == 0)) 701 | |> Flow.through_specs([{{Copier, self()}, []}]) 702 | |> Flow.map(& &1) 703 | |> Flow.start_link() 704 | 705 | assert_receive {:producer_consumed, [1, 2, 3]} 706 | assert_receive {:producer_consumed, [4, 5, 6]} 707 | assert_receive {:producer_consumed, [2]} 708 | assert_receive {:producer_consumed, [4, 6]} 709 | end 710 | 711 | test "through_specs/3 + into_specs/3" do 712 | assert @flow 713 | |> Flow.through_specs([{{Copier, self()}, []}]) 714 | |> Flow.filter(&(rem(&1, 2) == 0)) 715 | |> Flow.through_specs([{{Copier, self()}, []}]) 716 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 717 | 718 | assert_receive {:producer_consumed, [1, 2, 3]} 719 | assert_receive {:producer_consumed, [4, 5, 6]} 720 | assert_receive {:producer_consumed, [2]} 721 | 
assert_receive {:producer_consumed, [4, 6]} 722 | assert_receive {:consumed, [2]} 723 | assert_receive {:consumed, [4, 6]} 724 | end 725 | end 726 | 727 | describe "enumerable-partitioned-stream" do 728 | @flow Flow.from_enumerables([[1, 2, 3], [4, 5, 6], 7..10], stages: 4) 729 | |> Flow.partition(stages: 4) 730 | 731 | test "only sources" do 732 | assert @flow 733 | |> Enum.sort() == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 734 | 735 | assert @flow 736 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 737 | |> Flow.on_trigger(&{[&1], &1}) 738 | |> Enum.map(&Enum.sort/1) 739 | |> Enum.sort() == [[1, 5, 7, 9], [2, 6, 8], [3, 4], [10]] 740 | end 741 | 742 | @tag :capture_log 743 | test "raises locally" do 744 | Process.flag(:trap_exit, true) 745 | assert catch_exit(@flow |> Flow.map(fn _ -> raise "oops" end) |> Enum.to_list()) 746 | end 747 | 748 | test "filter/2" do 749 | assert @flow |> Flow.filter(&(rem(&1, 2) == 0)) |> Enum.sort() == [2, 4, 6, 8, 10] 750 | end 751 | 752 | test "flat_map/2" do 753 | assert @flow |> Flow.flat_map(&[&1, &1]) |> Enum.sort() == 754 | [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10] 755 | end 756 | 757 | test "map/2" do 758 | assert @flow |> Flow.map(&(&1 * 2)) |> Enum.sort() == [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] 759 | end 760 | 761 | test "map_batch/2" do 762 | assert @flow |> Flow.map_batch(fn list -> [Enum.sum(list)] end) |> Enum.sort() == 763 | [1, 2, 3, 4, 5, 6, 8, 10, 16] 764 | end 765 | 766 | test "reject/2" do 767 | assert @flow |> Flow.reject(&(rem(&1, 2) == 0)) |> Enum.sort() == [1, 3, 5, 7, 9] 768 | end 769 | 770 | test "reduce/3" do 771 | assert @flow 772 | |> Flow.reduce(fn -> 0 end, &+/2) 773 | |> Flow.on_trigger(&{[&1], &1}) 774 | |> Enum.sort() == [7, 10, 16, 22] 775 | 776 | assert @flow 777 | |> Flow.reject(&(rem(&1, 2) == 0)) 778 | |> Flow.reduce(fn -> 0 end, &+/2) 779 | |> Flow.on_trigger(&{[&1], &1}) 780 | |> Enum.sort() == [0, 0, 3, 22] 781 | end 782 | 783 | test "emit_and_reduce/3" do 784 | assert @flow 785 | |> 
Flow.emit_and_reduce(fn -> 0 end, &{[&1], &1 + &2}) 786 | |> Flow.on_trigger(&{[&1], &1}) 787 | |> Enum.sum() == 110 788 | end 789 | 790 | test "uniq_by/2" do 791 | result = @flow |> Flow.uniq_by(&rem(&1, 2)) |> Enum.sort() 792 | assert length(result) == 5 793 | end 794 | 795 | test "keeps ordering" do 796 | flow = 797 | @flow 798 | |> Flow.filter(&(rem(&1, 2) == 0)) 799 | |> Flow.map(fn x -> x + 1 end) 800 | |> Flow.map(fn x -> x * 2 end) 801 | 802 | assert Enum.sort(flow) == [6, 10, 14, 18, 22] 803 | end 804 | 805 | test "start_link/2" do 806 | parent = self() 807 | 808 | {:ok, pid} = 809 | @flow 810 | |> Flow.filter(&(rem(&1, 2) == 0)) 811 | |> Flow.map(&send(parent, &1)) 812 | |> Flow.start_link() 813 | 814 | assert_receive 2 815 | assert_receive 4 816 | assert_receive 6 817 | refute_received 1 818 | 819 | ref = Process.monitor(pid) 820 | assert_receive {:DOWN, ^ref, _, _, _} 821 | end 822 | 823 | test "start_link/2 with :name", config do 824 | {:ok, pid} = 825 | @flow 826 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 827 | |> Flow.start_link(name: config.test) 828 | 829 | assert Process.whereis(config.test) == pid 830 | end 831 | 832 | test "into_stages/3" do 833 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 834 | 835 | {:ok, _} = 836 | @flow 837 | |> Flow.filter(&(rem(&1, 2) == 0)) 838 | |> Flow.into_stages([{forwarder, cancel: :transient}]) 839 | 840 | assert_receive {:consumed, [2]} 841 | assert_receive {:consumed, [4]} 842 | assert_receive {:consumed, [6]} 843 | assert_receive {:consumed, [8]} 844 | assert_receive {:consumed, [10]} 845 | end 846 | 847 | test "into_stages/3 with :name", config do 848 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 849 | 850 | {:ok, pid} = 851 | @flow 852 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 853 | |> Flow.into_stages([forwarder], name: config.test) 854 | 855 | assert Process.whereis(config.test) == pid 856 | end 857 | 858 | test "through_stages/3" do 859 | {:ok, printer1} = 
GenStage.start_link(Copier, self()) 860 | {:ok, printer2} = GenStage.start_link(Copier, self()) 861 | 862 | assert @flow 863 | |> Flow.through_stages([printer1]) 864 | |> Flow.filter(&(rem(&1, 2) == 0)) 865 | |> Flow.through_stages([printer2]) 866 | |> Flow.map(& &1) 867 | |> Flow.start_link() 868 | 869 | assert_receive {:producer_consumed, [2]} 870 | assert_receive {:producer_consumed, [6]} 871 | assert_receive {:producer_consumed, [8]} 872 | assert_receive {:producer_consumed, [1]} 873 | assert_receive {:producer_consumed, [5]} 874 | assert_receive {:producer_consumed, ~c"\a\t"} 875 | assert_receive {:producer_consumed, [3]} 876 | assert_receive {:producer_consumed, [4]} 877 | assert_receive {:producer_consumed, [10]} 878 | 879 | assert_receive {:producer_consumed, [2]} 880 | assert_receive {:producer_consumed, [6]} 881 | assert_receive {:producer_consumed, [8]} 882 | assert_receive {:producer_consumed, [4]} 883 | assert_receive {:producer_consumed, [10]} 884 | end 885 | 886 | test "through_stages/3 + into_stages/3" do 887 | {:ok, printer1} = GenStage.start_link(Copier, self()) 888 | {:ok, printer2} = GenStage.start_link(Copier, self()) 889 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 890 | 891 | assert @flow 892 | |> Flow.through_stages([printer1]) 893 | |> Flow.filter(&(rem(&1, 2) == 0)) 894 | |> Flow.through_stages([printer2]) 895 | |> Flow.into_stages([forwarder]) 896 | 897 | assert_receive {:producer_consumed, [2]} 898 | assert_receive {:producer_consumed, [6]} 899 | assert_receive {:producer_consumed, [8]} 900 | assert_receive {:producer_consumed, [1]} 901 | assert_receive {:producer_consumed, [5]} 902 | assert_receive {:producer_consumed, ~c"\a\t"} 903 | assert_receive {:producer_consumed, [3]} 904 | assert_receive {:producer_consumed, [4]} 905 | assert_receive {:producer_consumed, [10]} 906 | 907 | assert_receive {:producer_consumed, [2]} 908 | assert_receive {:producer_consumed, [6]} 909 | assert_receive {:producer_consumed, [8]} 910 | 
assert_receive {:producer_consumed, [4]} 911 | assert_receive {:producer_consumed, [10]} 912 | 913 | assert_receive {:consumed, [2]} 914 | assert_receive {:consumed, [6]} 915 | assert_receive {:consumed, [8]} 916 | assert_receive {:consumed, [4]} 917 | assert_receive {:consumed, [10]} 918 | end 919 | 920 | test "into_specs/3" do 921 | {:ok, _} = 922 | @flow 923 | |> Flow.filter(&(rem(&1, 2) == 0)) 924 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 925 | 926 | assert_receive {:consumed, [2]} 927 | assert_receive {:consumed, [4]} 928 | assert_receive {:consumed, [6]} 929 | assert_receive {:consumed, [8]} 930 | assert_receive {:consumed, [10]} 931 | end 932 | 933 | test "into_specs/3 with :name", config do 934 | {:ok, pid} = 935 | @flow 936 | |> Flow.map(fn _ -> Process.sleep(:infinity) end) 937 | |> Flow.into_specs([{{Forwarder, self()}, []}], name: config.test) 938 | 939 | assert Process.whereis(config.test) == pid 940 | end 941 | 942 | test "through_specs/3" do 943 | assert @flow 944 | |> Flow.through_specs([{{Copier, self()}, []}]) 945 | |> Flow.filter(&(rem(&1, 2) == 0)) 946 | |> Flow.through_specs([{{Copier, self()}, []}]) 947 | |> Flow.map(& &1) 948 | |> Flow.start_link() 949 | 950 | assert_receive {:producer_consumed, [2]} 951 | assert_receive {:producer_consumed, [6]} 952 | assert_receive {:producer_consumed, [8]} 953 | assert_receive {:producer_consumed, [1]} 954 | assert_receive {:producer_consumed, [5]} 955 | assert_receive {:producer_consumed, ~c"\a\t"} 956 | assert_receive {:producer_consumed, [3]} 957 | assert_receive {:producer_consumed, [4]} 958 | assert_receive {:producer_consumed, [10]} 959 | 960 | assert_receive {:producer_consumed, [2]} 961 | assert_receive {:producer_consumed, [6]} 962 | assert_receive {:producer_consumed, [8]} 963 | assert_receive {:producer_consumed, [4]} 964 | assert_receive {:producer_consumed, [10]} 965 | end 966 | 967 | test "through_specs/3 + into_specs/3" do 968 | assert @flow 969 | |> Flow.through_specs([{{Copier, 
self()}, []}]) 970 | |> Flow.filter(&(rem(&1, 2) == 0)) 971 | |> Flow.through_specs([{{Copier, self()}, []}]) 972 | |> Flow.into_specs([{{Forwarder, self()}, []}]) 973 | 974 | assert_receive {:producer_consumed, [2]} 975 | assert_receive {:producer_consumed, [6]} 976 | assert_receive {:producer_consumed, [8]} 977 | assert_receive {:producer_consumed, [1]} 978 | assert_receive {:producer_consumed, [5]} 979 | assert_receive {:producer_consumed, ~c"\a\t"} 980 | assert_receive {:producer_consumed, [3]} 981 | assert_receive {:producer_consumed, [4]} 982 | assert_receive {:producer_consumed, [10]} 983 | 984 | assert_receive {:producer_consumed, [2]} 985 | assert_receive {:producer_consumed, [6]} 986 | assert_receive {:producer_consumed, [8]} 987 | assert_receive {:producer_consumed, [4]} 988 | assert_receive {:producer_consumed, [10]} 989 | 990 | assert_receive {:consumed, [2]} 991 | assert_receive {:consumed, [6]} 992 | assert_receive {:consumed, [8]} 993 | assert_receive {:consumed, [4]} 994 | assert_receive {:consumed, [10]} 995 | end 996 | end 997 | 998 | describe "stages-unpartioned-stream" do 999 | @tag report: [:counter] 1000 | 1001 | setup do 1002 | {:ok, pid} = GenStage.start_link(Counter, 0) 1003 | {:ok, counter: pid} 1004 | end 1005 | 1006 | test "only sources", %{counter: pid} do 1007 | assert Flow.from_stages([pid], stages: 1) 1008 | |> Enum.take(5) 1009 | |> Enum.sort() == [0, 1, 2, 3, 4] 1010 | end 1011 | 1012 | test "filter/2", %{counter: pid} do 1013 | assert Flow.from_stages([pid], stages: 1) 1014 | |> Flow.filter(&(rem(&1, 2) == 0)) 1015 | |> Enum.take(5) 1016 | |> Enum.sort() == [0, 2, 4, 6, 8] 1017 | end 1018 | 1019 | test "flat_map/2", %{counter: pid} do 1020 | assert Flow.from_stages([pid], stages: 1) 1021 | |> Flow.flat_map(&[&1, &1]) 1022 | |> Enum.take(5) 1023 | |> Enum.sort() == [0, 0, 1, 1, 2] 1024 | end 1025 | 1026 | test "map/2", %{counter: pid} do 1027 | assert Flow.from_stages([pid], stages: 1) 1028 | |> Flow.map(&(&1 * 2)) 1029 | |> 
Enum.take(5) 1030 | |> Enum.sort() == [0, 2, 4, 6, 8] 1031 | end 1032 | 1033 | test "reject/2", %{counter: pid} do 1034 | assert Flow.from_stages([pid], stages: 1) 1035 | |> Flow.reject(&(rem(&1, 2) == 0)) 1036 | |> Enum.take(5) 1037 | |> Enum.sort() == [1, 3, 5, 7, 9] 1038 | end 1039 | 1040 | test "keeps ordering", %{counter: pid} do 1041 | assert Flow.from_stages([pid], stages: 1) 1042 | |> Flow.filter(&(rem(&1, 2) == 0)) 1043 | |> Flow.map(fn x -> x + 1 end) 1044 | |> Flow.map(fn x -> x * 2 end) 1045 | |> Enum.take(5) 1046 | |> Enum.sort() == [2, 6, 10, 14, 18] 1047 | end 1048 | end 1049 | 1050 | describe "specs-unpartioned-stream" do 1051 | @specs [{Counter, 0}] 1052 | 1053 | test "only sources" do 1054 | assert Flow.from_specs(@specs, stages: 1) 1055 | |> Enum.take(5) 1056 | |> Enum.sort() == [0, 1, 2, 3, 4] 1057 | end 1058 | 1059 | test "filter/2" do 1060 | assert Flow.from_specs(@specs, stages: 1) 1061 | |> Flow.filter(&(rem(&1, 2) == 0)) 1062 | |> Enum.take(5) 1063 | |> Enum.sort() == [0, 2, 4, 6, 8] 1064 | end 1065 | 1066 | test "flat_map/2" do 1067 | assert Flow.from_specs(@specs, stages: 1) 1068 | |> Flow.flat_map(&[&1, &1]) 1069 | |> Enum.take(5) 1070 | |> Enum.sort() == [0, 0, 1, 1, 2] 1071 | end 1072 | 1073 | test "map/2" do 1074 | assert Flow.from_specs(@specs, stages: 1) 1075 | |> Flow.map(&(&1 * 2)) 1076 | |> Enum.take(5) 1077 | |> Enum.sort() == [0, 2, 4, 6, 8] 1078 | end 1079 | 1080 | test "map_batch/2" do 1081 | assert Flow.from_specs(@specs, stages: 1) 1082 | |> Flow.map_batch(&[Enum.sum(&1)]) 1083 | |> Enum.take(5) 1084 | |> Enum.sort() == [124_750, 374_750, 624_750, 874_750, 1_124_750] 1085 | end 1086 | 1087 | test "reject/2" do 1088 | assert Flow.from_specs(@specs, stages: 1) 1089 | |> Flow.reject(&(rem(&1, 2) == 0)) 1090 | |> Enum.take(5) 1091 | |> Enum.sort() == [1, 3, 5, 7, 9] 1092 | end 1093 | 1094 | test "keeps ordering" do 1095 | assert Flow.from_specs(@specs, stages: 1) 1096 | |> Flow.filter(&(rem(&1, 2) == 0)) 1097 | |> Flow.map(fn x 
-> x + 1 end) 1098 | |> Flow.map(fn x -> x * 2 end) 1099 | |> Enum.take(5) 1100 | |> Enum.sort() == [2, 6, 10, 14, 18] 1101 | end 1102 | end 1103 | 1104 | describe "specs-ignored" do 1105 | @specs [{NonStarter, []}] 1106 | 1107 | test "ignores ignored stage" do 1108 | assert Flow.from_specs(@specs) 1109 | |> Enum.to_list() == [] 1110 | end 1111 | end 1112 | 1113 | describe "partition/2" do 1114 | test "allows custom partitioning" do 1115 | assert Flow.from_enumerables([[1, 2, 3], [4, 5, 6], 7..10]) 1116 | |> Flow.partition(hash: fn x -> {x, 0} end, stages: 4) 1117 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1118 | |> Flow.on_trigger(&{[Enum.sort(&1)], &1}) 1119 | |> Enum.sort() == [[], [], [], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] 1120 | end 1121 | 1122 | test "allows element based partitioning" do 1123 | assert Flow.from_enumerables([[{1, 1}, {2, 2}, {3, 3}], [{1, 4}, {2, 5}, {3, 6}]]) 1124 | |> Flow.partition(key: {:elem, 0}, stages: 2) 1125 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1126 | |> Flow.on_trigger(fn acc -> 1127 | {[acc |> Enum.map(&elem(&1, 1)) |> Enum.sort()], acc} 1128 | end) 1129 | |> Enum.sort() == [[1, 2, 4, 5], [3, 6]] 1130 | end 1131 | 1132 | test "allows key based partitioning" do 1133 | enumerables = [ 1134 | [%{key: 1, value: 1}, %{key: 2, value: 2}, %{key: 3, value: 3}], 1135 | [%{key: 1, value: 4}, %{key: 2, value: 5}, %{key: 3, value: 6}] 1136 | ] 1137 | 1138 | assert Flow.from_enumerables(enumerables) 1139 | |> Flow.partition(key: {:key, :key}, stages: 2) 1140 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1141 | |> Flow.on_trigger(fn acc -> {[acc |> Enum.map(& &1.value) |> Enum.sort()], acc} end) 1142 | |> Enum.sort() == [[1, 2, 4, 5], [3, 6]] 1143 | end 1144 | 1145 | test "allows function based partitioning" do 1146 | enumerables = [ 1147 | [%{key: 1, value: 1}, %{key: 2, value: 2}, %{key: 3, value: 3}], 1148 | [%{key: 1, value: 4}, %{key: 2, value: 5}, %{key: 3, value: 6}] 1149 | ] 1150 | 1151 | assert Flow.from_enumerables(enumerables) 1152 | 
|> Flow.partition(key: & &1.key, stages: 2) 1153 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1154 | |> Flow.on_trigger(fn acc -> {[acc |> Enum.map(& &1.value) |> Enum.sort()], acc} end) 1155 | |> Enum.sort() == [[1, 2, 4, 5], [3, 6]] 1156 | end 1157 | 1158 | test "allows function based partitioning after shuffling" do 1159 | enumerables = [ 1160 | [%{key: 1, value: 1}, %{key: 2, value: 2}, %{key: 3, value: 3}], 1161 | [%{key: 1, value: 4}, %{key: 2, value: 5}, %{key: 3, value: 6}] 1162 | ] 1163 | 1164 | assert Flow.from_enumerables(enumerables) 1165 | |> Flow.shuffle(stages: 2) 1166 | |> Flow.partition(key: & &1.key, stages: 2) 1167 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1168 | |> Flow.on_trigger(fn acc -> {[acc |> Enum.map(& &1.value) |> Enum.sort()], acc} end) 1169 | |> Enum.sort() == [[1, 2, 4, 5], [3, 6]] 1170 | end 1171 | 1172 | test "allows custom windowing" do 1173 | window = 1174 | Flow.Window.fixed(1, :second, fn 1175 | x when x <= 50 -> 0 1176 | x when x <= 100 -> 1_000 1177 | end) 1178 | 1179 | assert Flow.from_enumerable(1..100) 1180 | |> Flow.partition(window: window, stages: 4) 1181 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1182 | |> Flow.on_trigger(&{[Enum.sum(&1)], &1}) 1183 | |> Enum.sort() == [173, 361, 364, 377, 797, 865, 895, 1218] 1184 | end 1185 | end 1186 | 1187 | describe "take_sort/3" do 1188 | test "is equivalent to Enum.sort/3 ascending on the whole collection" do 1189 | list1 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1190 | list2 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1191 | list3 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1192 | list4 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1193 | 1194 | assert Flow.from_enumerables([list1, list2, list3, list4]) 1195 | |> Flow.partition() 1196 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1197 | |> Flow.take_sort(100) 1198 | |> Enum.at(0) == (list1 ++ list2 ++ list3 ++ list4) |> Enum.sort() |> Enum.take(100) 1199 | end 1200 | 1201 | test "is 
equivalent to Enum.sort/3 descending on the whole collection" do 1202 | list1 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1203 | list2 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1204 | list3 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1205 | list4 = Enum.map(1..1000, fn _ -> :rand.uniform(10000) end) 1206 | 1207 | assert Flow.from_enumerables([list1, list2, list3, list4]) 1208 | |> Flow.partition() 1209 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1210 | |> Flow.take_sort(100, &>=/2) 1211 | |> Enum.at(0) == 1212 | (list1 ++ list2 ++ list3 ++ list4) |> Enum.sort(&>=/2) |> Enum.take(100) 1213 | end 1214 | end 1215 | 1216 | describe "departition/2" do 1217 | test "joins partitioned data" do 1218 | assert Flow.from_enumerable(1..10) 1219 | |> Flow.partition(stages: 4) 1220 | |> Flow.reduce(fn -> 0 end, &+/2) 1221 | |> Flow.departition(fn -> [] end, &[&1 | &2], &Enum.sort/1) 1222 | |> Enum.at(0) == [7, 10, 16, 22] 1223 | end 1224 | 1225 | test "joins partitioned data with window info" do 1226 | assert Flow.from_enumerable(1..10) 1227 | |> Flow.partition(stages: 4) 1228 | |> Flow.reduce(fn -> 0 end, &+/2) 1229 | |> Flow.departition(fn -> [] end, &[&1 | &2], &{&2, Enum.sort(&1)}) 1230 | |> Enum.at(0) == {:global, [7, 10, 16, 22]} 1231 | end 1232 | 1233 | test "joins uneven partitioned data" do 1234 | hash_fun = fn 1235 | 0 -> {0, 0} 1236 | x -> {x, 1} 1237 | end 1238 | 1239 | assert Flow.from_enumerable(1..10) 1240 | |> Flow.partition(stages: 2, window: Flow.Window.count(3), hash: hash_fun) 1241 | |> Flow.reduce(fn -> 0 end, &+/2) 1242 | |> Flow.departition(fn -> [] end, &[&1 | &2], &Enum.sort/1) 1243 | |> Enum.to_list() == [[0, 6], [15], [24], [10]] 1244 | end 1245 | 1246 | test "joins partitioned data with triggers" do 1247 | partition_opts = [ 1248 | stages: 4, 1249 | window: Flow.Window.global() |> Flow.Window.trigger_every(2) 1250 | ] 1251 | 1252 | assert Flow.from_enumerable(1..10) 1253 | |> Flow.partition(partition_opts) 1254 | |> 
Flow.reduce(fn -> 0 end, &+/2) 1255 | |> Flow.departition(fn -> [] end, &[&1 | &2], &Enum.sort/1) 1256 | |> Enum.at(0) == [6, 7, 7, 8, 10, 16, 22, 22] 1257 | end 1258 | 1259 | test "joins partitioned data with map operations" do 1260 | assert Flow.from_enumerable(1..10) 1261 | |> Flow.partition(stages: 4) 1262 | |> Flow.reduce(fn -> 0 end, &+/2) 1263 | |> Flow.departition(fn -> [] end, &[&1 | &2], & &1) 1264 | |> Flow.map(&Enum.sort/1) 1265 | |> Enum.at(0) == [7, 10, 16, 22] 1266 | end 1267 | 1268 | test "joins partitioned data with reduce operations" do 1269 | partition_opts = [ 1270 | stages: 4, 1271 | window: Flow.Window.global() |> Flow.Window.trigger_every(2) 1272 | ] 1273 | 1274 | assert Flow.from_enumerable(1..10) 1275 | |> Flow.partition(partition_opts) 1276 | |> Flow.reduce(fn -> 0 end, &+/2) 1277 | |> Flow.on_trigger(&{[&1], 0}) 1278 | |> Flow.departition(fn -> [] end, &[&1 | &2], &Enum.sort/1) 1279 | |> Flow.reduce(fn -> 0 end, &(Enum.sum(&1) + &2)) 1280 | |> Flow.on_trigger(&{[&1], 0}) 1281 | |> Enum.at(0) == 55 1282 | end 1283 | 1284 | test "with start_link/1" do 1285 | parent = self() 1286 | 1287 | {:ok, pid} = 1288 | Flow.from_enumerable(1..10) 1289 | |> Flow.partition(stages: 4) 1290 | |> Flow.reduce(fn -> 0 end, &+/2) 1291 | |> Flow.departition(fn -> [] end, &[&1 | &2], &send(parent, Enum.sort(&1))) 1292 | |> Flow.start_link() 1293 | 1294 | assert_receive [7, 10, 16, 22] 1295 | ref = Process.monitor(pid) 1296 | assert_receive {:DOWN, ^ref, _, _, _} 1297 | end 1298 | end 1299 | 1300 | describe "merge/3 through partition" do 1301 | defp merge_and_partition(options) do 1302 | flow1 = 1303 | Stream.take_every(1..100, 2) 1304 | |> Flow.from_enumerable() 1305 | |> Flow.map(&(&1 * 2)) 1306 | 1307 | flow2 = 1308 | Stream.take_every(2..100, 2) 1309 | |> Flow.from_enumerable() 1310 | |> Flow.map(&(&1 * 2)) 1311 | 1312 | Flow.partition([flow1, flow2], options) 1313 | end 1314 | 1315 | test "merges different flows together" do 1316 | assert 
merge_and_partition(stages: 4, min_demand: 5) 1317 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 1318 | |> Flow.on_trigger(&{[&1], &1}) 1319 | |> Enum.sum() == 10100 1320 | end 1321 | 1322 | test "allows custom partitioning" do 1323 | assert merge_and_partition(stages: 4, min_demand: 5, hash: fn x -> {x, 0} end) 1324 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1325 | |> Flow.on_trigger(&{[Enum.sum(&1)], &1}) 1326 | |> Enum.sort() == [0, 0, 0, 10100] 1327 | end 1328 | 1329 | test "allows custom windowing" do 1330 | window = 1331 | Flow.Window.fixed(1, :second, fn 1332 | x when x <= 100 -> 0 1333 | x when x <= 200 -> 1_000 1334 | end) 1335 | 1336 | assert merge_and_partition(window: window, stages: 4, min_demand: 5) 1337 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1338 | |> Flow.on_trigger(&{[Enum.sum(&1)], &1}) 1339 | |> Enum.sort() == [594, 596, 654, 706, 1248, 1964, 2066, 2272] 1340 | end 1341 | end 1342 | 1343 | describe "merge/3 through shuffle" do 1344 | defp merge_and_shuffle(options) do 1345 | flow1 = 1346 | Stream.take_every(1..100, 2) 1347 | |> Flow.from_enumerable() 1348 | |> Flow.map(&(&1 * 2)) 1349 | 1350 | flow2 = 1351 | Stream.take_every(2..100, 2) 1352 | |> Flow.from_enumerable() 1353 | |> Flow.map(&(&1 * 2)) 1354 | 1355 | Flow.shuffle([flow1, flow2], options) 1356 | end 1357 | 1358 | test "merges different flows together" do 1359 | assert merge_and_shuffle(stages: 1) 1360 | |> Flow.reduce(fn -> 0 end, &(&1 + &2)) 1361 | |> Flow.on_trigger(&{[&1], &1}) 1362 | |> Enum.sum() == 10100 1363 | end 1364 | 1365 | test "allows custom windowing" do 1366 | window = 1367 | Flow.Window.fixed(1, :second, fn 1368 | x when x <= 100 -> 0 1369 | x when x <= 200 -> 1_000 1370 | end) 1371 | 1372 | assert merge_and_shuffle(window: window, stages: 1) 1373 | |> Flow.reduce(fn -> [] end, &[&1 | &2]) 1374 | |> Flow.on_trigger(&{[Enum.sum(&1)], &1}) 1375 | |> Enum.sort() == [2550, 7550] 1376 | end 1377 | end 1378 | 1379 | describe "bounded_join/7" do 1380 | test "inner joins two 
matching flows" do 1381 | flow = 1382 | Flow.bounded_join( 1383 | :inner, 1384 | Flow.from_enumerable([0, 1, 2, 3]), 1385 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1386 | & &1, 1387 | &(&1 - 3), 1388 | &{&1, &2} 1389 | ) 1390 | 1391 | assert Enum.sort(flow) == [{1, 4}, {2, 5}, {3, 6}] 1392 | end 1393 | 1394 | test "inner joins two unmatching flows" do 1395 | flow = 1396 | Flow.bounded_join( 1397 | :inner, 1398 | Flow.from_enumerable([0, 1, 2, 3]), 1399 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1400 | & &1, 1401 | & &1, 1402 | &{&1, &2} 1403 | ) 1404 | 1405 | assert Enum.sort(flow) == [] 1406 | end 1407 | 1408 | test "left joins two matching flows" do 1409 | flow = 1410 | Flow.bounded_join( 1411 | :left_outer, 1412 | Flow.from_enumerable([0, 1, 2, 3]), 1413 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1414 | & &1, 1415 | &(&1 - 3), 1416 | &{&1, &2} 1417 | ) 1418 | 1419 | assert Enum.sort(flow) == [{0, nil}, {1, 4}, {2, 5}, {3, 6}] 1420 | end 1421 | 1422 | test "left joins two unmatching flows" do 1423 | flow = 1424 | Flow.bounded_join( 1425 | :left_outer, 1426 | Flow.from_enumerable([0, 1, 2, 3]), 1427 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1428 | & &1, 1429 | & &1, 1430 | &{&1, &2} 1431 | ) 1432 | 1433 | assert Enum.sort(flow) == [{0, nil}, {1, nil}, {2, nil}, {3, nil}] 1434 | end 1435 | 1436 | test "right joins two matching flows" do 1437 | flow = 1438 | Flow.bounded_join( 1439 | :right_outer, 1440 | Flow.from_enumerable([0, 1, 2, 3]), 1441 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1442 | & &1, 1443 | &(&1 - 3), 1444 | &{&1, &2} 1445 | ) 1446 | 1447 | assert Enum.sort(flow) == [{1, 4}, {2, 5}, {3, 6}, {nil, 7}, {nil, 8}] 1448 | end 1449 | 1450 | test "right joins two unmatching flows" do 1451 | flow = 1452 | Flow.bounded_join( 1453 | :right_outer, 1454 | Flow.from_enumerable([0, 1, 2, 3]), 1455 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1456 | & &1, 1457 | & &1, 1458 | &{&1, &2} 1459 | ) 1460 | 1461 | assert Enum.sort(flow) == [{nil, 4}, {nil, 5}, {nil, 6}, {nil, 7}, 
{nil, 8}] 1462 | end 1463 | 1464 | test "outer joins two matching flows" do 1465 | flow = 1466 | Flow.bounded_join( 1467 | :full_outer, 1468 | Flow.from_enumerable([0, 1, 2, 3]), 1469 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1470 | & &1, 1471 | &(&1 - 3), 1472 | &{&1, &2} 1473 | ) 1474 | 1475 | assert Enum.sort(flow) == [{0, nil}, {1, 4}, {2, 5}, {3, 6}, {nil, 7}, {nil, 8}] 1476 | end 1477 | 1478 | test "outer joins two unmatching flows" do 1479 | flow = 1480 | Flow.bounded_join( 1481 | :full_outer, 1482 | Flow.from_enumerable([0, 1, 2, 3]), 1483 | Flow.from_enumerable([4, 5, 6, 7, 8]), 1484 | & &1, 1485 | & &1, 1486 | &{&1, &2} 1487 | ) 1488 | 1489 | assert Enum.sort(flow) == [ 1490 | {0, nil}, 1491 | {1, nil}, 1492 | {2, nil}, 1493 | {3, nil}, 1494 | {nil, 4}, 1495 | {nil, 5}, 1496 | {nil, 6}, 1497 | {nil, 7}, 1498 | {nil, 8} 1499 | ] 1500 | end 1501 | 1502 | test "joins two flows followed by mapper operation" do 1503 | assert Flow.bounded_join( 1504 | :inner, 1505 | Flow.from_enumerable([0, 1, 2, 3]), 1506 | Flow.from_enumerable([4, 5, 6]), 1507 | & &1, 1508 | &(&1 - 3), 1509 | &{&1, &2} 1510 | ) 1511 | |> Flow.map(fn {k, v} -> k + v end) 1512 | |> Enum.sort() == [5, 7, 9] 1513 | end 1514 | 1515 | test "joins two flows followed by reduce" do 1516 | assert Flow.bounded_join( 1517 | :inner, 1518 | Flow.from_enumerable([0, 1, 2, 3]), 1519 | Flow.from_enumerable([4, 5, 6]), 1520 | & &1, 1521 | &(&1 - 3), 1522 | &{&1, &2}, 1523 | stages: 2 1524 | ) 1525 | |> Flow.reduce(fn -> 0 end, fn {k, v}, acc -> k + v + acc end) 1526 | |> Flow.on_trigger(&{[&1], &1}) 1527 | |> Enum.sort() == [9, 12] 1528 | end 1529 | 1530 | test "joins mapper and reducer flows" do 1531 | assert Flow.bounded_join( 1532 | :inner, 1533 | Flow.from_enumerable(0..9) |> Flow.partition(), 1534 | Flow.from_enumerable(0..9) |> Flow.map(&(&1 + 10)), 1535 | & &1, 1536 | &(&1 - 10), 1537 | &{&1, &2}, 1538 | stages: 2 1539 | ) 1540 | |> Flow.reduce(fn -> 0 end, fn {k, v}, acc -> k + v + acc end) 1541 | |> 
Flow.on_trigger(&{[&1], &1}) 1542 | |> Enum.sort() == [44, 146] 1543 | end 1544 | 1545 | test "outer joins two flows with windows" do 1546 | window = Flow.Window.fixed(10, :millisecond, & &1) |> Flow.Window.trigger_every(2) 1547 | # Notice how 9 and 12 do not form a pair for being in different windows. 1548 | flow = 1549 | Flow.window_join( 1550 | :full_outer, 1551 | Flow.from_enumerable([0, 1, 2, 3, 9, 10, 11]), 1552 | Flow.from_enumerable([4, 5, 6, 7, 8, 12, 13]), 1553 | window, 1554 | & &1, 1555 | &(&1 - 3), 1556 | &{&1, &2} 1557 | ) 1558 | 1559 | assert Enum.sort(flow) == [ 1560 | {0, nil}, 1561 | {1, 4}, 1562 | {2, 5}, 1563 | {3, 6}, 1564 | {9, nil}, 1565 | {10, 13}, 1566 | {11, nil}, 1567 | {nil, 7}, 1568 | {nil, 8}, 1569 | {nil, 12} 1570 | ] 1571 | end 1572 | end 1573 | 1574 | describe "coordinator" do 1575 | test "subscribes to coordinator after into_stages start" do 1576 | {:ok, counter_pid} = GenStage.start_link(Counter, 0) 1577 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 1578 | 1579 | {:ok, pid} = 1580 | Flow.from_stages([counter_pid], stages: 1) 1581 | |> Flow.map(&(&1 * 2)) 1582 | |> Flow.into_stages([]) 1583 | 1584 | GenStage.sync_subscribe(forwarder, to: pid, cancel: :transient, max_demand: 1) 1585 | 1586 | assert_receive {:consumed, [2]} 1587 | assert_receive {:consumed, [4]} 1588 | assert_receive {:consumed, [6]} 1589 | end 1590 | 1591 | test "subscribes to coordinator after into_specs start" do 1592 | {:ok, forwarder} = GenStage.start_link(Forwarder, self()) 1593 | 1594 | {:ok, pid} = 1595 | Flow.from_specs([{Counter, 0}], stages: 1) 1596 | |> Flow.map(&(&1 * 2)) 1597 | |> Flow.into_specs([{{Sleeper, self()}, [max_demand: 1]}]) 1598 | 1599 | GenStage.sync_subscribe(forwarder, to: pid, cancel: :transient, max_demand: 1) 1600 | 1601 | assert_receive {:consumed, _, [0]} 1602 | assert_receive {:consumed, [2]} 1603 | assert_receive {:consumed, [4]} 1604 | assert_receive {:consumed, [6]} 1605 | end 1606 | 1607 | @tag :capture_log 1608 | 
test "terminates according to tree in into_specs/3" do
      # Trap exits so the :EXIT signals asserted below arrive as messages.
      Process.flag(:trap_exit, true)

      {:ok, pid} =
        Flow.from_specs([{Counter, 0}], stages: 1)
        |> Flow.map(&(&1 * 2))
        |> Flow.into_specs([{{Forwarder, self()}, max_demand: 1}])

      # assert_receive {:consumed, [0]}

      # The supervisor pid is read from the coordinator state; the third child
      # entry is the one this test treats as the producer.
      [_consumer, _producer_consumer, {_, producer, _, _}] =
        Supervisor.which_children(:sys.get_state(pid).supervisor)

      # Link to the producer so its exit is also delivered to this process.
      Process.link(producer)
      Process.exit(producer, :oops)
      assert_receive {:EXIT, ^producer, :oops}
      assert_receive {:EXIT, ^pid, :shutdown}
    end

    test "terminates according to consumer strategy in into_specs/3" do
      Process.flag(:trap_exit, true)

      # restart: :permanent with shutdown exit
      spec = {Sleeper, self()}

      {:ok, pid} =
        Flow.from_specs([{Counter, 0}], stages: 1)
        |> Flow.map(&(&1 * 2))
        |> Flow.into_specs([{spec, max_demand: 4}])

      sleeper = Process.whereis(Sleeper)
      assert_receive {:consumed, ^sleeper, [0, 2]}
      Process.exit(sleeper, :oops)
      assert_receive {:EXIT, ^pid, :shutdown}

      # restart: :permanent with normal exit
      spec = {Sleeper, self()}

      {:ok, pid} =
        Flow.from_specs([{Counter, 0}], stages: 1)
        |> Flow.map(&(&1 * 2))
        |> Flow.into_specs([{spec, max_demand: 4}])

      sleeper = Process.whereis(Sleeper)
      assert_receive {:consumed, ^sleeper, [0, 2]}
      Process.exit(sleeper, :shutdown)
      assert_receive {:EXIT, ^pid, :normal}

      # restart: :transient with no exit
      spec = Supervisor.child_spec(spec, restart: :transient)

      # Rebind `pid` to the flow started here. Leaving it bound to the previous
      # flow (whose :EXIT message was already consumed above) would make the
      # refute below pass no matter what this flow does.
      {:ok, pid} =
        Flow.from_specs([{Counter, 0}], stages: 1)
        |> Flow.map(&(&1 * 2))
        |> Flow.into_specs([{spec, max_demand: 4}])

      sleeper = Process.whereis(Sleeper)
      assert_receive {:consumed, ^sleeper, [0, 2]}
      Process.exit(sleeper, :shutdown)
      refute_receive {:EXIT, ^pid, _}
    end

    @tag :capture_log
    test "fails to subscribe to coordinator with subscription timeout" do
      Process.flag(:trap_exit, true)
      {:ok, forwarder} = GenStage.start_link(Forwarder, self())

      # The stream yields :start and raises when the next element is computed;
      # with a zero subscribe timeout the start is expected to report a timeout.
      assert {:error, {:timeout, _}} =
               :start
               |> Stream.iterate(fn _ -> raise "oops" end)
               |> Flow.from_enumerable(stages: 1, max_demand: 1)
               |> Flow.into_stages([forwarder], subscribe_timeout: 0)
    end

    test "can be consumed as a stream" do
      {:ok, counter_pid} = GenStage.start_link(Counter, 0)

      # Only odd numbers pass the filter, so the first five events are 1..9.
      {:ok, pid} =
        Flow.from_stages([counter_pid], stages: 1, max_demand: 1)
        |> Flow.filter(&(rem(&1, 2) == 1))
        |> Flow.into_stages([])

      assert GenStage.stream([{pid, cancel: :transient}]) |> Enum.take(5) == [1, 3, 5, 7, 9]
    end

    test "sets demand to proxied" do
      {:ok, counter_pid} = GenStage.start_link(Counter, 0)
      {:ok, forwarder} = GenStage.start_link(Forwarder, self())

      {:ok, pid} =
        Flow.from_stages([counter_pid], stages: 1, max_demand: 1)
        |> Flow.filter(&(rem(&1, 2) == 1))
        |> Flow.into_stages([forwarder], demand: :accumulate)

      # Nothing may be consumed while demand is accumulated; events are only
      # expected after demand is switched to :forward on the returned pid.
      refute_received {:consumed, [1]}
      assert GenStage.demand(pid, :forward)
      assert_receive {:consumed, [1]}
    end
  end

  describe "mapper_reducer" do
    test "triggers on_init callback once for each stage" do
      parent = self()

      # on_init forwards its argument to the test process; with two stages the
      # messages {0, 2} and {1, 2} are expected.
      [1, 2, 3, 4]
      |> Flow.from_enumerable(stages: 2, on_init: &send(parent, &1))
      |> Flow.map(& &1)
      |> Flow.run()

      assert_receive {0, 2}
      assert_receive {1, 2}
    end
  end
end
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
# The default timeout for assert_receive can be overridden through the
# ELIXIR_ASSERT_TIMEOUT environment variable; it falls back to "1000".
raw_timeout = System.get_env("ELIXIR_ASSERT_TIMEOUT") || "1000"
timeout = String.to_integer(raw_timeout)

ExUnit.start(assert_receive_timeout: timeout)
--------------------------------------------------------------------------------