├── .formatter.exs ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .tool-versions ├── CHANGELOG.md ├── LICENSE ├── README.md ├── config └── config.exs ├── lib ├── parent.ex ├── parent │ ├── application.ex │ ├── client.ex │ ├── gen_server.ex │ ├── meta_registry.ex │ ├── registry.ex │ ├── restart.ex │ ├── restart_counter.ex │ ├── state.ex │ └── supervisor.ex ├── periodic.ex └── periodic │ ├── logger.ex │ └── test.ex ├── mix.exs ├── mix.lock ├── scripts └── bench.exs └── test ├── parent ├── client_test.exs ├── gen_server_test.exs └── supervisor_test.exs ├── parent_test.exs ├── periodic └── logger_test.exs ├── periodic_test.exs ├── support ├── capture_log.ex ├── periodic │ └── test_helper.ex └── test_server.ex └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"], 4 | locals_without_parens: [check: :*, all: :*, gen: :*] 5 | ] 6 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | push: 5 | 6 | jobs: 7 | main: 8 | # Old ubuntu needed to support erlang 22 9 | runs-on: ubuntu-20.04 10 | strategy: 11 | matrix: 12 | include: 13 | - elixir: '1.10.4' 14 | otp: '22.3.4.27' 15 | check-format: true 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: erlef/setup-beam@v1 19 | with: 20 | otp-version: ${{ matrix.otp }} 21 | elixir-version: ${{ matrix.elixir }} 22 | - uses: actions/cache@v4 23 | with: 24 | path: | 25 | _build 26 | deps 27 | key: ${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }} 28 | - run: mix deps.get 29 | - run: mix compile --warnings-as-errors 30 | - run: MIX_ENV=test mix compile --warnings-as-errors 31 | - run: MIX_ENV=prod mix compile --warnings-as-errors 32 | - run: mix format 
--check-formatted 33 | if: ${{ matrix.check-format }} 34 | - run: mix docs 35 | - run: mix test 36 | - run: mix dialyzer 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | parent-*.tar 24 | 25 | -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | erlang 23.1 2 | elixir 1.11-otp-23 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.12.1 2 | 3 | - relax telemetry dep requirement 4 | 5 | # 0.12.0 6 | 7 | ## Breaking changes 8 | 9 | - A child which times out (according to its `:timeout` setting) is terminated according to its shutdown specification, using the `:timeout` exit reason for non-forceful termination. Previously timed out children were always forcefully terminated. 
10 | 11 | ## Bug fixes 12 | 13 | - Correctly takes down anonymous bound siblings of an anonymous child 14 | 15 | # 0.11.2 16 | 17 | - Properly cleans up stopped child from the internal state 18 | - Correctly updates bindings when a non-ephemeral child stops 19 | 20 | # 0.11.1 21 | 22 | - Fixes a crash in `Parent.child_spec/1` 23 | 24 | # 0.11.0 25 | 26 | ## Summary 27 | 28 | This version adds the remainder of the `Supervisor` behaviour to `Parent`, such as automatic child restarts, self-termination when maximum restart intensity is exceeded, and binding lifecycles of children. The new module `Parent.Supervisor` provides the highest-level interface, roughly comparable to a callbackless `Supervisor`. However, all of the parenting features are available in lower level modules, `Parent.GenServer` (which is similar to a callback-based `Supervisor` + `GenServer`) and `Parent` (which can be thought of as a toolkit for building custom parent behaviours and processes). The new module `Parent.Client` can be used to interact with any parent process from the outside (i.e. from other processes). 29 | 30 | Beyond just mirroring supervisor functionality, `Parent` explores some different approaches, most notably: 31 | 32 | - There are no supervision strategies. Instead, the options `:binds_to` and `:shutdown_group` can be used to bind lifecycles of children. 33 | - There's no per-parent distinction between static and dynamic (aka `:simple_one_for_one` or `DynamicSupervisor`) parents. The same process can be used to parent both static and dynamic children, using the per-child `:ephemeral?` setting to control the behaviour for each child. 34 | - Parent supports fine-grained children discovery, and the optional `:registry?` option for exposing children info via an ETS table. 35 | 36 | As a result, `Parent` can help in flattening and simplifying the supervision tree. 
Refer to documentation for more details, starting with the README page for a quick showcase, and the `Parent` module docs for a detailed reference. 37 | 38 | ## Breaking changes 39 | 40 | - Requires Elixir 1.10+ 41 | - Callback `GenServer.handle_child_terminated` has been renamed to `GenServer.handle_stopped_children`. In addition, the callback now receives only two arguments. Refer to documentation for details. 42 | - Children are by default permanent and non-ephemeral, which changes the behaviour compared to previous versions. To retain the previous behaviour, include `restart: :temporary, ephemeral?: true` in a childspec of every child. See `Parent` documentation for details. 43 | - `Parent.await_child_termination/2` is removed. 44 | - Return type of functions `Parent.children/0`, `Parent.handle_message/1`, `Parent.shutdown_child/1`, `Parent.restart_child/1` has changed. See `Parent` documentation for details. 45 | - Previously deprecated `Parent.GenServer` functions are removed. You can use `Parent.Client` functions instead. 46 | 47 | ## Additions 48 | 49 | - Support for automatic restarts of children via the `:restart` option, and maximum restart intensity via the `:max_restarts` option. 50 | - Support for ephemeral children, which allows a per-child control of static/dynamic supervision behaviour. 51 | - Support for binding children lifecycles via options `:binds_to` and `:shutdown_group`. 52 | - Parent can act as an ETS-based registry using the `registry?: true` parent option. 53 | - Added `Parent.Supervisor`, which provides a high-level supervisor-like functionality. 54 | - Added `Parent.Client`, which provides API for interacting with parent processes from the outside. 55 | 56 | # 0.11.0-rc.1 57 | 58 | When restarting a child, parent will unconditionally restart all the siblings bound to it (directly or transitively), irrespective of their restart strategy. 
59 | 60 | This version introduces the concept of ephemeral children, through the `:ephemeral?` flag in child specification. This flag controls how a parent manages non-running children, which is a state a child can enter if the child's start function returns `:ignore`, if a transient child terminates normally, or if a temporary child terminates. 61 | 62 | If a child is not ephemeral (default), the parent will still keep the child in its internal state, and the non-running child will appear in the result of functions such as `Parent.children/0` with its pid set to `:undefined`. This mimics the behaviour of "static" (one_for_one, one_for_all, rest_for_one) supervisors. 63 | 64 | If a child is ephemeral (`ephemeral?: true`), the parent will remove the child from its state when the child is not running. This mimics the behaviour of `DynamicSupervisor`. 65 | 66 | ## Breaking changes 67 | 68 | - Temporary, transient, and non-running children are restarted if the process they depend on is being restarted by the parent. 69 | - Non-running children will by default remain in the parent's state. If you use parent to dynamically start temporary children, you should make your children ephemeral by including `ephemeral?: true` in the childspec. 70 | - `Parent.GenServer.handle_stopped_children` is not invoked for non-ephemeral children. If you rely on this callback, you need to make the children ephemeral. 71 | 72 | ## Additions 73 | 74 | - Support for ephemeral children, which allows a per-child control of static/dynamic supervision behaviour. 75 | 76 | To change this, set the child as ephemeral 77 | 78 | # 0.11.0-rc.0 79 | 80 | This version adds the remaining of the `Supervisor` behaviour to `Parent`, such as automatic child restarts, self-termination when maximum restart intensity is exceeded, and binding lifecycles of children. The new high-level module `Parent.Supervisor` provides the highest-level interface, roughly comparable to a callbackless `Supervisor`. 
However, all of the parenting features are available in lower level modules, `Parent.GenServer` (which is similar to a callback-based `Supervisor` + `GenServer`) and `Parent` (which can be thought of as a toolkit for building custom parent behaviours and processes). The new module `Parent.Client` can be used to interact with any parent process from the outside (i.e. from other processes). 81 | 82 | Beyond just mirroring supervisor functionality, `Parent` explores some different approaches, most notably: 83 | 84 | - There are no supervision strategies. Instead, the options `:binds_to` and `:shutdown_group` can be used to bind lifecycles of children. 85 | - There's no special option for controlling dynamic mode (aka `:simple_one_for_one` or `DynamicSupervisor`). In the vast majority of cases the usage is exactly the same, and you can easily combine statical and dynamical children under the same supervisor. 86 | - Parent supports more fine-grained children discovery, and an optional `:registry?` option for exposing children info via ETS table. 87 | 88 | As a result, `Parent` can help in flattening and simplifying the supervision tree. 89 | 90 | Refer to documentation for more details. 91 | 92 | ## Breaking changes 93 | 94 | - Requires Elixir 1.10+ 95 | - Callback `GenServer.handle_child_terminated` has been renamed to `GenServer.handle_stopped_children`. In addition, the callback now receives only two arguments. Refer to documentation for details. 96 | - Children are by default permanent, so they are automatically restarted. To retain the previous behaviour of your existing parent processes you need to explicitly set the `:restart` option of your children to `:temporary`. 97 | - `Parent.await_child_termination/2` is removed. 98 | - Return type of functions `Parent.children/0`, `Parent.handle_message/1`, `Parent.shutdown_child/1`, `Parent.restart_child/1` has changed. Refer to documentation for details. 
99 | - Previously deprecated `Parent.GenServer` functions are removed. 100 | 101 | ## Additions 102 | 103 | - Support for automatic restarts of children via the `:restart` option, and maximum restart intensity via the `:max_restarts` option. 104 | - Support for binding children lifecycles via options `:binds_to` and `:shutdown_group`. 105 | - Parent can also act as an ETS-based registry using the `registry?: true` parent option. 106 | - Added `Parent.Supervisor`, which provides a high-level supervisor-like functionality. 107 | - Added `Parent.Client`, which provides API for interacting with parent processes from the outside. 108 | 109 | # 0.10.0 110 | 111 | - **[Deprecation]** - all `Parent.GenServer` functions except for `start_link` have been deprecated. Use equivalent functions from the `Parent` module instead. 112 | - The `Parent` module which provides plumbing for building custom parent processes and behaviours is now included in the public API. 113 | 114 | # 0.9.0 115 | 116 | - `Parent.GenServer` terminates children synchronously, in the reverse start order. The same change holds for `shutdown_all/1`. 117 | 118 | # 0.8.0 119 | 120 | ## Periodic 121 | 122 | - Improved support for custom scheduling via the `:when` option. 123 | - Simplified synchronous testing via `Periodic.sync_tick/2`. 124 | 125 | ### The `:when` option 126 | 127 | The `:when` option assists the implementation of custom schedulers. For example, let's say we want to run a job once a day at midnight. In previous version, the suggested approach was as follows: 128 | 129 | ```elixir 130 | Periodic.start_link( 131 | every: :timer.minutes(1), 132 | run: fn -> 133 | with %Time{hour: 0, minute: 0} <- Time.utc_now(), 134 | do: run_job() 135 | end 136 | ) 137 | ``` 138 | 139 | Note that with this approach we're actually starting a new job process every minute, and making a decision in that process. 
The problem here is that telemetry events and log entries will be emitted once every minute, instead of just once per day, which will lead to a lot of unwanted noise. Consequently, with this approach some parts of `Periodic` (telemetry, logging, handling of overlapping jobs) become useless. 140 | 141 | The `:when` option can help us here. Let's see the usage first. The previous example can be rewritten as: 142 | 143 | ```elixir 144 | Periodic.start_link( 145 | every: :timer.minutes(1), 146 | when: fn -> match?(%Time{hour: 0, minute: 0}, Time.utc_now()) end, 147 | run: &run_job/0 148 | ) 149 | ``` 150 | 151 | Unlike a custom check executed in the job, the `:when` function is invoked inside the scheduler process. If the function returns `false`, the job won't be started at all. As a result, all the features of `Periodic`, including telemetry and logging, will work exactly as expected. For example, telemetry events will only be emitted at midnight. 152 | 153 | ### Synchronous manual ticking 154 | 155 | Previously `Periodic.Test` exposed the `tick/1` function which allowed clients to manually tick the scheduler. The problem here was that `tick` would return before the job finished, so client code needed to perform a sequence of steps to test the scheduler: 156 | 157 | 1. Provide the telemetry id 158 | 2. Setup telemetry handler in the test 159 | 3. Invoke `tick/1` 160 | 4. Invoke `assert_periodic_event(telemetry_id, :finished, %{reason: :normal})` to wait for the job to finish, and assert that it hasn't crashed. 161 | 162 | The new `sync_tick` function turns this into a single step: 163 | 164 | ```elixir 165 | assert Periodic.Test.sync_tick(pid_or_registered_name) == {:ok, :normal} 166 | ``` 167 | 168 | # 0.7.0 169 | 170 | ## Periodic 171 | 172 | - **[Breaking]** The options `:log_level` and `:log_meta` are not supported anymore. Logging is done via telemetry and the provided `Periodic.Logger`. 
173 | - **[Breaking]** The value `:infinity` isn't accepted for the options `:every` and `:initial_delay` anymore. Instead, if you want to avoid running the job in the test environment, set the `:mode` option to `:manual`. 174 | - Telemetry events are now emitted. The new module `Periodic.Logger` can be used to log these events. 175 | - Added `Periodic.Test` to support deterministic testing of periodic jobs. 176 | - Scheduler process can be registered via the `:name` option. 177 | - Default job shutdown is changed to 5 seconds (from `:brutal_kill`), and can now be configured via the `:job_shutdown` option. This allows polite termination (the parent of the scheduler process will wait until the job is done). 178 | - Scheduling is now based on absolute monotonic time, which reduces the chance of clock skew in regular delay mode. 179 | - Documentation is thoroughly reworked. Check it out [here](https://hexdocs.pm/parent/Periodic.html#content). 180 | 181 | # 0.6.0 182 | 183 | - The `:overlap?` option in `Periodic` is deprecated. Use `:on_overlap` instead. 184 | - Added support for termination of previous job instances in `Periodic` via the `:on_overlap` option. 185 | - Added support for the shifted delay mode in `Periodic` via the `:delay_mode` option. 186 | 187 | # 0.5.1 188 | 189 | - support handle_continue on Elixir 1.7.x or greater 190 | 191 | # 0.5.0 192 | 193 | - add supervisor compliance to support hot code reloads 194 | 195 | # 0.4.1 196 | 197 | - Fixed `Periodic` typespec. 198 | 199 | # 0.4.0 200 | 201 | - Added support for `:initial_delay` in `Periodic` 202 | 203 | # 0.3.0 204 | 205 | - Added `Parent.GenServer.await_child_termination/2` 206 | 207 | # 0.2.0 208 | 209 | - Added the support for child timeout. See the "Timeout" section in `Parent.GenServer`. 
210 | 211 | # 0.1.1 212 | 213 | - Bugfix: termination of all children would crash if a timeout occurs while terminating a child 214 | 215 | # 0.1.0 216 | 217 | - First version 218 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018, Saša Jurić 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parent 2 | 3 | [![hex.pm](https://img.shields.io/hexpm/v/parent.svg?style=flat-square)](https://hex.pm/packages/parent) 4 | [![hexdocs.pm](https://img.shields.io/badge/docs-latest-green.svg?style=flat-square)](https://hexdocs.pm/parent/) 5 | [![Build Status](https://travis-ci.org/sasa1977/parent.svg?branch=master)](https://travis-ci.org/sasa1977/parent) 6 | 7 | Support for custom parenting of processes. See [docs](https://hexdocs.pm/parent/Parent.html) for reference. 8 | 9 | Parent is a toolkit for building processes which parent other children and manage their life cycles. The library provides features similar to `Supervisor`, such as the support for automatic restarts and failure escalation (maximum restart intensity), with some additional benefits that can help flattening the supervision tree, reduce the amount of custom process monitors, and simplify the process structure. The most important differences from `Supervisor` are: 10 | 11 | - No supervision strategies (one_for_one, rest_for_one, etc). Instead, Parent uses bindings and shutdown groups to achieve the similar behaviour. 12 | - No distinction between static and dynamic supervisors. Instead, a per-child option called `:ephemeral?` is used to achieve dynamic behaviour. 13 | - Basic registry-like capabilities for simple children discovery baked-in directly into parent. 14 | - Exposed lower level plumbing modules, such as `Parent.GenServer` and `Parent`, which can be used to build custom parent processes (i.e. supervisors with custom logic). 
15 | 16 | 17 | ## Examples 18 | 19 | ### Basic supervisor 20 | 21 | ```elixir 22 | Parent.Supervisor.start_link( 23 | # child spec is a superset of supervisor child specification 24 | child_specs, 25 | 26 | # parent options, note that there's no `:strategy` 27 | max_restarts: 3, 28 | max_seconds: 5, 29 | 30 | # std. Supervisor/GenServer options 31 | name: __MODULE__ 32 | ) 33 | ``` 34 | 35 | ### Binding lifecycles 36 | 37 | ```elixir 38 | Parent.Supervisor.start_link( 39 | [ 40 | Parent.child_spec(Child1), 41 | Parent.child_spec(Child2, binds_to: [Child1]), 42 | Parent.child_spec(Child3, binds_to: [Child1]), 43 | Parent.child_spec(Child4, shutdown_group: :children4_to_6), 44 | Parent.child_spec(Child5, shutdown_group: :children4_to_6), 45 | Parent.child_spec(Child6, shutdown_group: :children4_to_6), 46 | Parent.child_spec(Child7, binds_to: [Child1]), 47 | ] 48 | ) 49 | ``` 50 | 51 | - if `Child1` is restarted, `Child2`, `Child3`, and `Child7` will be restarted too 52 | - if `Child2`, `Child3`, or `Child7` is restarted, nothing else is restarted 53 | - if any of `Child4`, `Child5`, or `Child6` is restarted, all other processes from the shutdown group are restarted too 54 | 55 | ### Discovering siblings during startup 56 | 57 | ```elixir 58 | Parent.Supervisor.start_link( 59 | [ 60 | Parent.child_spec(Child1), 61 | Parent.child_spec(Child2, binds_to: [Child1]), 62 | # ... 63 | ] 64 | ) 65 | 66 | defmodule Child2 do 67 | def start_link do 68 | # can be safely invoked inside the parent process 69 | child1 = Parent.child_pid(:child1) 70 | 71 | # ... 72 | end 73 | end 74 | ``` 75 | 76 | ### Pausing and resuming a part of the system 77 | 78 | ```elixir 79 | # stops child1 and all children depending on it, removing it from the parent 80 | stopped_children = Parent.Client.shutdown_child(some_parent, :child1) 81 | 82 | # ... 
83 | 84 | # returns all stopped children back to the parent 85 | Parent.Client.return_children(some_parent, stopped_children) 86 | ``` 87 | 88 | ### Dynamic supervisor with anonymous children 89 | 90 | ```elixir 91 | Parent.Supervisor.start_link([], name: MySup) 92 | 93 | # set `ephemeral?: true` for dynamic children if child is temporary/transient 94 | {:ok, pid1} = Parent.Client.start_child(MySup, Parent.child_spec(Child, id: nil, ephemeral?: true)) 95 | {:ok, pid2} = Parent.Client.start_child(MySup, Parent.child_spec(Child, id: nil, ephemeral?: true)) 96 | # ... 97 | 98 | Parent.Client.shutdown_child(MySup, pid1) 99 | Parent.Client.restart_child(MySup, pid2) 100 | ``` 101 | 102 | ### Dynamic supervisor with child discovery 103 | 104 | ```elixir 105 | Parent.Supervisor.start_link([], name: MySup) 106 | 107 | # meta is an optional value associated with the child 108 | Parent.Client.start_child(MySup, Parent.child_spec(Child, id: id1, ephemeral?: true, meta: some_meta)) 109 | Parent.Client.start_child(MySup, Parent.child_spec(Child, id: id2, ephemeral?: true, meta: another_meta)) 110 | # ... 111 | 112 | # synchronous calls into the parent process 113 | pid = Parent.Client.child_pid(MySup, id1) 114 | meta = Parent.Client.child_meta(MySup, id1) 115 | all_children = Parent.Client.children(MySup) 116 | ``` 117 | 118 | Optional ETS-powered registry: 119 | 120 | ```elixir 121 | Parent.Supervisor.start_link([], registry?: true) 122 | 123 | # start some children 124 | 125 | # ETS lookup, no call into parent involved 126 | Parent.Client.child_pid(my_sup, id1) 127 | Parent.Client.children(my_sup) 128 | ``` 129 | 130 | ### Per-child max restart frequency 131 | 132 | ```elixir 133 | Parent.Supervisor.start_link( 134 | [ 135 | Parent.child_spec(Child1, max_restarts: 10, max_seconds: 10), 136 | Parent.child_spec(Child2, max_restarts: 3, max_seconds: 5) 137 | ], 138 | 139 | # Per-parent max restart frequency can be disabled, or a parent-wide limit can be used. 
In the 140 | # latter case make sure that this limit is higher than the limit of any child. 141 | max_restarts: :infinity 142 | ) 143 | ``` 144 | 145 | ### Module-based supervisor 146 | 147 | ```elixir 148 | defmodule MySup do 149 | use Parent.GenServer 150 | 151 | def start_link(init_arg), 152 | do: Parent.GenServer.start_link(__MODULE__, init_arg, name: __MODULE__) 153 | 154 | @impl GenServer 155 | def init(_init_arg) do 156 | Parent.start_all_children!(children) 157 | {:ok, initial_state} 158 | end 159 | end 160 | ``` 161 | 162 | ### Restarting with a delay 163 | 164 | ```elixir 165 | defmodule MySup do 166 | use Parent.GenServer 167 | 168 | def start_link(init_arg), 169 | do: Parent.GenServer.start_link(__MODULE__, init_arg, name: __MODULE__) 170 | 171 | @impl GenServer 172 | def init(_init_arg) do 173 | # Make sure that children are temporary and ephemeral b/c otherwise `handle_stopped_children/2` 174 | # won't be invoked. 175 | Parent.start_all_children!(children) 176 | {:ok, initial_state} 177 | end 178 | 179 | @impl Parent.GenServer 180 | def handle_stopped_children(stopped_children, state) do 181 | # invoked when a child stops and is not restarted 182 | Process.send_after(self(), {:restart, stopped_children}, delay) 183 | {:noreply, state} 184 | end 185 | 186 | def handle_info({:restart, stopped_children}, state) do 187 | # Returns the child to the parent preserving its place according to startup order and bumping 188 | # its restart count. This is basically a manual restart. 
189 | Parent.return_children(stopped_children) 190 | {:noreply, state} 191 | end 192 | end 193 | ``` 194 | 195 | ### Starting additional children after a child stops 196 | 197 | ```elixir 198 | defmodule MySup do 199 | use Parent.GenServer 200 | 201 | def start_link(init_arg), 202 | do: Parent.GenServer.start_link(__MODULE__, init_arg, name: __MODULE__) 203 | 204 | @impl GenServer 205 | def init(_init_arg) do 206 | Parent.start_child(first_child_spec) 207 | {:ok, initial_state} 208 | end 209 | 210 | @impl Parent.GenServer 211 | def handle_stopped_children(%{child1: info}, state) do 212 | Parent.start_child(other_children) 213 | {:noreply, state} 214 | end 215 | 216 | def handle_stopped_children(_other, state), do: {:noreply, state} 217 | end 218 | ``` 219 | 220 | ### Building a custom parent process or behaviour from scratch 221 | 222 | ```elixir 223 | defp init_process do 224 | Parent.initialize(parent_opts) 225 | start_some_children() 226 | loop() 227 | end 228 | 229 | defp loop() do 230 | receive do 231 | msg -> 232 | case Parent.handle_message(msg) do 233 | # parent handled the message 234 | :ignore -> loop() 235 | 236 | # parent handled the message and returned some useful information 237 | {:stopped_children, stopped_children} -> handle_stopped_children(stopped_children) 238 | 239 | # not a parent message 240 | nil -> custom_handle_message(msg) 241 | end 242 | end 243 | end 244 | ``` 245 | 246 | ## Status 247 | 248 | This library has seen production usage in a couple of different projects. However, features such as automatic restarts and ETS registry are pretty fresh (added in late 2020) and so they haven't seen any serious production testing yet. 249 | 250 | Based on a very quick & shallow test, Parent is about 3x slower and consumes about 2x more memory than DynamicSupervisor. 251 | 252 | The API is prone to significant changes. 253 | 254 | Compared to supervisor crash reports, the error logging is very basic and probably not sufficient. 
255 | 256 | ## License 257 | 258 | [MIT](./LICENSE) 259 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | config :logger, :console, 4 | format: "$metadata[$level] $message\n", 5 | metadata: [] 6 | -------------------------------------------------------------------------------- /lib/parent.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent do 2 | @moduledoc """ 3 | Functions for implementing a parent process. 4 | 5 | A parent process is a process that manages the lifecycle of its children. Typically the simplest 6 | approach is to use higher-level abstractions, such as `Parent.Supervisor` or `Parent.GenServer`. 7 | The common behaviour for every parent process is implemented in this module, and therefore it is 8 | described in this document. 9 | 10 | ## Overview 11 | 12 | A parent process has the following properties: 13 | 14 | 1. It traps exits and uses the `shutdown: :infinity` shutdown strategy. 15 | 2. It keeps track of its children. 16 | 3. It presents itself to the rest of the OTP as a supervisor, which means that generic code 17 | walking the supervision tree, such as OTP release handler, will also iterate the parent's 18 | subtree. 19 | 4. Before terminating, it stops its children synchronously, in the reverse startup order. 20 | 21 | You can interact with the parent process from other processes using functions from the 22 | `Parent.Client` module. If you want to manipulate the parent from the inside, you can use the 23 | functions from this module. 24 | 25 | ## Initialization 26 | 27 | A parent process has to be initialized using `initialize/1`. 
This function takes the following 28 | initialization options: 29 | 30 | - `:max_restarts` and `:max_seconds` - same as with `Supervisor`, with the same defaults 31 | - `:registry?` - If true, the parent will manage its own ETS-based child registry. See the 32 | "Child discovery" section for details. 33 | 34 | When using higher-level abstractions, these options are typically passed through start functions, 35 | such as `Parent.Supervisor.start_link/2`. 36 | 37 | ## Child specification 38 | 39 | Child specification describes how the parent starts and manages a child. This specification is 40 | passed to functions such as `start_child/2`, `Parent.Client.start_child/2`, or 41 | `Parent.Supervisor.start_link/2` to start a child process. 42 | 43 | The specification is a map which is a superset of the [Supervisor child 44 | specifications](https://hexdocs.pm/elixir/Supervisor.html#module-child-specification). All the 45 | fields that are shared with `Supervisor` have the same effect. 46 | 47 | It's worth noting that the `:id` field is optional. If not provided, the child will be anonymous, 48 | and you can only manage it via its pid. Therefore, the minimum required child specification 49 | is `%{start: mfa_or_zero_arity_fun}`. 50 | 51 | Also, just like with `Supervisor`, you can provide `module | {module, arg}` when starting a 52 | child. See [Supervisor.child_spec/1](https://hexdocs.pm/elixir/Supervisor.html#module-child_spec-1) 53 | for details. 54 | 55 | To modify a child specification, `Parent.child_spec/2` can be used. 56 | 57 | ## Bound children 58 | 59 | You can bind the lifecycle of each child to the lifecycles of its older siblings. This is roughly 60 | similar to the `:rest_for_one` supervisor strategy. 61 | 62 | For example, if you want to start two children, a consumer and a producer, and bind the 63 | producer's lifecycle to the consumer, you need the following child specifications: 64 | 65 | consumer_spec = %{ 66 | id: :consumer, 67 | # ... 
68 | } 69 | 70 | producer_spec = %{ 71 | id: :producer, 72 | binds_to: [:consumer] 73 | } 74 | 75 | This will make sure that if the consumer stops, the producer is taken down as well. 76 | 77 | For this to work, you need to start the consumer before the producer. In other words, a child 78 | can only be bound to its older siblings. 79 | 80 | It's worth noting that bindings are transitive. If a child A is bound to the child B, which is 81 | in turn bound to child C, then child A also depends on child C. If child C stops, B and A will 82 | be stopped too. 83 | 84 | ## Shutdown groups 85 | 86 | A shutdown group is a mechanism that roughly emulates the `:one_for_all` supervisor strategy. 87 | For example, to set up a two-way lifecycle dependency between the consumer and the producer, we 88 | can use the following specifications: 89 | 90 | consumer_spec = %{ 91 | id: :consumer, 92 | shutdown_group: :consumer_and_producer 93 | # ... 94 | } 95 | 96 | producer_spec = %{ 97 | id: :producer, 98 | shutdown_group: :consumer_and_producer 99 | } 100 | 101 | In this case, when any child of the group terminates, the other children will be taken down as 102 | well. All children belonging to the same shutdown group must use the same `:restart` and 103 | `:ephemeral?` settings. 104 | 105 | Note that a child can be a member of some shutdown group, and at the same time bound to other 106 | older siblings. 107 | 108 | ## Lifecycle dependency consequences 109 | 110 | A lifecycle dependency means that a child is taken down when its dependency stops. This will 111 | happen irrespective of how the child has been stopped. Even if you manually stop the child using 112 | functions such as `shutdown_child/1` or `Parent.Client.shutdown_child/2`, the siblings bound to 113 | it will be taken down. 114 | 115 | In general, parent doesn't permit the state which violates the binding settings. If the process A 116 | is bound to the process B, parent will not allow A to keep running if B stops. 
117 | 118 | ## Handling child termination 119 | 120 | When a child terminates, depending on its `:restart` and `:ephemeral?` settings, parent will do 121 | one of the following things: 122 | 123 | - restart the child (possibly giving up if restart limit has been exceeded) 124 | - set the child's pid to `:undefined` 125 | - remove the child from its internal structures 126 | 127 | The `:restart` option controls when a child will be automatically restarted by its parent: 128 | 129 | - `:permanent` - A child is automatically restarted if it stops. This is the default value. 130 | - `:transient` - A child is automatically restarted only if it exits abnormally. 131 | - `:temporary` - A child is not automatically restarted. 132 | 133 | The `:ephemeral?` option controls what to do when a child which is not running will not be restarted. 134 | Such a situation can happen when a temporary child terminates, when a transient child stops 135 | normally, or if the child's start function returns `:ignore`. 136 | 137 | If the child is not marked as ephemeral (default), parent will keep the child in its internal 138 | structures, setting its pid to `:undefined`. Functions such as `Parent.children/0` will include 139 | the non-running child in the result, and such a child can be restarted using 140 | `Parent.restart_child/1`. This mimics the behaviour of "static supervisors" (i.e. one_for_one, 141 | rest_for_one, one_for_all). 142 | 143 | If the child is marked as ephemeral, parent will remove the child from its internal structures. 144 | This mimics the behaviour of `DynamicSupervisor`. 145 | 146 | In all of these cases, parent will perform the same action on bound siblings, ignoring their 147 | `:restart` and `:ephemeral?` settings. For example, if a permanent child is restarted, parent 148 | will restart all of its bound siblings (including the temporary ones). Likewise, if a temporary 149 | child stops, parent will stop all of its bound siblings, including the permanent ones.
If an 150 | ephemeral child stops, parent will remove all of its bound siblings, including the non-ephemeral 151 | ones. 152 | 153 | If a child is not restarted, its ephemeral bound siblings will be removed. This is the only case 154 | where parent honors the `:ephemeral?` status of bound siblings. 155 | 156 | ## Restart flow 157 | 158 | Process restarts can be triggered by the following situations: 159 | 160 | - a child terminated and it will be restarted due to its `:restart` setting 161 | - `restart_child/1` has been invoked (manual restart) 162 | - `return_children/1` has been invoked (returning removed children) 163 | 164 | In all these situations the flow is the same. Parent will first synchronously stop the bound 165 | dependencies of the child (in the reverse startup order). Then it will attempt to restart the 166 | child and its siblings. This is done by starting processes synchronously, one by one, in the 167 | startup order. If all processes are started successfully, restart has succeeded. 168 | 169 | If some process fails to start, the parent treats it as a crash. It will take down all the 170 | bound siblings, while proceeding to start other children which are not depending on the failed 171 | process. 172 | 173 | Therefore, a restart may partially succeed, with some children not being started. In this case, 174 | the parent will retry to restart such children, according to their specification. Temporary 175 | children which fail to restart will be considered as stopped, and parent will not attempt to 176 | restart them again. 177 | 178 | A failed attempt to restart a child is considered as a crash and contributes to the restart 179 | intensity. Thus, if a child repeatedly fails to restart, the parent will give up at some point, 180 | according to restart intensity settings. 181 | 182 | The restarted children keep their original startup order with respect to non-restarted children.
183 | For example, suppose that four children are running: A, B, C, and D, and children B and D are 184 | restarted. If the parent process then stops, it will take the children down in the order D, C, B, 185 | and A. 186 | 187 | ### Maximum restart frequency 188 | 189 | Similarly to `Supervisor`, a parent process keeps track of the number of restarts, and 190 | self-terminates if maximum threshold (defaults to 3 restarts in 5 seconds) is exceeded. 191 | 192 | In addition, you can provide child specific thresholds by including `:max_restarts` and 193 | `:max_seconds` options in child specification. Note that `:max_restarts` can be set to 194 | `:infinity` (both for the parent and each child). This can be useful if you want to disable the 195 | parent global limit, and use child-specific limits. 196 | 197 | Finally, it's worth noting that if termination of one child causes the restart of multiple 198 | children, parent will treat this as a single restart event when calculating the restart frequency 199 | and considering possible self-termination. 200 | 201 | ## Child timeout 202 | 203 | You can optionally include the `:timeout` option in the child specification to ask the parent to 204 | terminate the child if it doesn't stop in the given time. The child will be terminated according 205 | to its shutdown specification. In the case of non-forceful termination, the `:timeout` exit 206 | signal will be used. 207 | 208 | A non-temporary child which times out will be restarted. 209 | 210 | ## Child discovery 211 | 212 | Children can be discovered by other processes using functions such as `Parent.Client.child_pid/2`, 213 | or `Parent.Client.children/1`. By default, these functions will perform a synchronous call into 214 | the parent process. This should work fine as long as the parent is not pressured by various 215 | events, such as frequent children stopping and starting, or some other custom logic.
216 | 217 | In such cases you can consider setting the `registry?` option to `true` when initializing the 218 | parent process. When this option is set, parent will create an ETS table which will be used by 219 | the discovery functions. 220 | 221 | In addition, parent supports maintaining the child-specific meta information. You can set this 222 | information by providing the `:meta` field in the child specification, update it through 223 | functions such as `update_child_meta/2` or `Parent.Client.update_child_meta/3`, and query it 224 | through `Parent.Client.child_meta/2`. 225 | 226 | ## Building custom parent processes 227 | 228 | If available parent behaviours don't fit your purposes, you can consider building your own 229 | behaviour or a concrete parent process. In this case, the functions of this module will provide 230 | the necessary plumbing. 231 | 232 | The basic idea is presented in the following sketch: 233 | 234 | defp init_process do 235 | Parent.initialize(parent_opts) 236 | start_some_children() 237 | loop() 238 | end 239 | 240 | defp loop() do 241 | receive do 242 | msg -> 243 | case Parent.handle_message(msg) do 244 | # parent handled the message 245 | :ignore -> loop() 246 | 247 | # parent handled the message and returned some useful information 248 | {:stopped_children, stopped_children} -> handle_stopped_children(stopped_children) 249 | 250 | # not a parent message 251 | nil -> custom_handle_message(msg) 252 | end 253 | end 254 | end 255 | 256 | More specifically, to build a parent process you need to do the following: 257 | 258 | 1. Invoke `initialize/0` when the process is started. 259 | 2. Use functions such as `start_child/2` to work with child processes. 260 | 3. When a message is received, invoke `handle_message/1` before handling the message yourself. 261 | 4. If you receive a shutdown exit message from your parent, stop the process. 262 | 5. Before terminating, invoke `shutdown_all/1` to stop all the children. 263 | 6. 
Use `:infinity` as the shutdown strategy for the parent process, and `:supervisor` for its type. 264 | 7. If the process is a `GenServer`, handle supervisor calls (see `supervisor_which_children/0` 265 | and `supervisor_count_children/0`). 266 | 8. Implement `format_status/2` (see `Parent.GenServer` for details) where applicable. 267 | 268 | If the parent process is powered by a non-interactive code (e.g. `Task`), make sure 269 | to receive messages sent to that process, and handle them properly (see points 3 and 4). 270 | 271 | You can take a look at the code of `Parent.GenServer` for specific details. 272 | """ 273 | require Logger 274 | 275 | alias Parent.{Registry, Restart, State} 276 | 277 | @type opts :: [option] 278 | @type option :: 279 | {:max_restarts, non_neg_integer | :infinity} 280 | | {:max_seconds, pos_integer} 281 | | {:registry?, boolean} 282 | 283 | @type child_spec :: %{ 284 | :start => start, 285 | optional(:id) => child_id, 286 | optional(:modules) => [module] | :dynamic, 287 | optional(:type) => :worker | :supervisor, 288 | optional(:meta) => child_meta, 289 | optional(:shutdown) => shutdown, 290 | optional(:timeout) => pos_integer | :infinity, 291 | optional(:restart) => :temporary | :transient | :permanent, 292 | optional(:max_restarts) => non_neg_integer | :infinity, 293 | optional(:max_seconds) => pos_integer, 294 | optional(:binds_to) => [child_ref], 295 | optional(:shutdown_group) => shutdown_group, 296 | optional(:ephemeral?) 
=> boolean
      }

@type child_id :: term
@type child_meta :: term
@type shutdown_group :: term

@type child_ref :: child_id | pid

@type start :: (() -> Supervisor.on_start_child()) | {module, atom, [term]}

@type shutdown :: non_neg_integer | :infinity | :brutal_kill

@type start_spec :: child_spec | module | {module, term}

@type child :: %{id: child_id, pid: pid | :undefined, meta: child_meta}

@type handle_message_response ::
        {:stopped_children, stopped_children}
        | :ignore

@type stopped_children :: %{
        child_id => %{
          optional(atom) => any,
          pid: pid | :undefined,
          meta: child_meta,
          exit_reason: term
        }
      }

@type on_start_child :: Supervisor.on_start_child() | {:error, start_error}

# The `:non_uniform_shutdown_group` error carries the offending group itself (not a list),
# which is what the shutdown group validation returns.
@type start_error ::
        :invalid_child_id
        | {:missing_deps, [child_ref]}
        | {:non_uniform_shutdown_group, shutdown_group}

@doc """
Builds and overrides a child specification.

The given `spec` may be a child specification map, a module, or a `{module, arg}` tuple. The
`overrides` (keyword list or map) are merged on top of it, and the result is expanded with the
default values of all the fields which are not explicitly provided.

This operation is similar to
[Supervisor.child_spec/2](https://hexdocs.pm/elixir/Supervisor.html#child_spec/2)
"""
@spec child_spec(start_spec, Keyword.t() | child_spec) :: child_spec
def child_spec(spec, overrides \\ []) do
  spec
  |> child_spec_to_map()
  |> Map.merge(Map.new(overrides))
  |> expand_child_spec()
end

@doc """
Returns the child specification fields suitable for a parent process.

The result contains `shutdown: :infinity` and `type: :supervisor`, with the given `overrides`
(keyword list or map) merged on top.
"""
@spec parent_spec(Keyword.t() | child_spec) :: child_spec
def parent_spec(overrides \\ []),
  do: Map.merge(%{shutdown: :infinity, type: :supervisor}, Map.new(overrides))

@doc """
Initializes the state of the parent process.

This function should be invoked once inside the parent process before other functions from this
module are used. If a parent behaviour, such as `Parent.GenServer`, is used, this function must
not be invoked.
"""
@spec initialize(opts) :: :ok
def initialize(opts \\ []) do
  if initialized?() do
    raise("Parent state is already initialized")
  end

  Process.flag(:trap_exit, true)

  registry_enabled = Keyword.get(opts, :registry?, false)
  if registry_enabled, do: Registry.initialize()

  opts
  |> State.initialize()
  |> store()
end

@doc "Returns true if the parent state has been initialized in the current process."
@spec initialized?() :: boolean
def initialized?(), do: Process.get(__MODULE__) != nil

@doc """
Starts the child described by the specification.

The `overrides` are applied to the specification in the same way as in `child_spec/2`.
"""
@spec start_child(start_spec, Keyword.t()) :: on_start_child()
def start_child(child_spec, overrides \\ []) do
  parent_state = state()
  full_spec = Parent.child_spec(child_spec, overrides)

  case start_child_process(parent_state, full_spec) do
    {:ok, pid, timer_ref} ->
      # An ephemeral child which has no pid (pid is `:undefined`) is not kept by the parent.
      if pid != :undefined or not full_spec.ephemeral? do
        parent_state
        |> State.register_child(pid, full_spec, timer_ref)
        |> store()
      end

      {:ok, pid}

    other ->
      other
  end
end

@doc """
Synchronously starts all children, in the given order.

If some child fails to start, all of the children will be taken down and the parent process
will exit.
"""
@spec start_all_children!([start_spec()]) :: [pid | :undefined]
def start_all_children!(child_specs) do
  for child_spec <- child_specs do
    full_spec = Parent.child_spec(child_spec)

    case start_child(full_spec) do
      {:error, error} ->
        msg = "Error starting the child #{inspect(full_spec.id)}: #{inspect(error)}"
        give_up!(state(), :start_error, msg)

      {:ok, pid} ->
        pid
    end
  end
end

@doc """
Restarts the child and its siblings.

See "Restart flow" for details on restarting procedure.
"""
@spec restart_child(child_ref) :: :ok | :error
def restart_child(child_ref) do
  case State.pop_child_with_bound_siblings(state(), child_ref) do
    {:ok, children, popped_state} ->
      stop_children(children, :shutdown)

      popped_state
      |> Restart.perform(children)
      |> store()

      :ok

    other ->
      other
  end
end

@doc """
Starts new instances of stopped children.

Use this function to hand previously stopped children back to the parent. The children are
started through the same procedure which is used for automatic restarts.

The `stopped_children` information is obtained via functions such as `shutdown_child/1` or
`shutdown_all/1`. In addition, Parent will provide this info via `handle_message/1` if an
ephemeral child stops and is not restarted.
"""
@spec return_children(stopped_children) :: :ok
def return_children(stopped_children) do
  parent_state = state()
  children = Map.values(stopped_children)
  store(Restart.perform(parent_state, children))
end

@doc """
Shuts down the child and all siblings depending on it, and removes them from the parent state.

The siblings which are directly or transitively bound to the given child are shut down too.
The function waits for the processes to terminate, and pulls the corresponding `:EXIT` messages
from the mailbox.

All terminated children are removed from the parent state. The returned `stopped_children`
structure describes all of these children, and can be passed to `return_children/1` to manually
restart these processes.
"""
@spec shutdown_child(child_ref) :: {:ok, stopped_children} | :error
def shutdown_child(child_ref) do
  case State.pop_child_with_bound_siblings(state(), child_ref) do
    {:ok, children, popped_state} ->
      stop_children(children, :shutdown)
      store(popped_state)
      {:ok, stopped_children(children)}

    other ->
      other
  end
end

@doc """
Terminates all running child processes.

Children are terminated synchronously, in the reverse order from the order they have been started
in. All corresponding `:EXIT` messages will be pulled from the mailbox.
"""
@spec shutdown_all(term) :: stopped_children
def shutdown_all(reason \\ :shutdown) do
  parent_state = state()
  children = State.children(parent_state)

  # The `:normal` reason is replaced with `:shutdown` before stopping the children.
  exit_signal =
    case reason do
      :normal -> :shutdown
      other -> other
    end

  stop_children(children, exit_signal)

  parent_state
  |> State.reinitialize()
  |> store()

  stopped_children(children)
end

@doc """
Should be invoked by the parent process for each incoming message.

The return value tells the client code what happened with the message:

  - `nil` - the message is not a parent message, and the client code should handle it in the
    standard way.
  - `:ignore` - the message has been processed by the parent, and the client code should ignore
    it (e.g. it shouldn't invoke `handle_info` of the callback module).
  - `{:stopped_children, info}` - the message has been processed by the parent, some ephemeral
    processes have stopped and they have been removed from the parent. A client may do some
    extra processing in this case.

Note that you don't need to invoke this function in a `Parent.GenServer` callback module.
"""
@spec handle_message(term) :: handle_message_response() | nil
def handle_message({:"$parent_call", client, {Parent.Client, function, args}}) do
  response = apply(__MODULE__, function, args)
  GenServer.reply(client, response)
  :ignore
end

def handle_message(message) do
  case do_handle_message(state(), message) do
    {result, new_state} ->
      store(new_state)
      result

    other ->
      other
  end
end

@doc "Returns the list of children (including the ones which are not running), in the startup order."
@spec children :: [child]
def children() do
  state()
  |> State.children()
  |> Enum.sort_by(& &1.startup_index)
  |> Enum.map(&%{id: &1.spec.id, pid: &1.pid, meta: &1.meta})
end

@doc """
Returns true if the given child is known to the parent, false otherwise.

Note that this function might return true even if the child has terminated. This can happen if
the corresponding `:EXIT` message still hasn't been processed, and also if a non-ephemeral
child is not running.
"""
@spec child?(child_ref) :: boolean
def child?(child_ref), do: match?({:ok, _}, State.child(state(), child_ref))

@doc """
Should be invoked by the behaviour when handling `:which_children` GenServer call.

You only need to invoke this function if you're implementing a parent process using a behaviour
which forwards `GenServer` call messages to the `handle_call` callback. In such cases you need
to respond to the client with the result of this function. Note that parent behaviours such as
`Parent.GenServer` will do this automatically.

If no translation of `GenServer` messages is taking place, i.e. if you're handling all messages
in their original shape, this function will be invoked through `handle_message/1`.

Each child is reported as `{id, pid, type, modules}`. Anonymous children (the ones without an
id) are reported with the id `:undefined`. The pid is `:undefined` for a child which is not
running.
"""
@spec supervisor_which_children() ::
        [{term(), pid() | :undefined, :worker | :supervisor, [module()] | :dynamic}]
def supervisor_which_children() do
  state()
  |> State.children()
  |> Enum.map(fn child ->
    # Only a missing (`nil`) id marks an anonymous child. An explicit `is_nil/1` check is used
    # instead of `||`, so that a legitimate id such as `false` is reported unchanged.
    id = if is_nil(child.spec.id), do: :undefined, else: child.spec.id
    {id, child.pid, child.spec.type, child.spec.modules}
  end)
end

@doc """
Should be invoked by the behaviour when handling `:count_children` GenServer call.

See `supervisor_which_children/0` for details.
"""
@spec supervisor_count_children() :: [
        specs: non_neg_integer,
        active: non_neg_integer,
        supervisors: non_neg_integer,
        workers: non_neg_integer
      ]
def supervisor_count_children() do
  state()
  |> State.children()
  |> Enum.reduce(
    %{specs: 0, active: 0, supervisors: 0, workers: 0},
    fn child, acc ->
      # Only a child with a live pid is active. A non-ephemeral child which is not running is
      # kept with the pid `:undefined`: it still counts as a spec, but not as an active process.
      active_increment = if is_pid(child.pid), do: 1, else: 0

      %{
        acc
        | specs: acc.specs + 1,
          active: acc.active + active_increment,
          workers: acc.workers + if(child.spec.type == :worker, do: 1, else: 0),
          supervisors: acc.supervisors + if(child.spec.type == :supervisor, do: 1, else: 0)
      }
    end
  )
  |> Map.to_list()
end

@doc """
Should be invoked by the behaviour when handling `:get_childspec` GenServer call.

Returns `{:ok, child_spec}` if the child is known to the parent, `{:error, :not_found}`
otherwise. See `:supervisor.get_childspec/2` for details.
"""
@spec supervisor_get_childspec(child_ref) :: {:ok, child_spec} | {:error, :not_found}
def supervisor_get_childspec(child_ref) do
  case State.child(state(), child_ref) do
    {:ok, child} -> {:ok, child.spec}
    :error -> {:error, :not_found}
  end
end

@doc "Returns the count of children."
@spec num_children() :: non_neg_integer
def num_children(), do: State.num_children(state())

@doc "Returns the id of a child process with the given pid."
@spec child_id(pid) :: {:ok, child_id} | :error
def child_id(child_pid), do: State.child_id(state(), child_pid)

@doc "Returns the pid of a child process with the given id."
@spec child_pid(child_id) :: {:ok, pid} | :error
def child_pid(child_id), do: State.child_pid(state(), child_id)

@doc "Returns the meta associated with the given child id."
@spec child_meta(child_ref) :: {:ok, child_meta} | :error
def child_meta(child_ref), do: State.child_meta(state(), child_ref)

@doc "Updates the meta of the given child process."
# The `updater` function receives the current meta and returns the new one. If the parent
# registry is enabled, the new meta is also written to the registry before the state is stored.
# If the state update doesn't succeed, its result is returned as is.
@spec update_child_meta(child_ref, (child_meta -> child_meta)) :: :ok | :error
def update_child_meta(child_ref, updater) do
  with {:ok, meta, new_state} <- State.update_child_meta(state(), child_ref, updater) do
    if State.registry?(new_state), do: Registry.update_meta(child_ref, meta)
    store(new_state)
  end
end

# Normalizes the accepted start spec shapes (module, `{module, arg}`, map) into a map.
# A bare module is treated as `{module, []}`, and the map is obtained via `module.child_spec/1`.
defp child_spec_to_map(mod) when is_atom(mod), do: child_spec_to_map({mod, []})
defp child_spec_to_map({mod, arg}), do: mod.child_spec(arg)
defp child_spec_to_map(%{} = child_spec), do: child_spec
defp child_spec_to_map(_other), do: raise("invalid child_spec")

# Fills in the defaults. The merge order matters: generic defaults first, then the defaults
# which depend on the child type, then the default `:modules`, and finally the provided spec,
# so the explicitly provided fields always win. Raises if the spec has no `:start` field.
defp expand_child_spec(child_spec) do
  default_spec()
  |> Map.merge(default_type_and_shutdown_spec(Map.get(child_spec, :type, :worker)))
  |> Map.put(:modules, default_modules(child_spec.start))
  |> Map.merge(child_spec)
end

# Default values of the parent-specific child spec fields.
defp default_spec do
  %{
    id: nil,
    meta: nil,
    timeout: :infinity,
    restart: :permanent,
    max_restarts: :infinity,
    # NOTE(review): `:timer.seconds(5)` evaluates to 5000 (milliseconds), while the option is
    # named `:max_seconds` - confirm which unit the restart counter expects here.
    max_seconds: :timer.seconds(5),
    binds_to: [],
    shutdown_group: nil,
    ephemeral?: false
  }
end

# Workers get a 5 seconds shutdown timeout by default, supervisors get `:infinity`.
defp default_type_and_shutdown_spec(:worker), do: %{type: :worker, shutdown: :timer.seconds(5)}
defp default_type_and_shutdown_spec(:supervisor), do: %{type: :supervisor, shutdown: :infinity}

# The default `:modules` value is the module of the start MFA, or the module in which the
# start function has been defined.
defp default_modules({mod, _fun, _args}), do: [mod]

defp default_modules(fun) when is_function(fun),
  do: [fun |> :erlang.fun_info() |> Keyword.fetch!(:module)]

# Validates the spec against the current state, and starts the child only if the spec is valid.
# A validation error is returned as is.
defp start_child_process(state, child_spec) do
  with :ok <- validate_spec(state, child_spec),
       do: start_validated_child(state, child_spec)
end

# Invokes the start function of the child, and returns `{:ok, pid, timer_ref}` or
# `{:error, reason}`. If the start function returns `:ignore`, the pid is `:undefined` and no
# timer is started.
# NOTE(review): public but undocumented - presumably invoked from other modules of the library
# (e.g. the restart logic); verify against callers.
@doc false
def start_validated_child(state, child_spec) do
  case invoke_start_function(child_spec.start) do
    :ignore ->
      {:ok, :undefined, nil}

    {:ok, pid} ->
      # If the child has a finite timeout, schedule a timeout message to the parent process.
      # The message is handled in `do_handle_message/2`.
      timer_ref =
        case child_spec.timeout do
          :infinity -> nil
          timeout -> Process.send_after(self(), {__MODULE__, :child_timeout, pid}, timeout)
        end

      if State.registry?(state), do: Registry.register(pid, child_spec)
      {:ok, pid, timer_ref}

    {:error, _} = error ->
      error
  end
end

# Runs all the spec checks in order, returning the first error, or `:ok` if the spec is valid.
defp validate_spec(state, child_spec) do
  with :ok <- check_id_type(child_spec.id),
       :ok <- check_id_uniqueness(state, child_spec.id),
       :ok <- check_missing_deps(state, child_spec),
       do: check_valid_shutdown_group(state, child_spec)
end

# A pid can't be used as a child id.
# NOTE(review): presumably because a child ref is either an id or a pid, so a pid id would be
# ambiguous - confirm.
defp check_id_type(pid) when is_pid(pid), do: {:error, :invalid_child_id}
defp check_id_type(_other), do: :ok

# Rejects the spec if a child with the same id is already known to the parent.
defp check_id_uniqueness(state, id) do
  case State.child_pid(state, id) do
    {:ok, pid} -> {:error, {:already_started, pid}}
    :error -> :ok
  end
end

# All the children listed in `:binds_to` must already be known to the parent.
defp check_missing_deps(state, child_spec) do
  case Enum.reject(child_spec.binds_to, &State.child?(state, &1)) do
    [] -> :ok
    missing_deps -> {:error, {:missing_deps, missing_deps}}
  end
end

# A child which is not a member of a shutdown group needs no further checks.
defp check_valid_shutdown_group(_state, %{shutdown_group: nil}), do: :ok

# All members of a shutdown group (including the new child) must have the same `:restart` and
# `:ephemeral?` settings. At most two distinct combinations are taken from the lazy stream:
# exactly one means that the group is uniform, two mean that it is not.
defp check_valid_shutdown_group(state, child_spec) do
  state
  |> State.children_in_shutdown_group(child_spec.shutdown_group)
  |> Stream.map(& &1.spec)
  |> Stream.concat([child_spec])
  |> Stream.uniq_by(&{&1.restart, &1.ephemeral?})
  |> Enum.take(2)
  |> case do
    [_] -> :ok
    [_ | _] -> {:error, {:non_uniform_shutdown_group, child_spec.shutdown_group}}
  end
end

# The start function is either an MFA tuple or a zero-arity function.
defp invoke_start_function({mod, fun, args}), do: apply(mod, fun, args)
defp invoke_start_function(fun) when is_function(fun, 0), do: fun.()

# Handles the parent-specific messages. Returns `{result, new_state}` if the message has been
# handled, or `nil` if this is not a parent message.

# Exit of a linked process: handled only if the process is a known child.
defp do_handle_message(state, {:EXIT, pid, reason}) do
  case State.child(state, pid) do
    {:ok, child} ->
      kill_timer(child.timer_ref, pid)
      handle_child_down(state, child, reason)

    :error ->
      nil
  end
end

# The child timeout has expired: the child is stopped with the `:timeout` reason, and then
# handled like any other terminated child.
defp do_handle_message(state, {__MODULE__, :child_timeout, pid}) do
  child = State.child!(state, pid)
  stop_child(child, :timeout)
  handle_child_down(state, child, :timeout)
end

# Deferred restart, enqueued via `enqueue_resume_restart/1`. The children which are still
# present in the state are first popped (together with their bound siblings), and then the
# restart is performed for the stopped children.
defp do_handle_message(state, {__MODULE__, :resume_restart, stopped_children}) do
  state =
    stopped_children
    |> Enum.reduce(
      state,
      fn child, state ->
        if State.child?(state, child.key) do
          {:ok, _, state} = State.pop_child_with_bound_siblings(state, child.key)
          state
        else
          state
        end
      end
    )
    |> Restart.perform(stopped_children)

  {:ignore, state}
end

# Notification enqueued via `notify_stopped_children/1`. The first element is treated as the
# stopped child, and the remaining elements as its bound siblings.
defp do_handle_message(state, {__MODULE__, :stopped_children, children}),
  do: handle_stopped_children(state, hd(children), tl(children), hd(children).exit_reason)

# Supervisor calls, arriving in their original shape (i.e. not translated by some behaviour).
defp do_handle_message(state, {:"$gen_call", client, :which_children}) do
  GenServer.reply(client, supervisor_which_children())
  {:ignore, state}
end

defp do_handle_message(state, {:"$gen_call", client, :count_children}) do
  GenServer.reply(client, supervisor_count_children())
  {:ignore, state}
end

defp do_handle_message(state, {:"$gen_call", client, {:get_childspec, child_ref}}) do
  GenServer.reply(client, supervisor_get_childspec(child_ref))
  {:ignore, state}
end

# Not a parent message.
defp do_handle_message(_state, _other), do: nil

# Invoked when a child process has terminated. Removes the child and its bound siblings from
# the state, stops the bound siblings, and then decides what to do with all of them.
defp handle_child_down(state, child, reason) do
  if State.registry?(state), do: Registry.unregister(child.pid)
  {:ok, children, state} = State.pop_child_with_bound_siblings(state, child.pid)
  child = Map.merge(child, %{record_restart?: true, exit_reason: reason})

  # The terminated child is excluded here; the siblings get the `:shutdown` exit reason
  # because they are stopped by the parent below.
  bound_siblings =
    children
    |> Stream.reject(&(&1.key == child.key))
    |> Enum.map(&Map.put(&1, :exit_reason, :shutdown))

  stop_children(bound_siblings, :shutdown)

  handle_stopped_children(state, child, bound_siblings, reason)
end

# Decides the fate of the stopped child and its bound siblings, based on the `:restart` and
# `:ephemeral?` settings of the stopped child:
#
#   - a permanent child, or a transient child which exited abnormally, is restarted together
#     with its bound siblings
#   - otherwise, if the child is ephemeral, all of the children are reported as stopped
#   - otherwise, non-ephemeral children are kept without being restarted, and only the ephemeral
#     siblings (if any) are reported as stopped
defp handle_stopped_children(state, child, bound_siblings, reason) do
  cond do
    child.spec.restart == :permanent or (child.spec.restart == :transient and reason != :normal) ->
      state = Restart.perform(state, [child | bound_siblings])
      {:ignore, state}

    child.spec.ephemeral? ->
      {{:stopped_children, stopped_children([child | bound_siblings])}, state}

    true ->
      # Non-ephemeral temporary or transient child stopped and won't be restarted.
      # We'll keep all non-ephemeral children with pid undefined.

      {ephemeral, non_ephemeral} =
        Enum.split_with([child | bound_siblings], & &1.spec.ephemeral?)

      state = Restart.perform(state, non_ephemeral, restart?: false)

      if ephemeral == [],
        do: {:ignore, state},
        else: {{:stopped_children, stopped_children(ephemeral)}, state}
  end
end

# Sends the `:resume_restart` message to the current process, unless the given collection is
# empty. The message is handled in `do_handle_message/2`.
@doc false
def enqueue_resume_restart(children_to_restart) do
  unless Enum.empty?(children_to_restart) do
    # some children have not been started -> defer auto-restart to later moment
    send(self(), {__MODULE__, :resume_restart, children_to_restart})
  end
end

# Sends the `:stopped_children` message to the current process. The message is handled in
# `do_handle_message/2`.
@doc false
def notify_stopped_children(children),
  do: send(self(), {__MODULE__, :stopped_children, children})

# Converts the list of children into a map. A child is keyed by its id, or by its pid if the
# child is anonymous (i.e. if its id is `nil`).
defp stopped_children(children),
  do: Enum.into(children, %{}, &{with(nil <- &1.spec.id, do: &1.pid), &1})

# Logs the error, stores the given state, stops all the children, and finally exits the
# current process with the given exit reason.
@doc false
def give_up!(state, exit, error) do
  Logger.error(error)
  store(state)
  shutdown_all()
  exit(exit)
end

# Synchronously stops the given children, one by one, in the descending order of their startup
# index (i.e. the child with the highest startup index is stopped first).
@doc false
def stop_children(children, reason) do
  children
  |> Enum.sort_by(& &1.startup_index, :desc)
  |> Enum.each(&stop_child(&1, reason))
end

# Synchronously stops a single child. A child with the `:brutal_kill` shutdown is killed right
# away, and the parent waits indefinitely for it to go down. Otherwise, the child is asked to
# stop with the given reason, and it's killed if it doesn't stop within its shutdown time.
defp stop_child(child, reason) do
  kill_timer(child.timer_ref, child.pid)
  exit_signal = if child.spec.shutdown == :brutal_kill, do: :kill, else: reason
  wait_time = if exit_signal == :kill, do: :infinity, else: child.spec.shutdown
  sync_stop_process(child.pid, exit_signal, wait_time)
  if State.registry?(state()), do: Registry.unregister(child.pid)
end

# Nothing to stop if the child is not running.
defp sync_stop_process(:undefined, _exit_signal, _wait_time), do: :ok

defp sync_stop_process(pid, exit_signal, wait_time) do
  # Using monitors to detect process termination. In most cases links would suffice, but
  # monitors can help us deal with a child which unlinked itself from the parent.
  mref = Process.monitor(pid)
  Process.exit(pid, exit_signal)

  # TODO: we should check the reason and log an error if it's not `exit_signal` (or :killed in
  # the second receive).
  receive do
    {:DOWN, ^mref, :process, ^pid, _reason} -> :ok
  after
    wait_time ->
      # The child didn't stop in the given time, so it's forcefully killed.
      Process.exit(pid, :kill)

      receive do
        {:DOWN, ^mref, :process, ^pid, _reason} -> :ok
      end
  end

  # cleanup the exit signal
  receive do
    {:EXIT, ^pid, _reason} -> :ok
  after
    # timeout 0 is fine b/c exit signals are sent before monitors
    0 ->
      # if we end up here, the child has unlinked itself
      :ok
  end
end

# Cancels the child timeout timer (if any), and removes the timeout message of the given child
# from the mailbox, in case the message has already been delivered.
defp kill_timer(nil, _pid), do: :ok

defp kill_timer(timer_ref, pid) do
  Process.cancel_timer(timer_ref)

  receive do
    {Parent, :child_timeout, ^pid} -> :ok
  after
    0 -> :ok
  end
end

# The parent state is kept in the process dictionary, under the key `__MODULE__`.
# Raises if the state hasn't been stored yet (i.e. if the parent is not initialized).
@spec state() :: State.t()
defp state() do
  state = Process.get(__MODULE__)
  if is_nil(state), do: raise("Parent is not initialized")
  state
end

# Stores the parent state into the process dictionary.
@spec store(State.t()) :: :ok
defp store(state) do
  Process.put(__MODULE__, state)
  :ok
end
end
--------------------------------------------------------------------------------
/lib/parent/application.ex:
--------------------------------------------------------------------------------
defmodule Parent.Application do
  @moduledoc false
  use Application

  # Application entry point: starts a `Parent.Supervisor` registered under the name of this
  # module, with `Parent.MetaRegistry` as its only child.
  @impl Application
  def start(_type, _args) do
    children = [Parent.MetaRegistry]
    Parent.Supervisor.start_link(children, name: __MODULE__)
  end
end

--------------------------------------------------------------------------------
/lib/parent/client.ex:
--------------------------------------------------------------------------------
defmodule Parent.Client do
  @moduledoc """
  Functions for interacting with parent's children from other processes.

  All of these functions issue a call to the parent process. Therefore, they can't be used from
  inside the parent process. Use functions from the `Parent` module instead to interact with the
  children from within the process.

  Likewise these functions can't be invoked inside the child process during its initialization.
  Defer interacting with the parent to `c:GenServer.handle_continue/2`, or if you're using another
  behaviour which doesn't support such callback, send yourself a message to safely do the post-init
  interaction with the parent.

  If parent is configured with the `registry?: true` option, some query functions, such as
  `child_pid/2` will perform an ETS lookup instead of issuing a call, so the caveats above won't
  apply.
  """
  alias Parent.Registry

  @doc """
  Client interface to `Parent.children/0`.

  If the parent is a registry, the result will be obtained from the ETS table.
24 | """ 25 | @spec children(GenServer.server()) :: [Parent.child()] 26 | def children(parent) do 27 | case Registry.table(parent) do 28 | {:ok, table} -> Registry.children(table) 29 | :error -> call(parent, :children) 30 | end 31 | end 32 | 33 | @doc """ 34 | Client interface to `Parent.child_pid/1`. 35 | 36 | If the parent is a registry, the result will be obtained from the ETS table. 37 | """ 38 | @spec child_pid(GenServer.server(), Parent.child_id()) :: {:ok, pid} | :error 39 | def child_pid(parent, child_id) do 40 | case Registry.table(parent) do 41 | {:ok, table} -> Registry.child_pid(table, child_id) 42 | :error -> call(parent, :child_pid, [child_id]) 43 | end 44 | end 45 | 46 | @doc """ 47 | Client interface to `Parent.child_meta/1`. 48 | 49 | If the parent is a registry, the result will be obtained from the ETS table. 50 | """ 51 | @spec child_meta(GenServer.server(), Parent.child_ref()) :: {:ok, Parent.child_meta()} | :error 52 | def child_meta(parent, child_ref) do 53 | case Registry.table(parent) do 54 | {:ok, table} -> Registry.child_meta(table, child_ref) 55 | :error -> call(parent, :child_meta, [child_ref]) 56 | end 57 | end 58 | 59 | @doc "Client interface to `Parent.start_child/2`." 60 | @spec start_child(GenServer.server(), Parent.start_spec(), Keyword.t()) :: 61 | Parent.on_start_child() 62 | def start_child(parent, child_spec, overrides \\ []), 63 | do: call(parent, :start_child, [child_spec, overrides], :infinity) 64 | 65 | @doc "Client interface to `Parent.shutdown_child/1`." 66 | @spec shutdown_child(GenServer.server(), Parent.child_ref()) :: 67 | {:ok, Parent.stopped_children()} | :error 68 | def shutdown_child(parent, child_ref), do: call(parent, :shutdown_child, [child_ref], :infinity) 69 | 70 | @doc "Client interface to `Parent.restart_child/1`." 
71 | @spec restart_child(GenServer.server(), Parent.child_ref()) :: :ok | :error 72 | def restart_child(parent, child_ref), 73 | do: call(parent, :restart_child, [child_ref], :infinity) 74 | 75 | @doc "Client interface to `Parent.shutdown_all/1`." 76 | @spec shutdown_all(GenServer.server(), any) :: Parent.stopped_children() 77 | def shutdown_all(server, reason \\ :shutdown), 78 | do: call(server, :shutdown_all, [reason], :infinity) 79 | 80 | @doc "Client interface to `Parent.return_children/1`." 81 | @spec return_children(GenServer.server(), Parent.stopped_children()) :: :ok 82 | def return_children(parent, stopped_children), 83 | do: call(parent, :return_children, [stopped_children], :infinity) 84 | 85 | @doc "Client interface to `Parent.update_child_meta/2`." 86 | @spec update_child_meta( 87 | GenServer.server(), 88 | Parent.child_id(), 89 | (Parent.child_meta() -> Parent.child_meta()) 90 | ) :: :ok | :error 91 | def update_child_meta(parent, child_ref, updater), 92 | do: call(parent, :update_child_meta, [child_ref, updater], :infinity) 93 | 94 | @doc false 95 | def whereis_name({parent, child_id}) do 96 | case child_pid(parent, child_id) do 97 | {:ok, pid} -> pid 98 | :error -> :undefined 99 | end 100 | end 101 | 102 | defp call(server, function, args \\ [], timeout \\ 5000) 103 | when (is_integer(timeout) and timeout >= 0) or timeout == :infinity do 104 | # This is the custom implementation of a call. We're not using standard GenServer calls to 105 | # ensure that this call won't end up in some custom behaviour's handle_call. 
106 | request = {__MODULE__, function, args} 107 | 108 | case GenServer.whereis(server) do 109 | nil -> 110 | exit({:noproc, {__MODULE__, :call, [server, request, timeout]}}) 111 | 112 | pid when pid == self() -> 113 | exit({:calling_self, {__MODULE__, :call, [server, request, timeout]}}) 114 | 115 | pid -> 116 | try do 117 | :gen.call(pid, :"$parent_call", request, timeout) 118 | catch 119 | :exit, reason -> 120 | exit({reason, {__MODULE__, :call, [server, request, timeout]}}) 121 | else 122 | {:ok, res} -> res 123 | end 124 | end 125 | end 126 | end 127 | -------------------------------------------------------------------------------- /lib/parent/gen_server.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.GenServer do 2 | @moduledoc """ 3 | GenServer with parenting capabilities powered by `Parent`. 4 | 5 | This behaviour can be useful in situations where `Parent.Supervisor` won't suffice. 6 | 7 | ## Example 8 | 9 | The following example is roughly similar to a standard 10 | [callback-based Supervisor](https://hexdocs.pm/elixir/Supervisor.html#module-module-based-supervisors): 11 | 12 | defmodule MyApp.Supervisor do 13 | # Automatically defines child_spec/1 14 | use Parent.GenServer 15 | 16 | def start_link(init_arg), 17 | do: Parent.GenServer.start_link(__MODULE__, init_arg, name: __MODULE__) 18 | 19 | @impl GenServer 20 | def init(_init_arg) do 21 | Parent.start_all_children!(children) 22 | {:ok, initial_state} 23 | end 24 | end 25 | 26 | The expression `use Parent.GenServer` will also inject `use GenServer` into your code. Your 27 | parent process is a GenServer, and this behaviour doesn't try to hide it. Except when starting 28 | the process, you work with the parent exactly as you work with any GenServer, using the same 29 | functions, such as `GenServer.call/3`, and providing the same callbacks, such as `init/1`, or 30 | `handle_call/3`. 
31 | 32 | ## Interacting with the parent from the outside 33 | 34 | You can issue regular `GenServer` calls and casts, and send messages to the parent, which can 35 | be handled by corresponding `GenServer` callbacks. In addition, you can use functions from 36 | the `Parent.Client` module to manipulate or query the parent state from other processes. As a 37 | good practice, it's advised to wrap such invocations in the module which implements 38 | `Parent.GenServer`. 39 | 40 | ## Interacting with children inside the parent 41 | 42 | From within the parent process, you can interact with the child processes using functions from 43 | the `Parent` module. All child processes should be started using `Parent` functions, such as 44 | `Parent.start_child/2`, because otherwise `Parent` won't be aware of these processes and won't 45 | be able to fulfill its guarantees. 46 | 47 | Note that you can start children from any callback, not just during `init/1`. In addition, you 48 | don't need to start all children at once. Therefore, `Parent.GenServer` can prove useful when 49 | you need to make some runtime decisions: 50 | 51 | {:ok, child1} = Parent.start_child(child1_spec) 52 | 53 | if some_condition_met?, 54 | do: Parent.start_child(child2_spec) 55 | 56 | Parent.start_child(child3_spec) 57 | 58 | However, bear in mind that this code won't be executed again if the processes are restarted. 59 | 60 | ## Handling child termination 61 | 62 | If a child process terminates and isn't restarted, the `c:handle_stopped_children/2` callback is 63 | invoked. The default implementation does nothing. 
64 | 65 | The following example uses `c:handle_stopped_children/2` to start a child task and report if 66 | it crashes: 67 | 68 | defmodule MyJob do 69 | use Parent.GenServer, restart: :temporary 70 | 71 | def start_link(arg), do: Parent.GenServer.start_link(__MODULE__, arg) 72 | 73 | @impl GenServer 74 | def init(_) do 75 | {:ok, _} = Parent.start_child(%{ 76 | id: :job, 77 | start: {Task, :start_link, [fn -> job(arg) end]}, 78 | restart: :temporary, 79 | 80 | # handle_stopped_children won't be invoked without this 81 | ephemeral?: true 82 | }) 83 | {:ok, nil} 84 | end 85 | 86 | @impl Parent.GenServer 87 | def handle_stopped_children(%{job: info}, state) do 88 | if info.reason != :normal do 89 | # report job failure 90 | end 91 | 92 | {:stop, reason, state} 93 | end 94 | end 95 | 96 | `handle_stopped_children` can be useful to implement arbitrary custom behaviour, such as 97 | restarting after a delay, and using incremental backoff periods between two consecutive starts. 98 | 99 | For example, this is how you could introduce a delay between two consecutive starts: 100 | 101 | def handle_stopped_children(stopped_children, state) do 102 | Process.send_after(self, {:restart, stopped_children}, delay) 103 | {:noreply, state} 104 | end 105 | 106 | def handle_info({:restart, stopped_children}, state) do 107 | Parent.return_children(stopped_children) 108 | {:noreply, state} 109 | end 110 | 111 | Keep in mind that `handle_stopped_children` is only invoked if the child crashed on its own, 112 | and if it's not going to be restarted. 113 | 114 | If the child was explicitly stopped via a `Parent` function, such as `Parent.shutdown_child/1`, 115 | this callback will not be invoked. The same holds for `Parent.Client` functions. If you want 116 | to unconditionally react to a termination of a child process, set up a monitor with `Process.monitor` 117 | and add a corresponding `handle_info` clause. 
118 | 119 | If the child was taken down because its lifecycle is bound to some other process, the 120 | corresponding `handle_stopped_children` won't be invoked. For example, if process A is bound to 121 | process B, and process B crashes, only one `handle_stopped_children` will be invoked (for the 122 | crash of process B). However, the corresponding `info` will contain the list of all associated 123 | siblings that have been taken down, and `stopped_children` will include information necessary to 124 | restart all of these siblings. Refer to `Parent` documentation for details on lifecycle binding. 125 | 126 | ## Parent termination 127 | 128 | The behaviour takes down the child processes before it terminates, to ensure that no child 129 | process is running after the parent has terminated. The children are terminated synchronously, 130 | one by one, in the reverse start order. 131 | 132 | The termination of the children is done after the `terminate/2` callback returns. Therefore in 133 | `terminate/2` the child processes are still running, and you can interact with them, and even 134 | start additional children. 135 | 136 | ## Caveats 137 | 138 | Like any other `Parent`-based process, `Parent.GenServer` traps exits and uses the `:infinity` 139 | shutdown strategy. As a result, a parent process which blocks for a long time (e.g. because it's 140 | communicating with a remote service) won't be able to handle child termination, and your 141 | fault-tolerance might be badly affected. In addition, a blocking parent might completely paralyze 142 | the system (or a subtree) shutdown. Setting a shutdown strategy to a finite time is a hacky 143 | workaround that will lead to lingering orphan processes, and might cause some strange race 144 | conditions which will be very hard to debug. 145 | 146 | Therefore, be wary of having too much logic inside a parent process. 
Try to push as many 147 | responsibilities as possible to other processes, such as children or siblings, and use the parent 148 | only for coordination and reporting tasks. 149 | 150 | Finally, since the parent traps exits, it's possible to receive an occasional stray `:EXIT` message 151 | if the child crashes during its initialization. 152 | 153 | By default `use Parent.GenServer` receives such messages and ignores them. If you're implementing 154 | your own `handle_info`, make sure to include a clause for `:EXIT` messages: 155 | 156 | def handle_info({:EXIT, _pid, _reason}, state), do: {:noreply, state} 157 | """ 158 | use GenServer 159 | 160 | @type state :: term 161 | @type options :: [Parent.option() | GenServer.option()] 162 | 163 | @doc """ 164 | Invoked when some children have terminated. 165 | 166 | The `info` map will contain all the children which have been stopped together. For example, 167 | if child A is bound to child B, and child B terminates, parent will also terminate the child 168 | A. In this case, `handle_stopped_children` is invoked only once, with the `info` map containing 169 | entries for both children. 170 | 171 | This callback will not be invoked in the following cases: 172 | 173 | - a child is terminated by invoking `Parent` functions such as `Parent.shutdown_child/1` 174 | - a child is restarted 175 | - a child is not ephemeral (see "Ephemeral children" in `Parent` for details) 176 | """ 177 | @callback handle_stopped_children(info :: Parent.stopped_children(), state) :: 178 | {:noreply, new_state} 179 | | {:noreply, new_state, timeout | :hibernate} 180 | | {:stop, reason :: term, new_state} 181 | when new_state: state 182 | 183 | @doc "Starts the parent process." 
184 | @spec start_link(module, arg :: term, options) :: GenServer.on_start() 185 | def start_link(module, arg, options \\ []) do 186 | {parent_opts, gen_server_opts} = 187 | Keyword.split(options, ~w/max_restarts max_seconds registry?/a) 188 | 189 | GenServer.start_link(__MODULE__, {module, arg, parent_opts}, gen_server_opts) 190 | end 191 | 192 | @impl GenServer 193 | def init({callback, arg, options}) do 194 | # needed to simulate a supervisor 195 | Process.put(:"$initial_call", {:supervisor, callback, 1}) 196 | 197 | Process.put({__MODULE__, :callback}, callback) 198 | Parent.initialize(options) 199 | invoke_callback(:init, [arg]) 200 | end 201 | 202 | @impl GenServer 203 | def handle_info(message, state) do 204 | case Parent.handle_message(message) do 205 | {:stopped_children, info} -> 206 | invoke_callback(:handle_stopped_children, [info, state]) 207 | 208 | :ignore -> 209 | {:noreply, state} 210 | 211 | nil -> 212 | invoke_callback(:handle_info, [message, state]) 213 | end 214 | end 215 | 216 | @impl GenServer 217 | def handle_call(:which_children, _from, state), 218 | do: {:reply, Parent.supervisor_which_children(), state} 219 | 220 | def handle_call(:count_children, _from, state), 221 | do: {:reply, Parent.supervisor_count_children(), state} 222 | 223 | def handle_call({:get_childspec, child_id_or_pid}, _from, state), 224 | do: {:reply, Parent.supervisor_get_childspec(child_id_or_pid), state} 225 | 226 | def handle_call(message, from, state), do: invoke_callback(:handle_call, [message, from, state]) 227 | 228 | @impl GenServer 229 | def handle_cast(message, state), do: invoke_callback(:handle_cast, [message, state]) 230 | 231 | @impl GenServer 232 | # Needed to support `:supervisor.get_callback_module` 233 | def format_status(:normal, [_pdict, state]) do 234 | [ 235 | data: [{~c"State", state}], 236 | supervisor: [{~c"Callback", Process.get({__MODULE__, :callback})}] 237 | ] 238 | end 239 | 240 | def format_status(:terminate, pdict_and_state), 241 | do: 
invoke_callback(:format_status, [:terminate, pdict_and_state]) 242 | 243 | @impl GenServer 244 | def code_change(old_vsn, state, extra), 245 | do: invoke_callback(:code_change, [old_vsn, state, extra]) 246 | 247 | @impl GenServer 248 | def terminate(reason, state) do 249 | invoke_callback(:terminate, [reason, state]) 250 | after 251 | Parent.shutdown_all(reason) 252 | end 253 | 254 | unless Version.compare(System.version(), "1.7.0") == :lt do 255 | @impl GenServer 256 | def handle_continue(continue, state), do: invoke_callback(:handle_continue, [continue, state]) 257 | end 258 | 259 | defp invoke_callback(fun, arg), do: apply(Process.get({__MODULE__, :callback}), fun, arg) 260 | 261 | @doc false 262 | def child_spec(_arg) do 263 | raise("#{__MODULE__} can't be used in a child spec.") 264 | end 265 | 266 | @doc false 267 | defmacro __using__(opts) do 268 | quote location: :keep, bind_quoted: [opts: opts, behaviour: __MODULE__] do 269 | use GenServer, opts 270 | @behaviour behaviour 271 | 272 | @doc """ 273 | Returns a specification to start this module under a supervisor. 274 | See `Supervisor`. 
275 | """ 276 | def child_spec(arg) do 277 | default = Parent.parent_spec(id: __MODULE__, start: {__MODULE__, :start_link, [arg]}) 278 | Supervisor.child_spec(default, unquote(Macro.escape(opts))) 279 | end 280 | 281 | @impl behaviour 282 | def handle_stopped_children(_info, state), do: {:noreply, state} 283 | 284 | @impl GenServer 285 | # automatic ignoring of `:EXIT` messages which may occur if a child crashes during its `start_link` 286 | def handle_info({:EXIT, _pid, _reason}, state), do: {:noreply, state} 287 | 288 | def handle_info(msg, state) do 289 | # copied over from `GenServer`, b/c calling `super` is not allowed 290 | proc = 291 | case Process.info(self(), :registered_name) do 292 | {_, []} -> self() 293 | {_, name} -> name 294 | end 295 | 296 | :logger.error( 297 | %{ 298 | label: {GenServer, :no_handle_info}, 299 | report: %{ 300 | module: __MODULE__, 301 | message: msg, 302 | name: proc 303 | } 304 | }, 305 | %{ 306 | domain: [:otp, :elixir], 307 | error_logger: %{tag: :error_msg}, 308 | report_cb: &GenServer.format_report/1 309 | } 310 | ) 311 | 312 | {:noreply, state} 313 | end 314 | 315 | @impl GenServer 316 | def code_change(_old, state, _extra), do: {:ok, state} 317 | 318 | defoverridable handle_info: 2, handle_stopped_children: 2, child_spec: 1, code_change: 3 319 | end 320 | end 321 | end 322 | -------------------------------------------------------------------------------- /lib/parent/meta_registry.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.MetaRegistry do 2 | @moduledoc false 3 | 4 | def register_table!(table) do 5 | {:ok, _} = Registry.register(__MODULE__, self(), table) 6 | :ok 7 | end 8 | 9 | def table(parent_pid) do 10 | case Registry.lookup(__MODULE__, parent_pid) do 11 | [{^parent_pid, table}] -> {:ok, table} 12 | [] -> :error 13 | end 14 | end 15 | 16 | def table!(parent_pid) do 17 | {:ok, table} = table(parent_pid) 18 | table 19 | end 20 | 21 | def child_spec(_opts), do: 
Registry.child_spec(keys: :unique, name: __MODULE__) 22 | end 23 | -------------------------------------------------------------------------------- /lib/parent/registry.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.Registry do 2 | @moduledoc false 3 | alias Parent.MetaRegistry 4 | 5 | @opaque table :: :ets.tid() 6 | 7 | @spec initialize :: :ok 8 | def initialize do 9 | MetaRegistry.register_table!( 10 | :ets.new(__MODULE__, [ 11 | :protected, 12 | :set, 13 | read_concurrency: true, 14 | write_concurrency: true 15 | ]) 16 | ) 17 | end 18 | 19 | @spec table(GenServer.server()) :: {:ok, table} | :error 20 | def table(parent), do: MetaRegistry.table(GenServer.whereis(parent)) 21 | 22 | @spec register(pid, Parent.child_spec()) :: :ok 23 | def register(child_pid, child_spec) do 24 | table = MetaRegistry.table!(self()) 25 | key = {:id, with(nil <- child_spec.id, do: child_pid)} 26 | main_entry = {key, child_pid, child_spec.meta} 27 | entries = if is_nil(child_spec.id), do: [main_entry], else: [{child_pid, key}, main_entry] 28 | :ets.insert(table, entries) 29 | :ok 30 | end 31 | 32 | @spec unregister(pid) :: :ok 33 | def unregister(child_pid) do 34 | table = MetaRegistry.table!(self()) 35 | :ets.delete(table, key(table, child_pid)) 36 | :ets.delete(table, child_pid) 37 | :ok 38 | end 39 | 40 | @spec update_meta(Parent.child_ref(), Parent.child_meta()) :: :ok 41 | def update_meta(child_ref, meta) do 42 | table = MetaRegistry.table!(self()) 43 | key = key(table, child_ref) 44 | true = :ets.update_element(table, key, {3, meta}) 45 | :ok 46 | end 47 | 48 | @spec children(table) :: [Parent.child()] 49 | def children(table) do 50 | table 51 | |> :ets.match({{:id, :"$1"}, :"$2", :"$3"}) 52 | |> Enum.map(fn [id, pid, meta] -> %{id: id, meta: meta, pid: pid} end) 53 | end 54 | 55 | @spec child_pid(table, Parent.child_id()) :: {:ok, pid} | :error 56 | def child_pid(table, child_id) do 57 | case :ets.match(table, {{:id, 
child_id}, :"$1", :_}) do 58 | [[pid]] -> {:ok, pid} 59 | [] -> :error 60 | end 61 | end 62 | 63 | @spec child_meta(table, Parent.child_ref()) :: {:ok, Parent.child_meta()} | :error 64 | def child_meta(table, child_ref) do 65 | case :ets.match(table, {key(table, child_ref), :_, :"$1"}) do 66 | [[meta]] -> {:ok, meta} 67 | [] -> :error 68 | end 69 | end 70 | 71 | defp key(table, child_ref) do 72 | if is_pid(child_ref) do 73 | case :ets.match(table, {child_ref, :"$1"}) do 74 | [[key]] -> key 75 | [] -> {:id, child_ref} 76 | end 77 | else 78 | {:id, child_ref} 79 | end 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /lib/parent/restart.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.Restart do 2 | @moduledoc false 3 | alias Parent.State 4 | 5 | # core logic of all restarts, both automatic and manual 6 | def perform(state, children, opts \\ []) do 7 | # Reject already started children (idempotence) 8 | to_start = Enum.reject(children, &State.child?(state, &1.spec.id)) 9 | 10 | {to_start, state} = 11 | if Keyword.get(opts, :restart?, true), 12 | do: record_restart(state, to_start), 13 | else: {to_start, state} 14 | 15 | # First we'll return all entries to the parent without starting the processes. This will 16 | # simplify handling a failed start of some child. 
17 | {children, state} = return_children_entries(state, to_start) 18 | 19 | # Now we can proceed to restart the children if needed 20 | if Keyword.get(opts, :restart?, true) do 21 | {not_started, state} = restart_children(state, children) 22 | Enum.reduce(not_started, state, &handle_not_started(&2, &1)) 23 | else 24 | state 25 | end 26 | end 27 | 28 | defp record_restart(state, children) do 29 | Enum.flat_map_reduce( 30 | children, 31 | state, 32 | fn 33 | %{record_restart?: true} = child, state -> 34 | {child, state} = record_restart!(state, child) 35 | {[child], state} 36 | 37 | child, state -> 38 | {[child], state} 39 | end 40 | ) 41 | end 42 | 43 | defp record_restart!(state, child) do 44 | with {:ok, state} <- State.record_restart(state), 45 | {:ok, restart_counter} <- Parent.RestartCounter.record_restart(child.restart_counter) do 46 | {%{child | restart_counter: restart_counter}, state} 47 | else 48 | _ -> 49 | Parent.give_up!(state, :too_many_restarts, "Too many restarts in parent process.") 50 | end 51 | end 52 | 53 | defp return_children_entries(state, children) do 54 | {state, _keys, children} = 55 | children 56 | |> Enum.sort_by(& &1.startup_index) 57 | |> Enum.reduce( 58 | {state, %{}, []}, 59 | fn child, {state, keys, children} -> 60 | # if a child binds to a sibling via a pid we need to update the bindings to the correct key 61 | child = 62 | update_in(child.spec.binds_to, fn binds_to -> 63 | Enum.map(binds_to, &Map.get(keys, &1, &1)) 64 | end) 65 | 66 | {key, state} = 67 | State.reregister_child( 68 | state, 69 | child |> Map.delete(:exit_reason) |> Map.delete(:record_restart?), 70 | :undefined, 71 | nil 72 | ) 73 | 74 | keys = Map.put(keys, child.key, key) 75 | child = Map.merge(child, State.child!(state, key)) 76 | {state, keys, [child | children]} 77 | end 78 | ) 79 | 80 | {Enum.reverse(children), state} 81 | end 82 | 83 | defp restart_children(state, children) do 84 | {stopped, state} = 85 | Enum.reduce( 86 | children, 87 | {[], state}, 88 | fn 
child, {stopped, state} -> 89 | {new_stopped, state} = 90 | if State.child?(state, child.key), 91 | do: restart_child(state, child), 92 | # A child might not be in a state if it was removed because its dependency failed to start 93 | else: {[], state} 94 | 95 | {new_stopped ++ stopped, state} 96 | end 97 | ) 98 | 99 | {Enum.reverse(stopped), state} 100 | end 101 | 102 | defp restart_child(state, child) do 103 | case Parent.start_validated_child(state, child.spec) do 104 | {:ok, pid, timer_ref} -> 105 | {[], State.set_child_process(state, child.key, pid, timer_ref)} 106 | 107 | {:error, start_error} -> 108 | {:ok, children, state} = State.pop_child_with_bound_siblings(state, child.key) 109 | Parent.stop_children(children, :shutdown) 110 | {[{child.key, start_error, children}], state} 111 | end 112 | end 113 | 114 | defp handle_not_started(state, {key, error, children}) do 115 | {[failed_child], bound_siblings} = Enum.split_with(children, &(&1.key == key)) 116 | failed_child = Map.merge(failed_child, %{exit_reason: error, record_restart?: true}) 117 | 118 | cond do 119 | failed_child.spec.restart != :temporary -> 120 | # Failed start of a non-temporary -> auto restart 121 | {children, state} = return_children_entries(state, [failed_child | bound_siblings]) 122 | Parent.enqueue_resume_restart(children) 123 | state 124 | 125 | failed_child.spec.ephemeral? -> 126 | # Failed start of a temporary ephemeral child -> notify the client about stopped children 127 | Parent.notify_stopped_children([failed_child | bound_siblings]) 128 | state 129 | 130 | true -> 131 | # Failed start of a temporary non-ephemeral child 132 | 133 | {ephemeral, non_ephemeral} = 134 | Enum.split_with([failed_child | bound_siblings], & &1.spec.ephemeral?) 
135 | 136 | # non-ephemeral processes in the group are kept with their pid set to `:undefined` 137 | {_children, state} = return_children_entries(state, non_ephemeral) 138 | 139 | # notify the client about stopped ephemeral children 140 | Parent.notify_stopped_children(ephemeral) 141 | state 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/parent/restart_counter.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.RestartCounter do 2 | @moduledoc false 3 | 4 | @opaque t :: 5 | %{ 6 | max_restarts: pos_integer, 7 | interval: pos_integer, 8 | recorded: :queue.queue(pos_integer), 9 | size: non_neg_integer 10 | } 11 | | nil 12 | 13 | @time_provider if Mix.env() == :test, 14 | do: __MODULE__.TimeProvider.Test, 15 | else: __MODULE__.TimeProvider.Monotonic 16 | 17 | @spec new(:infinity | non_neg_integer, pos_integer) :: t 18 | def new(:infinity, _max_seconds), do: nil 19 | 20 | def new(max_restarts, max_seconds) do 21 | %{ 22 | max_restarts: max_restarts, 23 | interval: :timer.seconds(max_seconds), 24 | recorded: :queue.new(), 25 | size: 0 26 | } 27 | end 28 | 29 | @spec record_restart(t) :: {:ok, t} | :error 30 | def record_restart(nil), do: {:ok, nil} 31 | 32 | def record_restart(state) do 33 | now = @time_provider.now_ms() 34 | 35 | state = 36 | state 37 | |> purge_old_records(now) 38 | |> Map.update!(:recorded, &:queue.in(now, &1)) 39 | |> Map.update!(:size, &(&1 + 1)) 40 | 41 | if state.size > state.max_restarts, do: :error, else: {:ok, state} 42 | end 43 | 44 | defp purge_old_records(%{interval: interval} = state, now) do 45 | state 46 | |> Stream.iterate(fn state -> 47 | case :queue.out(state.recorded) do 48 | {{:value, time}, recorded} when time + interval - 1 < now -> 49 | %{state | recorded: recorded, size: state.size - 1} 50 | 51 | _other -> 52 | nil 53 | end 54 | end) 55 | |> Stream.take_while(&(not is_nil(&1))) 56 | |> Enum.at(-1) 57 | 
end 58 | 59 | defmodule TimeProvider do 60 | @moduledoc false 61 | @callback now_ms :: integer 62 | end 63 | 64 | defmodule TimeProvider.Monotonic do 65 | @moduledoc false 66 | @behaviour TimeProvider 67 | 68 | @impl TimeProvider 69 | def now_ms, do: :erlang.monotonic_time(:millisecond) 70 | end 71 | 72 | if Mix.env() == :test do 73 | Mox.defmock(TimeProvider.Test, for: TimeProvider) 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/parent/state.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.State do 2 | @moduledoc false 3 | 4 | alias Parent.RestartCounter 5 | 6 | @opaque t :: %{ 7 | opts: Parent.opts(), 8 | id_to_key: %{Parent.child_id() => key}, 9 | children: %{key => child()}, 10 | startup_index: non_neg_integer, 11 | restart_counter: RestartCounter.t(), 12 | registry?: boolean, 13 | bound: %{key => [key]}, 14 | shutdown_groups: %{Parent.shutdown_group() => [key]} 15 | } 16 | 17 | @opaque key :: pid | {__MODULE__, reference()} 18 | 19 | @type child :: %{ 20 | spec: Parent.child_spec(), 21 | pid: pid | :undefined, 22 | timer_ref: reference() | nil, 23 | startup_index: non_neg_integer(), 24 | restart_counter: RestartCounter.t(), 25 | meta: Parent.child_meta(), 26 | key: key 27 | } 28 | 29 | @spec initialize(Parent.opts()) :: t 30 | def initialize(opts) do 31 | opts = Keyword.merge([max_restarts: 3, max_seconds: 5, registry?: false], opts) 32 | 33 | %{ 34 | opts: opts, 35 | id_to_key: %{}, 36 | children: %{}, 37 | startup_index: 0, 38 | restart_counter: RestartCounter.new(opts[:max_restarts], opts[:max_seconds]), 39 | registry?: Keyword.fetch!(opts, :registry?), 40 | bound: %{}, 41 | shutdown_groups: %{} 42 | } 43 | end 44 | 45 | @spec reinitialize(t) :: t 46 | def reinitialize(state), do: %{initialize(state.opts) | startup_index: state.startup_index} 47 | 48 | @spec registry?(t) :: boolean 49 | def registry?(state), do: state.registry? 
50 | 51 | @spec register_child(t, pid | :undefined, Parent.child_spec(), reference | nil) :: t 52 | def register_child(state, pid, spec, timer_ref) do 53 | key = child_key(%{}, pid) 54 | 55 | false = Map.has_key?(state.children, pid) 56 | 57 | child = %{ 58 | spec: spec, 59 | pid: pid, 60 | timer_ref: timer_ref, 61 | startup_index: state.startup_index, 62 | restart_counter: RestartCounter.new(spec.max_restarts, spec.max_seconds), 63 | meta: spec.meta, 64 | key: key 65 | } 66 | 67 | state = 68 | if is_nil(spec.id), 69 | do: state, 70 | else: Map.update!(state, :id_to_key, &Map.put(&1, spec.id, key)) 71 | 72 | state 73 | |> Map.update!(:children, &Map.put(&1, key, child)) 74 | |> Map.update!(:startup_index, &(&1 + 1)) 75 | |> update_bindings(key, spec) 76 | |> update_shutdown_groups(key, spec) 77 | end 78 | 79 | @spec reregister_child(t, child, pid | :undefined, reference | nil) :: {key, t} 80 | def reregister_child(state, child, pid, timer_ref) do 81 | key = child_key(child, pid) 82 | 83 | false = Map.has_key?(state.children, pid) 84 | 85 | child = %{child | pid: pid, timer_ref: timer_ref, meta: child.spec.meta, key: key} 86 | 87 | state = 88 | if is_nil(child.spec.id), 89 | do: state, 90 | else: Map.update!(state, :id_to_key, &Map.put(&1, child.spec.id, key)) 91 | 92 | {key, 93 | state 94 | |> Map.update!(:children, &Map.put(&1, child.key, child)) 95 | |> update_bindings(key, child.spec) 96 | |> update_shutdown_groups(key, child.spec)} 97 | end 98 | 99 | @spec children(t) :: [child()] 100 | def children(state), do: Map.values(state.children) 101 | 102 | @spec children_in_shutdown_group(t, Parent.shutdown_group()) :: [child] 103 | def children_in_shutdown_group(state, shutdown_group), 104 | do: Map.get(state.shutdown_groups, shutdown_group, []) |> Enum.map(&child!(state, &1)) 105 | 106 | @spec record_restart(t) :: {:ok, t} | :error 107 | def record_restart(state) do 108 | with {:ok, counter} <- RestartCounter.record_restart(state.restart_counter), 109 | do: {:ok, 
%{state | restart_counter: counter}} 110 | end 111 | 112 | @spec pop_child_with_bound_siblings(t, Parent.child_ref()) :: {:ok, [child], t} | :error 113 | def pop_child_with_bound_siblings(state, child_ref) do 114 | with {:ok, child} <- child(state, child_ref) do 115 | {children, state} = pop_child_and_bound_children(state, child.key) 116 | {:ok, children, state} 117 | end 118 | end 119 | 120 | @spec num_children(t) :: non_neg_integer 121 | def num_children(state), do: Enum.count(state.children) 122 | 123 | @spec child(t, Parent.child_ref() | key) :: {:ok, child} | :error 124 | def child(_state, nil), do: :error 125 | def child(state, {__MODULE__, _ref} = key), do: Map.fetch(state.children, key) 126 | def child(state, pid) when is_pid(pid), do: Map.fetch(state.children, pid) 127 | 128 | def child(state, id), 129 | do: with({:ok, key} <- Map.fetch(state.id_to_key, id), do: child(state, key)) 130 | 131 | @spec child?(t, Parent.child_ref()) :: boolean() 132 | def child?(state, child_ref), do: match?({:ok, _child}, child(state, child_ref)) 133 | 134 | @spec child!(t, Parent.child_ref()) :: child 135 | def child!(state, child_ref) do 136 | {:ok, child} = child(state, child_ref) 137 | child 138 | end 139 | 140 | @spec child_id(t, pid) :: {:ok, Parent.child_id()} | :error 141 | def child_id(state, pid) do 142 | with {:ok, child} <- child(state, pid), do: {:ok, child.spec.id} 143 | end 144 | 145 | @spec child_pid(t, Parent.child_id()) :: {:ok, pid} | :error 146 | def child_pid(state, id), do: with({:ok, child} <- child(state, id), do: {:ok, child.pid}) 147 | 148 | @spec child_meta(t, Parent.child_ref()) :: {:ok, Parent.child_meta()} | :error 149 | def child_meta(state, child_ref) do 150 | with {:ok, child} <- child(state, child_ref), do: {:ok, child.meta} 151 | end 152 | 153 | @spec update_child_meta(t, Parent.child_ref(), (Parent.child_meta() -> Parent.child_meta())) :: 154 | {:ok, Parent.child_meta(), t} | :error 155 | def update_child_meta(state, child_ref, updater) do 
156 | with {:ok, child, state} <- update(state, child_ref, &update_in(&1.meta, updater)), 157 | do: {:ok, child.meta, state} 158 | end 159 | 160 | @spec set_child_process(t, Parent.child_ref(), pid | :undefined, reference) :: t 161 | def set_child_process(state, child_ref, new_pid, new_timer_ref) do 162 | child = child!(state, child_ref) 163 | {:ok, children, state} = pop_child_with_bound_siblings(state, child.key) 164 | 165 | new_key = if is_pid(new_pid), do: new_pid, else: child.key 166 | 167 | children 168 | |> Enum.sort_by(& &1.startup_index) 169 | |> Enum.reduce( 170 | state, 171 | fn new_child, state -> 172 | new_child = 173 | update_in(new_child.spec.binds_to, fn binds_to -> 174 | Enum.map(binds_to, &Map.get(%{child.key => new_key}, &1, &1)) 175 | end) 176 | 177 | pid = if new_child.key == child.key, do: new_pid, else: new_child.pid 178 | timer_ref = if new_child.key == child.key, do: new_timer_ref, else: new_child.timer_ref 179 | 180 | {_, state} = reregister_child(state, new_child, pid, timer_ref) 181 | state 182 | end 183 | ) 184 | end 185 | 186 | defp child_key(_, pid) when is_pid(pid), do: pid 187 | defp child_key(%{key: {__MODULE__, _} = key}, :undefined), do: key 188 | defp child_key(_, :undefined), do: {__MODULE__, make_ref()} 189 | 190 | defp update_bindings(state, key, child_spec) do 191 | Enum.reduce( 192 | child_spec.binds_to, 193 | state, 194 | fn child_ref, state -> 195 | bound = child!(state, child_ref) 196 | %{state | bound: Map.update(state.bound, bound.key, [key], &[key | &1])} 197 | end 198 | ) 199 | end 200 | 201 | defp update_shutdown_groups(state, _key, %{shutdown_group: nil}), do: state 202 | 203 | defp update_shutdown_groups(state, key, spec) do 204 | Map.update!( 205 | state, 206 | :shutdown_groups, 207 | &Map.update(&1, spec.shutdown_group, [key], fn keys -> [key | keys] end) 208 | ) 209 | end 210 | 211 | defp update(state, child_ref, updater) do 212 | with {:ok, child} <- child(state, child_ref), 213 | updated_child = 
updater.(child), 214 | updated_children = Map.put(state.children, child.key, updated_child), 215 | do: {:ok, updated_child, %{state | children: updated_children}} 216 | end 217 | 218 | defp pop_child_and_bound_children(state, child_ref) do 219 | child = child!(state, child_ref) 220 | children = child_with_bound_siblings(state, child) 221 | state = Enum.reduce(children, state, &remove_child(&2, &1)) 222 | {children, state} 223 | end 224 | 225 | defp child_with_bound_siblings(state, child), 226 | do: Map.values(child_with_bound_siblings(state, child, %{})) 227 | 228 | defp child_with_bound_siblings(state, child, collected) do 229 | # collect all siblings in the same shutdown group 230 | group_children = 231 | if is_nil(child.spec.shutdown_group), 232 | do: [child], 233 | else: children_in_shutdown_group(state, child.spec.shutdown_group) 234 | 235 | collected = Enum.reduce(group_children, collected, &Map.put_new(&2, &1.key, &1)) 236 | 237 | for child <- group_children, 238 | bound_pid <- Map.get(state.bound, child.key, []), 239 | bound_sibling = child!(state, bound_pid), 240 | sibling_key = bound_sibling.key, 241 | not Map.has_key?(collected, bound_sibling.key), 242 | reduce: collected do 243 | %{^sibling_key => _} = collected -> 244 | collected 245 | 246 | collected -> 247 | child_with_bound_siblings( 248 | state, 249 | bound_sibling, 250 | Map.put(collected, bound_sibling.key, bound_sibling) 251 | ) 252 | end 253 | end 254 | 255 | defp remove_child(state, child) do 256 | group = child.spec.shutdown_group 257 | 258 | state 259 | |> Map.update!(:id_to_key, &Map.delete(&1, child.spec.id)) 260 | |> Map.update!(:children, &Map.delete(&1, child.key)) 261 | |> Map.update!(:bound, &Map.delete(&1, child.key)) 262 | |> remove_child_from_bound(child) 263 | |> Map.update!(:shutdown_groups, fn 264 | groups -> 265 | with %{^group => children} <- groups do 266 | case children -- [child.key] do 267 | [] -> Map.delete(groups, group) 268 | children -> %{groups | group => children} 
269 | end 270 | end 271 | end) 272 | end 273 | 274 | defp remove_child_from_bound(state, child) do 275 | Enum.reduce( 276 | child.spec.binds_to, 277 | state, 278 | fn dep, state -> 279 | case child(state, dep) do 280 | {:ok, dep_child} -> update_in(state.bound[dep_child.key], &(&1 -- [child.key])) 281 | :error -> state 282 | end 283 | end 284 | ) 285 | end 286 | end 287 | -------------------------------------------------------------------------------- /lib/parent/supervisor.ex: -------------------------------------------------------------------------------- 1 | defmodule Parent.Supervisor do 2 | @moduledoc """ 3 | Supervisor of child processes. 4 | 5 | This module works similarly to callbackless supervisors started with `Supervisor.start_link/2`. 6 | To start a supervisor and some children you can do the following: 7 | 8 | Parent.Supervisor.start_link([ 9 | child_spec1, 10 | child_spec2, 11 | # ... 12 | ]) 13 | 14 | To install a parent supervisor in the supervision tree you can provide child specification in the 15 | shape of `{Parent.Supervisor, {children, parent_options}}`. 16 | 17 | To build a dedicated supervisor module you can do: 18 | 19 | defmodule MySupervisor do 20 | use Parent.Supervisor 21 | 22 | def start_link({children, options}), 23 | do: Parent.Supervisor.start_link(children, options) 24 | 25 | # ... 26 | end 27 | 28 | And now, you can install this supervisor in the supervision tree by passing 29 | `{MySupervisor, {child_specs, parent_options}}` as the child specification to the parent. 30 | 31 | You can interact with the running supervisor using functions from the `Parent.Client` module. 32 | Refer to the `Parent` module for detailed explanation of child specifications, parent options, 33 | and behaviour of parent processes. 34 | 35 | In case you need more flexibility, take a look at `Parent.GenServer`. 36 | """ 37 | use Parent.GenServer 38 | 39 | @doc """ 40 | Starts the parent process.
41 | 42 | This function returns only after all the children have been started. If a child fails to start, 43 | the parent process will terminate all successfully started children, and then itself. 44 | """ 45 | @spec start_link([Parent.start_spec()], Parent.GenServer.options()) :: GenServer.on_start() 46 | def start_link(children, options \\ []), 47 | do: Parent.GenServer.start_link(__MODULE__, children, options) 48 | 49 | @impl GenServer 50 | def init(children) do 51 | Parent.start_all_children!(children) 52 | {:ok, nil} 53 | end 54 | 55 | @spec child_spec({[Parent.start_spec()], Parent.GenServer.options()}) :: Parent.child_spec() 56 | def child_spec({children, options}) do 57 | [start: {__MODULE__, :start_link, [children, options]}] 58 | |> Parent.parent_spec() 59 | |> Parent.child_spec(id: Keyword.get(options, :name, __MODULE__)) 60 | end 61 | 62 | @doc false 63 | defmacro __using__(_) do 64 | quote do 65 | def child_spec(arg) do 66 | [start: {__MODULE__, :start_link, [arg]}] 67 | |> Parent.parent_spec() 68 | |> Parent.child_spec(id: __MODULE__) 69 | end 70 | 71 | defoverridable child_spec: 1 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/periodic.ex: -------------------------------------------------------------------------------- 1 | defmodule Periodic do 2 | @moduledoc """ 3 | Periodic job execution. 4 | 5 | ## Quick start 6 | 7 | It is recommended (but not required) to implement the job in a dedicated module. For example: 8 | 9 | defmodule SomeCleanup do 10 | def child_spec(_arg) do 11 | Periodic.child_spec( 12 | id: __MODULE__, 13 | run: &run/0, 14 | every: :timer.hours(1) 15 | ) 16 | end 17 | 18 | defp run(), do: # ... 19 | end 20 | 21 | With such module implemented, you can place the job somewhere in the supervision tree: 22 | 23 | Supervisor.start_link( 24 | [ 25 | SomeCleanup, 26 | # ... 27 | ], 28 | # ... 
29 | ) 30 | 31 | You can of course start multiple periodic jobs in the system, and they don't have to be the 32 | children of the same supervisor. You're advised to place the job in the proper part of the 33 | supervision tree. For example, a database cleanup job should share the ancestor with the 34 | repo, while a job working with Phoenix channels should share the ancestor with the 35 | endpoint. 36 | 37 | As mentioned, you don't need to create a dedicated module to run a job. It's also possible to 38 | provide `{Periodic, opts}` in the supervisor child list. Finally, if you need more runtime 39 | flexibility, you can also start the job with `start_link/1`. 40 | 41 | 42 | ## Process structure 43 | 44 | The process started with `start_link` is called the _scheduler_. This is the process which 45 | regularly "ticks" in the given interval and executes the _job_. The job is executed in a separate 46 | one-off process, which is the child of the scheduler. When the job is done, the job process 47 | stops. Therefore, each job instance is running in a separate process. 48 | 49 | Depending on the overlapping mode (see the `:on_overlap` option), it can happen that multiple 50 | instances of the same job are running simultaneously. 51 | 52 | 53 | ## Options 54 | 55 | - `:run` (required) - Zero arity function or MFA invoked to run the job. This function is 56 | invoked in a separate one-off process which is a child of the scheduler. 57 | - `:every` (required) - Time in milliseconds between two consecutive job executions (see 58 | `:delay_mode` option for details). 59 | - `:initial_delay` - Time in milliseconds before the first execution of the job. If not provided, 60 | the value of `:every` is used. In other words, the first execution will by default 61 | take place after the `:every` interval has passed. 62 | - `:delay_mode` - Controls how the `:every` interval is interpreted.
Following options are 63 | possible: 64 | - `:regular` (default) - `:every` represents the time between two consecutive starts 65 | - `:shifted` - `:every` represents the time between the termination of the previous and the 66 | start of the next instance. 67 | 68 | See the "Delay mode" section for more details. 69 | - `:when` - Function which acts as an additional runtime guard to decide if the job will be 70 | started. This can be useful for implementing fixed scheduled jobs. See the "Fixed scheduling" 71 | section for details. 72 | - `:on_overlap` - Defines the desired behaviour when the job is about to be started while the 73 | previous instance is still running. 74 | - `:run` (default) - always start the new job 75 | - `:ignore` - don't start the new job if the previous instance is still running 76 | - `:stop_previous` - stop the previous instance before starting the new one 77 | - `:timeout` - Defines the maximum running time of the job. If the job doesn't finish in the 78 | given time, it is terminated according to its shutdown specification. Defaults to `:infinity`. 79 | - `:job_shutdown` - Shutdown value of the job process. See the "Shutdown" section 80 | for details. 81 | - `:id` - Supervisor child id of the scheduler process. Defaults to `Periodic`. If you plan on 82 | running multiple periodic jobs under the same supervisor, make sure that they have different 83 | id values. 84 | - `:name` - Registered name of the scheduler process. If not provided, the process will not be 85 | registered. 86 | - `:telemetry_id` - Id used in telemetry event names. See the "Telemetry" section for more 87 | details. If not provided, telemetry events won't be emitted. 88 | - `:mode` - When set to `:manual`, the jobs won't be started automatically. Instead you have to 89 | manually send tick signals to the scheduler. This should be used only in `:test` mix env. See 90 | the "Testing" section for details. 
91 | 92 | 93 | ## Delay mode 94 | 95 | In the `:regular` mode (which is the default), the interval indicates time between two 96 | consecutive starts. This mode is typically useful if you want to maintain a stable execution 97 | rate (the number of executions per some time period). It is also a better choice if you're 98 | implementing fixed scheduling, as advised in the "Fixed scheduling" section. 99 | 100 | In the `:shifted` mode the interval represents the pause time between the end of the job and the 101 | start of the next one. This mode is likely a better choice if you want to have a fixed "cool off" 102 | period between two consecutive executions, to reduce the load on the system. 103 | 104 | Internally, Periodic relies on Erlang's monotonic time, which improves rate stability regardless 105 | of system time changes (see [Time correction](http://erlang.org/doc/apps/erts/time_correction.html#time-correction)). 106 | Consider using the "Multi-time warp mode" (see [here](http://erlang.org/doc/apps/erts/time_correction.html#time-warp-modes)) 107 | to further improve rate stability in the situations when system time changes. 108 | 109 | In general, the overhead introduced by Periodic as well as job processing will be compensated, 110 | and you can usually expect stable intervals with very small variations (typically in sub 111 | milliseconds range), and no steady shift over time. However, in some cases, for example when the 112 | system is overloaded, the variations might be more significant. 113 | 114 | In the `:shifted` mode the job duration will affect the execution of the next job. In addition, 115 | Periodic will induce a slight (usually less than 100 microseconds), but a steady skew, due to 116 | its own internal processing. 117 | 118 | 119 | ## Shutdown 120 | 121 | To stop the scheduler, you need to ask its parent supervisor to stop the scheduler using 122 | [Supervisor.terminate_child](https://hexdocs.pm/elixir/Supervisor.html#terminate_child/2). 
123 | 124 | The scheduler process acts as a supervisor, and so it has the same shutdown behaviour. When 125 | ordered to terminate by its parent, the scheduler will stop currently running job instances 126 | according to the `:job_shutdown` configuration. 127 | 128 | The default behaviour is to wait 5 seconds for the job to finish. However, in order for this 129 | waiting to actually happen, you need to invoke `Process.flag(:trap_exit, true)` from the run 130 | function. 131 | 132 | You can change the waiting time with the `:job_shutdown` option, which has the same semantics as 133 | in `Supervisor`. See [corresponding Supervisor documentation] 134 | (https://hexdocs.pm/elixir/Supervisor.html#module-shutdown-values-shutdown) for details. 135 | 136 | 137 | ## Fixed scheduling 138 | 139 | Periodic doesn't have explicit support for scheduling jobs at some particular time (e.g. every 140 | day at midnight). However, you can implement this on top of the provided functionality using 141 | the `:when` option 142 | 143 | defmodule SomeCleanup do 144 | def child_spec(_arg) do 145 | Periodic.child_spec( 146 | # check every minute if we need to run the cleanup 147 | every: :timer.minutes(1), 148 | 149 | # start the job only if it's midnight 150 | when: fn -> match?(%Time{hour: 0, minute: 0}, Time.utc_now()) end, 151 | 152 | # ... 153 | ) 154 | end 155 | 156 | # ... 157 | end 158 | 159 | Note that the execution guarantees here are "at most once". If the system is down at the 160 | scheduled time, the job won't be executed. Stronger guarantees can be obtained by basing the 161 | conditional logic on some persistence mechanism. 162 | 163 | Note that the `:when` guard is executed in the scheduler process. If the guard execution time is 164 | larger than the ticking period, time drifts will occur. 165 | 166 | 167 | ## Telemetry 168 | 169 | The scheduler optionally emits telemetry events. To configure telemetry you need to provide 170 | the `:telemetry_id` option. 
For example: 171 | 172 | Periodic.start_link(telemetry_id: :db_cleanup, ...) 173 | 174 | This will emit various events in the shape of `[Periodic, telemetry_id, event]`. Currently 175 | supported events are: 176 | 177 | - `:started` - a new job instance is started 178 | - `:finished` - job instance has finished or crashed (see related metadata for the reason) 179 | - `:skipped` - new instance hasn't been started because the previous one is still running 180 | - `:stopped_previous` - previous instance has been stopped because the new one is about to be 181 | started 182 | 183 | To consume the desired events, install the corresponding telemetry handler. 184 | 185 | 186 | ## Logging 187 | 188 | Basic logger is provided in `Periodic.Logger`. To use it, the scheduler needs to be started with 189 | the `:telemetry_id` option. 190 | 191 | To install logger handlers, you can invoke `Periodic.Logger.install(telemetry_id)`. This function 192 | should be invoked only once per each scheduler during the system lifetime, preferably before the 193 | scheduler is started. A convenient place to do it is your application start callback. 194 | 195 | 196 | ## Testing 197 | 198 | The scheduler can be deterministically tested by setting the `:mode` option to `:manual`. 199 | In this mode, the scheduler won't tick on its own, and so it won't start any jobs unless 200 | instructed to by the client code. 201 | 202 | The `:mode` should be set to `:manual` only in test mix environment. Here's a simple approach 203 | which doesn't require app env and config files: 204 | 205 | defmodule MyPeriodicJob do 206 | @mode if Mix.env() != :test, do: :auto, else: :manual 207 | 208 | def child_spec(_arg) do 209 | Periodic.child_spec( 210 | mode: @mode, 211 | name: __MODULE__, 212 | # ... 213 | ) 214 | end 215 | 216 | # ... 217 | end 218 | 219 | Of course, you can alternatively use app env or any other approach you prefer. Just make sure 220 | to set the mode to manual only in test env. 
221 | 222 | Notice that we're also setting the registered name. We'll need it to 223 | interact with the scheduler. 224 | 225 | With such setup in place, the general shape of the periodic job test would look like this: 226 | 227 | defmodule MyPeriodicJobTest do 228 | use ExUnit.Case, async: true 229 | require Periodic.Test 230 | 231 | test "my periodic job" do 232 | bring_the_system_into_the_desired_state() 233 | 234 | # tick the scheduler 235 | assert Periodic.Test.sync_tick(MyPeriodicJob) == {:ok, :normal} 236 | 237 | verify_side_effect_of_the_job() 238 | end 239 | end 240 | 241 | Note that this won't suffice for fixed schedules. Consider again the cleanup job which runs 242 | at midnight: 243 | 244 | defmodule SomeCleanup do 245 | def child_spec(_arg) do 246 | Periodic.child_spec( 247 | every: :timer.minutes(1), 248 | when: fn -> match?(%Time{hour: 0, minute: 0}, Time.utc_now()) end, 249 | 250 | # ... 251 | ) 252 | end 253 | 254 | # ... 255 | end 256 | 257 | Manually ticking won't start the job, unless the test is running exactly at midnight. To make 258 | this module testable, you need to use a different implementation of `:when` in test environment: 259 | 260 | defmodule SomeCleanup do 261 | def child_spec(_arg) do 262 | Periodic.child_spec( 263 | every: :timer.minutes(1), 264 | when: &should_run?/0, 265 | 266 | # ... 267 | ) 268 | end 269 | 270 | if Mix.env() != :test do 271 | defp should_run?(), do: match?(%Time{hour: 0, minute: 0}, Time.utc_now()) 272 | else 273 | defp should_run?(), do: true 274 | end 275 | 276 | # ...
277 | end 278 | 279 | 280 | ## Comparison to other schedulers 281 | 282 | There are various other abstractions for running periodic jobs in BEAM, such as: 283 | 284 | - the built-in [:timer](https://erlang.org/doc/man/timer.html) module from Erlang stdlib 285 | - [erlcron](https://github.com/erlware/erlcron) 286 | - [quantum](https://hexdocs.pm/quantum/readme.html) 287 | - [Oban](https://hexdocs.pm/oban/Oban.html#module-periodic-cron-jobs) 288 | 289 | Compared to `:timer`, Periodic offers some additional features, such as overlap handling, 290 | distributed scheduling, and telemetry support. 291 | 292 | Compared to most other third-party libraries, Periodic will likely provide far fewer features 293 | out of the box. So in some situations, such as database persistence or back-pressure, you might 294 | need to invest more effort with Periodic. On the plus side, Periodic should be simpler to use 295 | in typical scenarios, and much easier to reason about, while still providing enough flexibility 296 | to handle arbitrarily complex scenarios. 297 | 298 | For a more detailed discussion, see [this blog post](https://www.theerlangelist.com/article/periodic). 299 | """ 300 | use Parent.GenServer 301 | require Logger 302 | 303 | @type opts :: [ 304 | id: term, 305 | name: GenServer.name(), 306 | telemetry_id: term, 307 | mode: :auto | :manual, 308 | every: pos_integer, 309 | initial_delay: non_neg_integer, 310 | delay_mode: :regular | :shifted, 311 | run: (() -> term) | {module, atom, [term]}, 312 | when: (() -> boolean) | {module, atom, [term]}, 313 | on_overlap: :run | :ignore | :stop_previous, 314 | timeout: pos_integer | :infinity, 315 | job_shutdown: :brutal_kill | :infinity | non_neg_integer() 316 | ] 317 | 318 | @doc "Starts the periodic executor."
319 | @spec start_link(opts) :: GenServer.on_start() 320 | def start_link(opts) do 321 | gen_server_opts = Keyword.take(opts, [:name]) 322 | Parent.GenServer.start_link(__MODULE__, Map.new(opts), gen_server_opts) 323 | end 324 | 325 | @doc "Builds a child specification for starting the periodic executor." 326 | @spec child_spec(opts) :: Supervisor.child_spec() 327 | def child_spec(opts) do 328 | opts 329 | |> super() 330 | |> Supervisor.child_spec(id: Keyword.get(opts, :id, __MODULE__)) 331 | end 332 | 333 | @impl GenServer 334 | def init(opts) do 335 | state = Map.merge(defaults(), opts) 336 | {initial_delay, state} = Map.pop(state, :initial_delay, state.every) 337 | enqueue_next_tick(state, initial_delay) 338 | {:ok, state} 339 | end 340 | 341 | @impl GenServer 342 | def handle_info({:tick, expected_now}, state) do 343 | handle_tick(state, now: expected_now) 344 | {:noreply, state} 345 | end 346 | 347 | @impl GenServer 348 | def handle_call({:tick, opts}, from, %{mode: :manual} = state) do 349 | caller = if Keyword.get(opts, :await_job?), do: from, else: nil 350 | response = handle_tick(state, caller: caller) 351 | 352 | cond do 353 | is_nil(caller) -> {:reply, :ok, state} 354 | match?({:ok, _pid}, response) -> {:noreply, state} 355 | true -> {:reply, {:error, :job_not_started}, state} 356 | end 357 | end 358 | 359 | def handle_call({:tick, _opts}, _from, state) do 360 | {:reply, {:error, :not_in_manual_mode}, state} 361 | end 362 | 363 | @impl Parent.GenServer 364 | def handle_stopped_children(info, state) do 365 | [info] = Map.values(info) 366 | 367 | with from when not is_nil(from) <- info.meta.caller, 368 | do: GenServer.reply(from, {:ok, info.exit_reason}) 369 | 370 | if state.delay_mode == :shifted, do: enqueue_next_tick(state, state.every) 371 | 372 | duration = 373 | :erlang.convert_time_unit( 374 | :erlang.monotonic_time() - info.meta.started_at, 375 | :native, 376 | :microsecond 377 | ) 378 | 379 | telemetry(state, :finished, %{job: info.pid, reason: 
info.exit_reason}, %{time: duration}) 380 | {:noreply, state} 381 | end 382 | 383 | defp defaults() do 384 | %{ 385 | telemetry_id: nil, 386 | mode: :auto, 387 | delay_mode: :regular, 388 | on_overlap: :run, 389 | timeout: :infinity, 390 | job_shutdown: :timer.seconds(5), 391 | when: nil 392 | } 393 | end 394 | 395 | defp handle_tick(state, opts) do 396 | if state.delay_mode == :regular, do: enqueue_next_tick(state, state.every, opts) 397 | if job_guard_satisfied?(state), do: start_job(state, opts) 398 | end 399 | 400 | defp job_guard_satisfied?(%{when: nil}), do: true 401 | defp job_guard_satisfied?(%{when: {m, f, a}}), do: apply(m, f, a) 402 | defp job_guard_satisfied?(%{when: fun}) when is_function(fun), do: fun.() 403 | 404 | defp start_job(%{on_overlap: :run} = state, opts), 405 | do: start_job_process(state, opts) 406 | 407 | defp start_job(%{on_overlap: :ignore} = state, opts) do 408 | case previous_instance() do 409 | {:ok, pid} -> 410 | telemetry(state, :skipped, %{still_running: pid}) 411 | nil 412 | 413 | :error -> 414 | start_job_process(state, opts) 415 | end 416 | end 417 | 418 | defp start_job(%{on_overlap: :stop_previous} = state, opts) do 419 | with {:ok, pid} <- previous_instance() do 420 | Parent.shutdown_all(:kill) 421 | telemetry(state, :stopped_previous, %{pid: pid}) 422 | end 423 | 424 | start_job_process(state, opts) 425 | end 426 | 427 | defp start_job_process(state, opts) do 428 | job = state.run 429 | 430 | {:ok, pid} = 431 | Parent.start_child(%{ 432 | id: make_ref(), 433 | start: {Task, :start_link, [fn -> invoke_job(job) end]}, 434 | timeout: state.timeout, 435 | shutdown: state.job_shutdown, 436 | restart: :temporary, 437 | meta: %{started_at: :erlang.monotonic_time(), caller: Keyword.get(opts, :caller)}, 438 | ephemeral?: true 439 | }) 440 | 441 | telemetry(state, :started, %{job: pid}) 442 | {:ok, pid} 443 | end 444 | 445 | defp invoke_job({mod, fun, args}), do: apply(mod, fun, args) 446 | defp invoke_job(fun) when is_function(fun, 
0), do: fun.() 447 | 448 | defp previous_instance() do 449 | case Parent.children() do 450 | [child] -> {:ok, child.pid} 451 | [] -> :error 452 | end 453 | end 454 | 455 | defp enqueue_next_tick(state, delay, opts \\ []) do 456 | telemetry(state, :next_tick, %{in: delay}) 457 | 458 | if state.mode == :auto do 459 | now = Keyword.get_lazy(opts, :now, fn -> :erlang.monotonic_time(:millisecond) end) 460 | next_tick_abs_time = now + delay 461 | Process.send_after(self(), {:tick, next_tick_abs_time}, next_tick_abs_time, abs: true) 462 | end 463 | end 464 | 465 | defp telemetry(state, event, data, measurements \\ %{}) 466 | 467 | if Mix.env() != :test do 468 | defp telemetry(_state, :next_tick, _data, _measurements), do: :ok 469 | end 470 | 471 | defp telemetry(%{telemetry_id: nil}, _event, _data, _measurements), do: :ok 472 | 473 | defp telemetry(state, event, data, measurements) do 474 | :telemetry.execute( 475 | [__MODULE__, state.telemetry_id, event], 476 | measurements, 477 | Map.merge(data, %{scheduler: self()}) 478 | ) 479 | end 480 | end 481 | -------------------------------------------------------------------------------- /lib/periodic/logger.ex: -------------------------------------------------------------------------------- 1 | defmodule Periodic.Logger do 2 | @moduledoc "Telemetry handler that support basic logging of periodic scheduler events." 3 | 4 | require Logger 5 | 6 | @doc "Installs telemetry handlers for the given scheduler." 
7 | @spec install(any) :: :ok 8 | def install(telemetry_id) do 9 | Enum.each( 10 | ~w/started finished skipped stopped_previous/a, 11 | &attach_telemetry_handler(telemetry_id, &1) 12 | ) 13 | end 14 | 15 | defp attach_telemetry_handler(telemetry_id, event) do 16 | handler_id = make_ref() 17 | event_name = [Periodic, telemetry_id, event] 18 | :telemetry.attach(handler_id, event_name, &__MODULE__.telemetry_handler/4, nil) 19 | end 20 | 21 | @doc false 22 | def telemetry_handler([Periodic, telemetry_id, event], measurements, meta, nil) do 23 | Logger.log(log_level(event, meta), message(telemetry_id, event, measurements, meta)) 24 | end 25 | 26 | defp log_level(:started, _meta), do: :info 27 | defp log_level(:skipped, _meta), do: :info 28 | defp log_level(:stopped_previous, _meta), do: :warn 29 | 30 | defp log_level(:finished, meta), 31 | do: if(meta.reason in [:shutdown, :normal], do: :info, else: :error) 32 | 33 | defp message(telemetry_id, event, measurements, meta) do 34 | "Periodic(#{inspect(telemetry_id)}): #{message(event, meta, measurements)}" 35 | end 36 | 37 | defp message(:started, meta, _measurements), do: "job #{inspect(meta.job)} started" 38 | 39 | defp message(:finished, meta, measurements) do 40 | [ 41 | "job #{inspect(meta.job)} ", 42 | case meta.reason do 43 | :normal -> "finished" 44 | :shutdown -> "shut down" 45 | :killed -> "killed" 46 | other -> "exited with reason #{inspect(other)}" 47 | end, 48 | ", duration=#{measurements.time}us" 49 | ] 50 | end 51 | 52 | defp message(:skipped, _meta, _measurements), 53 | do: "skipped starting the job because the previous instance is still running" 54 | 55 | defp message(:stopped_previous, _meta, _measurements), 56 | do: "killed previous job instance, because the new job is about to be started" 57 | end 58 | -------------------------------------------------------------------------------- /lib/periodic/test.ex: -------------------------------------------------------------------------------- 1 | defmodule 
Periodic.Test do 2 | @moduledoc """ 3 | Helpers for testing a periodic job. 4 | 5 | See the "Testing" section in `Periodic` documentation for details. 6 | """ 7 | 8 | public_telemetry_events = ~w/started finished skipped stopped_previous/a 9 | 10 | @telemetry_events if Mix.env() != :test, 11 | do: public_telemetry_events, 12 | else: [:next_tick | public_telemetry_events] 13 | 14 | @doc """ 15 | Sends a tick signal to the given scheduler. 16 | 17 | This function returns after the tick signal has been sent, and the job has been started. 18 | However, the function doesn't wait for the job to finish. If you want complete synchronism, use 19 | `sync_tick/2`. 20 | 21 | If the scheduler was not started with `:mode` set to `:manual`, this function returns 22 | `{:error, :not_in_manual_mode}`. 23 | """ 24 | @spec tick(GenServer.server()) :: :ok | {:error, :not_in_manual_mode} 25 | def tick(pid_or_name), do: GenServer.call(pid_or_name, {:tick, []}) 26 | 27 | @doc """ 28 | Sends a tick signal to the given scheduler and waits for the job to finish. 29 | 30 | The function returns the job exit reason, or error if the job hasn't been started. 31 | 32 | If the scheduler was not started with `:mode` set to `:manual`, this function returns 33 | `{:error, :not_in_manual_mode}`. 34 | """ 35 | @spec sync_tick(GenServer.server(), non_neg_integer | :infinity) :: 36 | {:ok, job_exit_reason :: any} | {:error, :job_not_started | :not_in_manual_mode} 37 | def sync_tick(pid_or_name, timeout \\ :timer.seconds(5)) do 38 | GenServer.call(pid_or_name, {:tick, [await_job?: true]}, timeout) 39 | end 40 | 41 | @doc "Subscribes to telemetry events of the given scheduler." 42 | @spec observe(any) :: :ok 43 | def observe(telemetry_id), 44 | do: Enum.each(@telemetry_events, &attach_telemetry_handler(telemetry_id, &1)) 45 | 46 | @doc "Waits for the given telemetry event." 
47 | defmacro assert_periodic_event( 48 | telemetry_id, 49 | event, 50 | metadata \\ quote(do: _), 51 | measurements \\ quote(do: _) 52 | ) do 53 | quote do 54 | assert_receive { 55 | unquote(__MODULE__), 56 | unquote(telemetry_id), 57 | unquote(event), 58 | unquote(metadata), 59 | unquote(measurements) 60 | }, 61 | 100 62 | end 63 | end 64 | 65 | @doc "Asserts that the given telemetry event won't be emitted." 66 | defmacro refute_periodic_event( 67 | telemetry_id, 68 | event, 69 | metadata \\ quote(do: _), 70 | measurements \\ quote(do: _) 71 | ) do 72 | quote do 73 | refute_receive { 74 | unquote(__MODULE__), 75 | unquote(telemetry_id), 76 | unquote(event), 77 | unquote(metadata), 78 | unquote(measurements) 79 | }, 80 | 100 81 | end 82 | end 83 | 84 | defp attach_telemetry_handler(telemetry_id, event) do 85 | handler_id = make_ref() 86 | event_name = [Periodic, telemetry_id, event] 87 | :telemetry.attach(handler_id, event_name, &__MODULE__.telemetry_handler/4, self()) 88 | ExUnit.Callbacks.on_exit(fn -> :telemetry.detach(handler_id) end) 89 | end 90 | 91 | @doc false 92 | def telemetry_handler([Periodic, telemetry_id, event], measurements, metadata, test_pid), 93 | do: send(test_pid, {__MODULE__, telemetry_id, event, metadata, measurements}) 94 | end 95 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Parent.MixProject do 2 | use Mix.Project 3 | 4 | @version "0.12.1" 5 | 6 | def project do 7 | [ 8 | app: :parent, 9 | version: @version, 10 | elixir: "~> 1.10", 11 | start_permanent: Mix.env() == :prod, 12 | deps: deps(), 13 | elixirc_paths: elixirc_paths(Mix.env()), 14 | dialyzer: [plt_add_deps: :transitive, plt_add_apps: [:ex_unit]], 15 | docs: docs(), 16 | package: package() 17 | ] 18 | end 19 | 20 | def application do 21 | [ 22 | extra_applications: [:logger], 23 | mod: {Parent.Application, []} 24 | ] 25 | end 26 | 27 | defp 
deps do 28 | [ 29 | {:dialyxir, "~> 0.5", runtime: false, only: [:dev, :test]}, 30 | {:ex_doc, "~> #{ex_doc_version()}", only: :dev, runtime: false}, 31 | {:mox, "~> 0.5.0", only: :test}, 32 | {:telemetry, "~> 0.4 or ~> 1.0"} 33 | ] 34 | end 35 | 36 | defp ex_doc_version() do 37 | if Version.compare(System.version(), "1.7.0") == :lt, do: "0.18.0", else: "0.19" 38 | end 39 | 40 | defp elixirc_paths(:test), do: ["lib", "test/support"] 41 | defp elixirc_paths(_), do: ["lib"] 42 | 43 | defp docs() do 44 | [ 45 | extras: ["README.md"], 46 | main: "readme", 47 | source_url: "https://github.com/sasa1977/parent/", 48 | source_ref: @version, 49 | groups_for_modules: [ 50 | Core: [Parent, Parent.Client], 51 | Behaviours: [Parent.GenServer, Parent.Supervisor], 52 | "Periodic job execution": ~r/Periodic(\..+)?/ 53 | ] 54 | ] 55 | end 56 | 57 | defp package() do 58 | [ 59 | description: "Custom parenting of processes.", 60 | maintainers: ["Saša Jurić"], 61 | licenses: ["MIT"], 62 | links: %{ 63 | "Github" => "https://github.com/sasa1977/parent", 64 | "Changelog" => 65 | "https://github.com/sasa1977/parent/blob/#{@version}/CHANGELOG.md##{ 66 | String.replace(@version, ".", "") 67 | }" 68 | } 69 | ] 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "dialyxir": {:hex, :dialyxir, "0.5.1", "b331b091720fd93e878137add264bac4f644e1ddae07a70bf7062c7862c4b952", [:mix], [], "hexpm", "6c32a70ed5d452c6650916555b1f96c79af5fc4bf286997f8b15f213de786f73"}, 3 | "earmark": {:hex, :earmark, "1.4.3", "364ca2e9710f6bff494117dbbd53880d84bebb692dafc3a78eb50aa3183f2bfd", [:mix], [], "hexpm", "8cf8a291ebf1c7b9539e3cddb19e9cef066c2441b1640f13c34c1d3cfc825fec"}, 4 | "earmark_parser": {:hex, :earmark_parser, "1.4.12", "b245e875ec0a311a342320da0551da407d9d2b65d98f7a9597ae078615af3449", [:mix], [], "hexpm", 
"711e2cc4d64abb7d566d43f54b78f7dc129308a63bc103fbd88550d2174b3160"}, 5 | "ex_doc": {:hex, :ex_doc, "0.23.0", "a069bc9b0bf8efe323ecde8c0d62afc13d308b1fa3d228b65bca5cf8703a529d", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "f5e2c4702468b2fd11b10d39416ddadd2fcdd173ba2a0285ebd92c39827a5a16"}, 6 | "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"}, 7 | "makeup_elixir": {:hex, :makeup_elixir, "0.15.0", "98312c9f0d3730fde4049985a1105da5155bfe5c11e47bdc7406d88e01e4219b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "75ffa34ab1056b7e24844c90bfc62aaf6f3a37a15faa76b07bc5eba27e4a8b4a"}, 8 | "mox": {:hex, :mox, "0.5.2", "55a0a5ba9ccc671518d068c8dddd20eeb436909ea79d1799e2209df7eaa98b6c", [:mix], [], "hexpm", "df4310628cd628ee181df93f50ddfd07be3e5ecc30232d3b6aadf30bdfe6092b"}, 9 | "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"}, 10 | "telemetry": {:hex, :telemetry, "1.0.0", "0f453a102cdf13d506b7c0ab158324c337c41f1cc7548f0bc0e130bbf0ae9452", [:rebar3], [], "hexpm", "73bc09fa59b4a0284efb4624335583c528e07ec9ae76aca96ea0673850aec57a"}, 11 | } 12 | -------------------------------------------------------------------------------- /scripts/bench.exs: -------------------------------------------------------------------------------- 1 | {:ok, parent} = Parent.Supervisor.start_link([]) 2 | 3 | :timer.tc(fn -> 4 | Enum.each( 5 | 1..100_000, 6 
| fn _i -> 7 | {:ok, pid} = 8 | Parent.Client.start_child( 9 | parent, 10 | %{start: {Agent, :start_link, [fn -> :ok end]}, restart: :temporary} 11 | ) 12 | 13 | Parent.Client.restart_child(parent, pid) 14 | end 15 | ) 16 | end) 17 | |> elem(0) 18 | |> Kernel.div(1000) 19 | |> IO.inspect() 20 | 21 | IO.inspect(Process.info(parent, :memory)) 22 | 23 | {:ok, sup} = DynamicSupervisor.start_link(strategy: :one_for_one) 24 | 25 | :timer.tc(fn -> 26 | Enum.each( 27 | 1..100_000, 28 | fn i -> 29 | spec = %{id: i, start: {Agent, :start_link, [fn -> :ok end]}, restart: :temporary} 30 | {:ok, pid} = DynamicSupervisor.start_child(sup, spec) 31 | DynamicSupervisor.terminate_child(sup, pid) 32 | DynamicSupervisor.start_child(sup, spec) 33 | end 34 | ) 35 | end) 36 | |> elem(0) 37 | |> Kernel.div(1000) 38 | |> IO.inspect() 39 | 40 | IO.inspect(Process.info(sup, :memory)) 41 | -------------------------------------------------------------------------------- /test/parent/client_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Parent.ClientTest do 2 | use ExUnit.Case, async: true 3 | import Parent.CaptureLog 4 | alias Parent.Client 5 | 6 | setup do 7 | Mox.stub(Parent.RestartCounter.TimeProvider.Test, :now_ms, fn -> 8 | :erlang.unique_integer([:monotonic, :positive]) * :timer.seconds(5) 9 | end) 10 | 11 | :ok 12 | end 13 | 14 | describe "child_pid/1" do 15 | for registry? <- [true, false] do 16 | test "returns the pid of the given child when registry is #{registry?}" do 17 | parent = 18 | start_parent!( 19 | [child_spec(id: :child1), child_spec(id: :child2)], 20 | registry?: unquote(registry?) 
21 | ) 22 | 23 | assert {:ok, pid1} = Client.child_pid(parent, :child1) 24 | assert {:ok, pid2} = Client.child_pid(parent, :child2) 25 | 26 | assert [{:child1, ^pid1, _, _}, {:child2, ^pid2, _, _}] = 27 | :supervisor.which_children(parent) 28 | end 29 | 30 | test "can dereference aliases when registry is #{registry?}" do 31 | registered_name = :"alias_#{System.unique_integer([:positive, :monotonic])}" 32 | parent = start_parent!([child_spec(id: :child)], name: registered_name) 33 | :global.register_name(registered_name, parent) 34 | 35 | assert {:ok, _} = Client.child_pid(registered_name, :child) 36 | assert {:ok, _} = Client.child_pid({:global, registered_name}, :child) 37 | assert {:ok, _} = Client.child_pid({:via, :global, registered_name}, :child) 38 | end 39 | 40 | test "returns error when child is unknown when registry is #{registry?}" do 41 | parent = start_parent!([], registry?: unquote(registry?)) 42 | assert Client.child_pid(parent, :child) == :error 43 | end 44 | 45 | test "returns error if child is stopped when registry is #{registry?}" do 46 | parent = 47 | start_parent!( 48 | [child_spec(id: :child1), child_spec(id: :child2)], 49 | registry?: unquote(registry?) 50 | ) 51 | 52 | Client.shutdown_child(parent, :child1) 53 | 54 | assert Client.child_pid(parent, :child1) == :error 55 | refute Client.child_pid(parent, :child2) == :error 56 | end 57 | end 58 | end 59 | 60 | describe "children/1" do 61 | for registry? <- [true, false] do 62 | test "returns children when registry is #{registry?}" do 63 | parent = 64 | start_parent!( 65 | [child_spec(id: :child1, meta: :meta1), child_spec(id: :child2, meta: :meta2)], 66 | registry?: unquote(registry?) 
67 | ) 68 | 69 | {:ok, child1} = Client.child_pid(parent, :child1) 70 | {:ok, child2} = Client.child_pid(parent, :child2) 71 | 72 | assert Enum.sort_by(Client.children(parent), &"#{&1.id}") == [ 73 | %{id: :child1, meta: :meta1, pid: child1}, 74 | %{id: :child2, meta: :meta2, pid: child2} 75 | ] 76 | end 77 | end 78 | end 79 | 80 | describe "via tuple" do 81 | for registry? <- [true, false] do 82 | test "resolves the pid of the given child when registry is #{registry?}" do 83 | parent = 84 | start_parent!( 85 | [child_spec(id: :child1), child_spec(id: :child2)], 86 | registry?: unquote(registry?) 87 | ) 88 | 89 | assert pid1 = GenServer.whereis({:via, Client, {parent, :child1}}) 90 | assert pid2 = GenServer.whereis({:via, Client, {parent, :child2}}) 91 | 92 | assert [{:child1, ^pid1, _, _}, {:child2, ^pid2, _, _}] = 93 | :supervisor.which_children(parent) 94 | end 95 | 96 | test "returns nil when child is unknown when registry is #{registry?}" do 97 | parent = start_parent!([], registry?: unquote(registry?)) 98 | assert GenServer.whereis({:via, Client, {parent, :child}}) == nil 99 | end 100 | end 101 | end 102 | 103 | describe "child_meta/1" do 104 | for registry? <- [true, false] do 105 | test "returns the meta of the given child when registry is #{registry?}" do 106 | parent = 107 | start_parent!( 108 | [ 109 | child_spec(id: :child1, meta: :meta1), 110 | child_spec(meta: :meta2) 111 | ], 112 | registry?: unquote(registry?) 
113 | ) 114 | 115 | child1 = child_pid!(parent, :child1) 116 | child2 = (parent |> Client.children() |> Enum.reject(&(&1.pid == child1)) |> hd).pid 117 | 118 | assert Client.child_meta(parent, :child1) == {:ok, :meta1} 119 | assert Client.child_meta(parent, child1) == {:ok, :meta1} 120 | assert Client.child_meta(parent, child2) == {:ok, :meta2} 121 | end 122 | 123 | test "returns error when child is unknown when registry is #{registry?}" do 124 | parent = start_parent!([], registry?: unquote(registry?)) 125 | assert Client.child_meta(parent, :child) == :error 126 | end 127 | end 128 | end 129 | 130 | describe "update_child_meta/1" do 131 | test "succeeds if child exists" do 132 | parent = start_parent!([child_spec(id: :child, meta: 1)]) 133 | assert Client.update_child_meta(parent, :child, &(&1 + 1)) 134 | assert Client.child_meta(parent, :child) == {:ok, 2} 135 | end 136 | 137 | test "returns error when child is unknown" do 138 | parent = start_parent!() 139 | assert Client.update_child_meta(parent, :child, & &1) == :error 140 | end 141 | end 142 | 143 | describe "start_child/1" do 144 | test "adds the additional child" do 145 | parent = start_parent!([child_spec(id: :child1)]) 146 | assert {:ok, child2} = Client.start_child(parent, child_spec(id: :child2)) 147 | assert child_pid!(parent, :child2) == child2 148 | end 149 | 150 | test "returns error" do 151 | parent = start_parent!([child_spec(id: :child1)]) 152 | {:ok, child2} = Client.start_child(parent, child_spec(id: :child2)) 153 | 154 | assert Client.start_child(parent, child_spec(id: :child2)) == 155 | {:error, {:already_started, child2}} 156 | 157 | assert child_ids(parent) == [:child1, :child2] 158 | assert child_pid!(parent, :child2) == child2 159 | end 160 | 161 | test "handles child start crash" do 162 | parent = start_parent!([child_spec(id: :child1)]) 163 | 164 | capture_log(fn -> 165 | spec = 166 | child_spec(id: :child2, start: {Agent, :start_link, [fn -> raise "some error" end]}) 167 | 168 | {:error, {_error, _stacktrace}} = 
Client.start_child(parent, spec) 169 | Process.sleep(100) 170 | end) 171 | 172 | assert child_ids(parent) == [:child1] 173 | end 174 | end 175 | 176 | describe "shutdown_child/1" do 177 | test "stops the given child" do 178 | parent = start_parent!([child_spec(id: :child)]) 179 | assert {:ok, _info} = Client.shutdown_child(parent, :child) 180 | assert Client.child_pid(parent, :child) == :error 181 | assert child_ids(parent) == [] 182 | end 183 | 184 | test "returns error when child is unknown" do 185 | parent = start_parent!() 186 | assert Client.shutdown_child(parent, :child) == :error 187 | end 188 | end 189 | 190 | describe "restart_child/1" do 191 | test "stops the given child" do 192 | parent = start_parent!([child_spec(id: :child)]) 193 | pid1 = child_pid!(parent, :child) 194 | assert Client.restart_child(parent, :child) == :ok 195 | assert child_ids(parent) == [:child] 196 | refute child_pid!(parent, :child) == pid1 197 | end 198 | 199 | test "returns error when child is unknown" do 200 | pid = start_parent!() 201 | assert Client.restart_child(pid, :child) == :error 202 | end 203 | end 204 | 205 | describe "shutdown_all/1" do 206 | test "stops all children" do 207 | parent = start_parent!([child_spec(id: :child1), child_spec(id: :child2)]) 208 | assert Map.keys(Client.shutdown_all(parent)) == ~w/child1 child2/a 209 | assert child_ids(parent) == [] 210 | end 211 | end 212 | 213 | describe "return_children/1" do 214 | test "returns all given children" do 215 | parent = 216 | start_parent!([ 217 | child_spec(id: :child1, shutdown_group: :group1), 218 | child_spec(id: :child2, binds_to: [:child1], shutdown_group: :group2), 219 | child_spec(id: :child3, binds_to: [:child2]), 220 | child_spec(id: :child4, shutdown_group: :group1), 221 | child_spec(id: :child5, shutdown_group: :group2), 222 | child_spec(id: :child6) 223 | ]) 224 | 225 | {:ok, stopped_children} = Client.shutdown_child(parent, :child4) 226 | 227 | assert child_ids(parent) == [:child6] 228 | assert 
Client.return_children(parent, stopped_children) == :ok 229 | assert child_ids(parent) == ~w/child1 child2 child3 child4 child5 child6/a 230 | end 231 | end 232 | 233 | defp start_parent!(children \\ [], opts \\ []) do 234 | parent = start_supervised!({Parent.Supervisor, {children, opts}}) 235 | Mox.allow(Parent.RestartCounter.TimeProvider.Test, self(), parent) 236 | parent 237 | end 238 | 239 | defp child_spec(overrides), 240 | do: Parent.child_spec(%{start: {Agent, :start_link, [fn -> :ok end]}}, overrides) 241 | 242 | defp child_pid!(parent, child_id) do 243 | {:ok, pid} = Client.child_pid(parent, child_id) 244 | pid 245 | end 246 | 247 | defp child_ids(parent), do: Enum.map(Client.children(parent), & &1.id) 248 | end 249 | -------------------------------------------------------------------------------- /test/parent/gen_server_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Parent.GenServerTest do 2 | use ExUnit.Case, async: true 3 | alias Parent.{Client, TestServer} 4 | 5 | setup do 6 | Mox.stub(Parent.RestartCounter.TimeProvider.Test, :now_ms, fn -> 7 | :erlang.unique_integer([:monotonic, :positive]) * :timer.seconds(5) 8 | end) 9 | 10 | :ok 11 | end 12 | 13 | test "init" do 14 | server = start_server!() 15 | assert :sys.get_state(server) == :initial_state 16 | end 17 | 18 | test "call" do 19 | server = start_server!() 20 | 21 | assert TestServer.call(server, fn state -> {{:response, state}, :new_state} end) == 22 | {:response, :initial_state} 23 | 24 | assert :sys.get_state(server) == :new_state 25 | end 26 | 27 | test "call which throws a reply" do 28 | server = start_server!() 29 | 30 | assert TestServer.call(server, fn _state -> throw({:reply, :response, :new_state}) end) 31 | assert :sys.get_state(server) == :new_state 32 | end 33 | 34 | test "cast" do 35 | server = start_server!() 36 | assert TestServer.cast(server, fn state -> {:updated_state, state} end) == :ok 37 | assert :sys.get_state(server) 
== {:updated_state, :initial_state} 38 | end 39 | 40 | test "send" do 41 | server = start_server!() 42 | TestServer.send(server, fn state -> {:updated_state, state} end) 43 | assert :sys.get_state(server) == {:updated_state, :initial_state} 44 | end 45 | 46 | test "terminates children before the parent stops" do 47 | server = start_server!(children: [child_spec(id: :child1), child_spec(id: :child2)]) 48 | child1 = child_pid!(server, :child1) 49 | child2 = child_pid!(server, :child2) 50 | 51 | Process.monitor(server) 52 | Process.monitor(child1) 53 | Process.monitor(child2) 54 | 55 | GenServer.stop(server) 56 | 57 | assert_receive {:DOWN, _mref, :process, pid1, _reason} 58 | assert_receive {:DOWN, _mref, :process, pid2, _reason} 59 | assert_receive {:DOWN, _mref, :process, pid3, _reason} 60 | 61 | assert [pid1, pid2, pid3] == [child2, child1, server] 62 | end 63 | 64 | test "invokes handle_stopped_children/2 when a temporary worker stops" do 65 | server = start_server!() 66 | 67 | child = start_child!(server, id: :child, meta: :meta, restart: :temporary, ephemeral?: true) 68 | 69 | :erlang.trace(server, true, [:call]) 70 | :erlang.trace_pattern({TestServer, :handle_stopped_children, 2}, []) 71 | 72 | Process.exit(child, :kill) 73 | 74 | assert_receive {:trace, ^server, :call, 75 | {Parent.TestServer, :handle_stopped_children, [info, :initial_state]}} 76 | 77 | assert %{child: %{pid: ^child, meta: :meta, exit_reason: :killed}} = info 78 | end 79 | 80 | test "restarts the child automatically" do 81 | server = start_server!(name: :my_server, children: [child_spec(id: :child)]) 82 | 83 | trace_function_calls(server, Parent.Restart, :perform) 84 | Agent.stop(child_pid!(server, :child)) 85 | await_function_return(server, Parent.Restart, :perform) 86 | 87 | assert child_ids(server) == [:child] 88 | end 89 | 90 | test "registers the process" do 91 | server = start_server!(name: :registered_name) 92 | assert Process.whereis(:registered_name) == server 93 | end 94 | 95 | 
describe "supervisor calls" do 96 | test "which_children" do 97 | server = 98 | start_server!( 99 | children: [ 100 | [id: :child1, type: :worker], 101 | [id: :child2, type: :supervisor] 102 | ] 103 | ) 104 | 105 | assert [child1, child2] = :supervisor.which_children(server) 106 | assert {:child1, _pid, :worker, _} = child1 107 | assert {:child2, _pid, :supervisor, _} = child2 108 | end 109 | 110 | test "count_children" do 111 | server = 112 | start_server!( 113 | children: [ 114 | [id: :child1, type: :worker], 115 | [id: :child2, type: :supervisor] 116 | ] 117 | ) 118 | 119 | assert :supervisor.count_children(server) == [ 120 | active: 2, 121 | specs: 2, 122 | supervisors: 1, 123 | workers: 1 124 | ] 125 | end 126 | 127 | test "get_childspec" do 128 | server = start_server!(children: [[id: :child1, type: :worker]]) 129 | assert {:ok, %{id: :child1, type: :worker}} = :supervisor.get_childspec(server, :child1) 130 | end 131 | 132 | test "get callback module" do 133 | server = start_server!() 134 | assert :supervisor.get_callback_module(server) == TestServer 135 | end 136 | end 137 | 138 | defp start_server!(opts \\ []) do 139 | {children, opts} = Keyword.pop(opts, :children, []) 140 | server = start_supervised!({TestServer, {fn -> :initial_state end, opts}}) 141 | Mox.allow(Parent.RestartCounter.TimeProvider.Test, self(), server) 142 | Enum.each(children, &start_child!(server, &1)) 143 | server 144 | end 145 | 146 | defp start_child(server, overrides) do 147 | child_spec = Parent.child_spec(%{start: {Agent, :start_link, [fn -> :ok end]}}, overrides) 148 | start = {Agent, :start_link, fn -> :ok end} 149 | child_spec = Map.merge(%{meta: {child_spec.id, :meta}, start: start}, child_spec) 150 | Client.start_child(server, child_spec) 151 | end 152 | 153 | defp start_child!(server, overrides) do 154 | {:ok, pid} = start_child(server, overrides) 155 | pid 156 | end 157 | 158 | defp child_spec(overrides), 159 | do: Parent.child_spec(%{start: {Agent, :start_link, [fn -> :ok 
end]}}, overrides) 160 | 161 | defp child_pid!(server, child_id) do 162 | {:ok, pid} = Client.child_pid(server, child_id) 163 | pid 164 | end 165 | 166 | defp child_ids(parent), do: Enum.map(Client.children(parent), & &1.id) 167 | 168 | defp trace_function_calls(server, module, function) do 169 | :erlang.trace(server, true, [:call]) 170 | Code.ensure_loaded(module) 171 | :erlang.trace_pattern({module, function, :_}, [{:_, [], [{:return_trace}]}], []) 172 | end 173 | 174 | defp await_function_return(server, module, function), 175 | do: assert_receive({:trace, ^server, :return_from, {^module, ^function, _args}, _result}) 176 | end 177 | -------------------------------------------------------------------------------- /test/parent/supervisor_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Parent.SupervisorTest do 2 | use ExUnit.Case, async: true 3 | import Parent.CaptureLog 4 | alias Parent.Supervisor 5 | 6 | setup do 7 | Mox.stub(Parent.RestartCounter.TimeProvider.Test, :now_ms, fn -> 8 | :erlang.unique_integer([:monotonic, :positive]) * :timer.seconds(5) 9 | end) 10 | 11 | :ok 12 | end 13 | 14 | describe "start_link/1" do 15 | test "starts the given children" do 16 | start_supervisor!( 17 | [ 18 | child_spec(id: :child1), 19 | child_spec(id: :child2, start: fn -> :ignore end), 20 | child_spec(id: :child3) 21 | ], 22 | name: :my_supervisor 23 | ) 24 | 25 | assert [%{id: :child1}, %{id: :child2}, %{id: :child3}] = 26 | Parent.Client.children(:my_supervisor) 27 | end 28 | 29 | test "fails to start if a child fails to start" do 30 | Process.flag(:trap_exit, true) 31 | children = [child_spec(id: :child1, start: fn -> {:error, :some_reason} end)] 32 | 33 | assert capture_log(fn -> 34 | assert Supervisor.start_link(children) == {:error, :start_error} 35 | end) =~ "[error] Error starting the child :child1: :some_reason" 36 | end 37 | end 38 | 39 | describe "__using__/1" do 40 | defmodule MySupervisor do 41 | use 
Supervisor 42 | end 43 | 44 | test "works when only module is passed" do 45 | spec = Elixir.Supervisor.child_spec(MySupervisor, []) 46 | assert spec.type == :supervisor 47 | assert spec.shutdown == :infinity 48 | assert spec.start == {MySupervisor, :start_link, [[]]} 49 | end 50 | 51 | test "passes arg to start_link" do 52 | spec = Elixir.Supervisor.child_spec({MySupervisor, :arg}, []) 53 | assert spec.start == {MySupervisor, :start_link, [:arg]} 54 | end 55 | end 56 | 57 | defp start_supervisor!(children, opts) do 58 | pid = start_supervised!({Supervisor, {children, opts}}) 59 | Mox.allow(Parent.RestartCounter.TimeProvider.Test, self(), pid) 60 | pid 61 | end 62 | 63 | defp child_spec(overrides), 64 | do: Parent.child_spec(%{start: {Agent, :start_link, [fn -> :ok end]}}, overrides) 65 | end 66 | -------------------------------------------------------------------------------- /test/parent_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ParentTest do 2 | use ExUnit.Case, async: true 3 | import Parent.CaptureLog 4 | 5 | setup do 6 | Mox.stub(Parent.RestartCounter.TimeProvider.Test, :now_ms, fn -> 7 | :erlang.unique_integer([:monotonic, :positive]) * :timer.seconds(5) 8 | end) 9 | 10 | :ets.new(__MODULE__, [:named_table, :public]) 11 | 12 | :ok 13 | end 14 | 15 | describe "child_spec/2" do 16 | defmodule TestChildSpec do 17 | def child_spec(arg), do: %{start: {__MODULE__, :start_link, arg}} 18 | end 19 | 20 | test "expands module, passing empty list as the arg" do 21 | expanded_spec = Parent.child_spec(TestChildSpec) 22 | assert is_map(expanded_spec) 23 | assert expanded_spec.start == {TestChildSpec, :start_link, []} 24 | end 25 | 26 | test "expands {module, opts}" do 27 | expanded_spec = Parent.child_spec({TestChildSpec, :some_arg}) 28 | assert is_map(expanded_spec) 29 | assert expanded_spec.start == {TestChildSpec, :start_link, :some_arg} 30 | end 31 | 32 | for {opt, default} <- [ 33 | binds_to: [], 34 | 
ephemeral?: false, 35 | id: nil, 36 | max_restarts: :infinity, 37 | max_seconds: 5000, 38 | meta: nil, 39 | restart: :permanent, 40 | shutdown: 5000, 41 | shutdown_group: nil, 42 | timeout: :infinity, 43 | type: :worker 44 | ] do 45 | test "sets #{opt} to #{inspect(default)} by default" do 46 | spec = Parent.child_spec(%{start: fn -> :ok end}) 47 | assert Map.fetch!(spec, unquote(opt)) == unquote(default) 48 | end 49 | end 50 | 51 | test "sets shutdown to infinity if the child is a supervisor" do 52 | assert Parent.child_spec(%{start: fn -> :ok end, type: :supervisor}).shutdown == :infinity 53 | end 54 | 55 | test "sets shutdown to 5000 if the child is a worker" do 56 | assert Parent.child_spec(%{start: fn -> :ok end, type: :worker}).shutdown == 5000 57 | end 58 | 59 | test "sets the default modules" do 60 | assert Parent.child_spec({Agent, fn -> :ok end}).modules == [Agent] 61 | assert Parent.child_spec(%{start: fn -> :ok end}).modules == [__MODULE__] 62 | end 63 | 64 | test "applies overrides on top of defaults" do 65 | assert Parent.child_spec({Agent, fn -> :ok end}, shutdown: 0).shutdown == 0 66 | end 67 | 68 | test "succeeds if base spec doesn't contain the start spec" do 69 | start = {Agent, :start_link, fn -> :ok end} 70 | assert Parent.child_spec(%{}, start: start).start == start 71 | end 72 | end 73 | 74 | describe "initialize/0" do 75 | test "traps exits" do 76 | Process.flag(:trap_exit, false) 77 | Parent.initialize() 78 | assert Process.info(self(), :trap_exit) == {:trap_exit, true} 79 | end 80 | 81 | test "raises if called multiple times" do 82 | Parent.initialize() 83 | assert_raise RuntimeError, "Parent state is already initialized", &Parent.initialize/0 84 | end 85 | end 86 | 87 | describe "start_child" do 88 | test "returns the pid of the started process on success" do 89 | Parent.initialize() 90 | parent = self() 91 | assert {:ok, child} = Parent.start_child({Task, fn -> send(parent, self()) end}) 92 | assert_receive ^child 93 | end 94 | 95 | 
test "implicitly sets the id" do 96 | Parent.initialize() 97 | {:ok, child1} = Parent.start_child(%{start: fn -> Agent.start_link(fn -> :ok end) end}) 98 | {:ok, child2} = Parent.start_child(%{start: fn -> Agent.start_link(fn -> :ok end) end}) 99 | assert [%{pid: ^child1}, %{pid: ^child2}] = Parent.children() 100 | end 101 | 102 | test "accepts module for child spec" do 103 | defmodule TestChild1 do 104 | def child_spec(_arg) do 105 | %{ 106 | id: __MODULE__, 107 | start: {Agent, :start_link, [fn -> :ok end]} 108 | } 109 | end 110 | end 111 | 112 | Parent.initialize() 113 | assert {:ok, _child} = Parent.start_child(TestChild1) 114 | assert Parent.child?(TestChild1) 115 | end 116 | 117 | test "accepts {module, arg} for child spec" do 118 | defmodule TestChild2 do 119 | def child_spec(caller) do 120 | %{ 121 | id: __MODULE__, 122 | start: {Agent, :start_link, [fn -> send(caller, :called) end]} 123 | } 124 | end 125 | end 126 | 127 | Parent.initialize() 128 | assert {:ok, _child} = Parent.start_child({TestChild2, self()}) 129 | assert_receive :called 130 | assert Parent.child?(TestChild2) 131 | end 132 | 133 | test "accepts a child specification map" do 134 | Parent.initialize() 135 | 136 | assert {:ok, _child} = 137 | Parent.start_child(%{id: :child, start: {Agent, :start_link, [fn -> :ok end]}}) 138 | end 139 | 140 | test "accepts a zero arity function in the :start key of the child spec" do 141 | Parent.initialize() 142 | 143 | assert {:ok, _child} = 144 | Parent.start_child(%{id: :child, start: fn -> Agent.start_link(fn -> :ok end) end}) 145 | end 146 | 147 | test "succeeds if deps are started" do 148 | Parent.initialize() 149 | start_child!(id: :child1) 150 | start_child!(id: :child2) 151 | assert {:ok, _pid} = start_child(binds_to: ~w/child1 child2/a) 152 | end 153 | 154 | test "starts a child if it depends on non-running process" do 155 | Parent.initialize() 156 | start_child!(id: :child1, start: fn -> :ignore end) 157 | assert {:ok, _} = start_child(id: 
:child2, binds_to: [:child1]) 158 | end 159 | 160 | test "keeps ignored non-ephemeral children in the state" do 161 | Parent.initialize() 162 | 163 | assert start_child(id: :child1, start: fn -> :ignore end, ephemeral?: false) == 164 | {:ok, :undefined} 165 | 166 | assert start_child(id: :child2, start: fn -> :ignore end, ephemeral?: false) == 167 | {:ok, :undefined} 168 | 169 | assert Parent.child_pid(:child1) == {:ok, :undefined} 170 | assert Parent.child_pid(:child2) == {:ok, :undefined} 171 | end 172 | 173 | test "doesn't keep ephemeral child in the state" do 174 | Parent.initialize() 175 | 176 | assert start_child(id: :child, start: fn -> :ignore end, ephemeral?: true) == 177 | {:ok, :undefined} 178 | 179 | assert Parent.child_pid(:child) == :error 180 | end 181 | 182 | test "handles error by the started process" do 183 | Parent.initialize() 184 | assert start_child(start: fn -> {:error, :some_reason} end) == {:error, :some_reason} 185 | end 186 | 187 | test "fails if id is already taken" do 188 | Parent.initialize() 189 | child = start_child!(id: :child) 190 | assert start_child(id: :child) == {:error, {:already_started, child}} 191 | end 192 | 193 | test "fails if id is pid" do 194 | Parent.initialize() 195 | assert start_child(id: self()) == {:error, :invalid_child_id} 196 | end 197 | 198 | test "fails if deps are not started" do 199 | Parent.initialize() 200 | start_child!(id: :child1) 201 | start_child!(id: :child2) 202 | 203 | assert {:error, error} = start_child(binds_to: ~w/child1 child2 child4 child5/a) 204 | assert error == {:missing_deps, ~w/child4 child5/a} 205 | end 206 | 207 | test "fails when children in a shutdown group don't have the same restart type" do 208 | Parent.initialize() 209 | 210 | for r1 <- ~w/permanent transient temporary/a, 211 | r2 <- ~w/permanent transient temporary/a, 212 | r1 != r2 do 213 | Parent.shutdown_all() 214 | start_child!(id: :child1, restart: r1, shutdown_group: :group) 215 | 216 | assert start_child(id: :child2, 
restart: r2, shutdown_group: :group) == 217 | {:error, {:non_uniform_shutdown_group, :group}} 218 | end 219 | end 220 | 221 | test "fails when children in a shutdown group don't have the same ephemeral setting" do 222 | Parent.initialize() 223 | 224 | start_child!(id: :child1, ephemeral?: false, shutdown_group: :group) 225 | 226 | assert start_child(id: :child2, ephemeral?: true, shutdown_group: :group) == 227 | {:error, {:non_uniform_shutdown_group, :group}} 228 | end 229 | 230 | test "fails if the parent is not initialized" do 231 | assert_raise RuntimeError, "Parent is not initialized", &start_child/0 232 | end 233 | end 234 | 235 | describe "start_all_children/1" do 236 | test "starts all processes" do 237 | Parent.initialize() 238 | 239 | assert [child1, :undefined, child3] = 240 | Parent.start_all_children!([ 241 | Parent.child_spec({Agent, fn -> :ok end}, id: :child1), 242 | %{id: :child2, start: fn -> :ignore end}, 243 | Parent.child_spec({Agent, fn -> :ok end}, id: :child3) 244 | ]) 245 | 246 | assert Parent.child_pid(:child1) == {:ok, child1} 247 | assert Parent.child_pid(:child3) == {:ok, child3} 248 | end 249 | 250 | test "exits at first error" do 251 | Parent.initialize() 252 | test_pid = self() 253 | 254 | log = 255 | capture_log(fn -> 256 | assert catch_exit( 257 | Parent.start_all_children!([ 258 | {Agent, fn -> :ok end}, 259 | %{id: :child2, start: fn -> {:error, :some_error} end}, 260 | {Agent, fn -> send(test_pid, :child3_started) end} 261 | ]) 262 | ) == :start_error 263 | end) 264 | 265 | assert log =~ "Error starting the child :child2: :some_error" 266 | 267 | refute_receive :child3_started 268 | assert Parent.num_children() == 0 269 | end 270 | 271 | test "fails if the parent is not initialized" do 272 | assert_raise RuntimeError, "Parent is not initialized", fn -> 273 | Parent.start_all_children!([Agent]) 274 | end 275 | end 276 | end 277 | 278 | describe "shutdown_child/1" do 279 | test "stops the child synchronously, handling the exit 
message" do 280 | Parent.initialize() 281 | child = start_child!(id: :child) 282 | 283 | assert {:ok, stopped_children} = Parent.shutdown_child(:child) 284 | assert Map.keys(stopped_children) == [:child] 285 | 286 | refute Process.alive?(child) 287 | refute_receive {:EXIT, ^child, _reason} 288 | assert Parent.children() == [] 289 | end 290 | 291 | test "stops the child referenced by the pid" do 292 | Parent.initialize() 293 | child = start_child!(id: :child) 294 | 295 | Parent.shutdown_child(child) 296 | 297 | refute Process.alive?(child) 298 | assert Parent.children() == [] 299 | end 300 | 301 | test "forcefully terminates the child if shutdown is `:brutal_kill`" do 302 | Parent.initialize() 303 | test_pid = self() 304 | 305 | child = 306 | start_child!( 307 | id: :child, 308 | shutdown: :brutal_kill, 309 | start: fn -> 310 | Task.start_link(fn -> 311 | Process.flag(:trap_exit, true) 312 | send(test_pid, :continue) 313 | Process.sleep(:infinity) 314 | end) 315 | end 316 | ) 317 | 318 | Process.monitor(child) 319 | assert_receive :continue 320 | Parent.shutdown_child(:child) 321 | assert_receive {:DOWN, _mref, :process, ^child, :killed} 322 | end 323 | 324 | test "forcefully terminates a child if it doesn't stop in the given time" do 325 | Parent.initialize() 326 | test_pid = self() 327 | 328 | child = 329 | start_child!( 330 | id: :child, 331 | shutdown: 10, 332 | start: fn -> 333 | Task.start_link(fn -> 334 | Process.flag(:trap_exit, true) 335 | send(test_pid, :continue) 336 | Process.sleep(:infinity) 337 | end) 338 | end 339 | ) 340 | 341 | Process.monitor(child) 342 | assert_receive :continue 343 | Parent.shutdown_child(:child) 344 | assert_receive {:DOWN, _mref, :process, ^child, :killed} 345 | end 346 | 347 | test "can stop a non-running child" do 348 | Parent.initialize() 349 | 350 | start_child!(id: :child1, start: fn -> :ignore end) 351 | start_child!(id: :child2, start: fn -> :ignore end) 352 | 353 | assert {:ok, %{child1: %{pid: :undefined}}} = 
Parent.shutdown_child(:child1) 354 | assert [%{id: :child2}] = Parent.children() 355 | end 356 | 357 | test "fails if an unknown child is given" do 358 | Parent.initialize() 359 | assert Parent.shutdown_child(:child) == :error 360 | end 361 | 362 | test "stops all dependencies in the opposite startup order" do 363 | Parent.initialize() 364 | 365 | child1 = start_child!(id: :child1, shutdown_group: :group1) 366 | child2 = start_child!(id: :child2, binds_to: [:child1], shutdown_group: :group2) 367 | child3 = start_child!(id: :child3, binds_to: [:child2]) 368 | child4 = start_child!(id: :child4, shutdown_group: :group1) 369 | child5 = start_child!(id: :child5, shutdown_group: :group2) 370 | start_child!(id: :child6) 371 | 372 | Enum.each([child1, child2, child3, child4, child5], &Process.monitor/1) 373 | 374 | assert {:ok, stopped_children} = Parent.shutdown_child(:child4) 375 | assert Map.keys(stopped_children) == ~w/child1 child2 child3 child4 child5/a 376 | assert [%{id: :child6}] = Parent.children() 377 | 378 | pids = 379 | Enum.map(1..5, fn _ -> 380 | assert_receive {:DOWN, _mref, :process, pid, _reason} 381 | pid 382 | end) 383 | 384 | assert pids == [child5, child4, child3, child2, child1] 385 | end 386 | 387 | test "stops pid-references dependencies" do 388 | Parent.initialize() 389 | 390 | child1 = start_child!(shutdown_group: :group1) 391 | child2 = start_child!(binds_to: [child1], shutdown_group: :group2) 392 | child3 = start_child!(binds_to: [child2]) 393 | child4 = start_child!(shutdown_group: :group1) 394 | child5 = start_child!(shutdown_group: :group2) 395 | child6 = start_child!() 396 | 397 | assert {:ok, stopped_children} = Parent.shutdown_child(child4) 398 | assert Map.keys(stopped_children) == [child1, child2, child3, child4, child5] 399 | assert [%{pid: ^child6}] = Parent.children() 400 | end 401 | 402 | test "stops a non-running dependency" do 403 | Parent.initialize() 404 | start_child!(id: :child1) 405 | start_child!(id: :child2, start: fn -> 
:ignore end, binds_to: [:child1]) 406 | 407 | assert {:ok, %{child2: %{pid: :undefined}}} = Parent.shutdown_child(:child1) 408 | assert Parent.children() == [] 409 | end 410 | 411 | test "works if a bound child stopped previously" do 412 | Parent.initialize() 413 | start_child!(id: :child1) 414 | start_child!(id: :child2, binds_to: [:child1]) 415 | start_child!(id: :child3, binds_to: [:child1]) 416 | 417 | {:ok, _} = Parent.shutdown_child(:child2) 418 | assert {:ok, stopped_children} = Parent.shutdown_child(:child1) 419 | assert Map.keys(stopped_children) == [:child1, :child3] 420 | end 421 | 422 | test "fails if the parent is not initialized" do 423 | assert_raise RuntimeError, "Parent is not initialized", fn -> Parent.shutdown_child(1) end 424 | end 425 | end 426 | 427 | describe "restart_child" do 428 | test "restarts the process and returns the new pid" do 429 | Parent.initialize() 430 | child = start_child!(id: :child) 431 | assert Parent.restart_child(:child) == :ok 432 | assert [%{id: :child}] = Parent.children() 433 | refute child_pid!(:child) == child 434 | end 435 | 436 | test "restarts the process referenced by the pid" do 437 | Parent.initialize() 438 | child = start_child!(id: :child) 439 | assert Parent.restart_child(child) == :ok 440 | assert [%{id: :child}] = Parent.children() 441 | refute child_pid!(:child) == child 442 | end 443 | 444 | test "can restart a non-running child" do 445 | Parent.initialize() 446 | start_child!(id: :child, start: fn -> :ignore end) 447 | 448 | assert Parent.restart_child(:child) == :ok 449 | assert [%{id: :child}] = Parent.children() 450 | 451 | # trying once more to verify that a child is successfully reregistered 452 | assert Parent.restart_child(:child) == :ok 453 | assert [%{id: :child}] = Parent.children() 454 | end 455 | 456 | test "preserves startup order" do 457 | Parent.initialize() 458 | child1 = start_child!(id: :child1) 459 | _child2 = start_child!(id: :child2) 460 | child3 = start_child!(id: :child3) 461 | 
462 | Parent.restart_child(:child2) 463 | {:ok, child2} = Parent.child_pid(:child2) 464 | assert Enum.map(Parent.children(), & &1.pid) == [child1, child2, child3] 465 | end 466 | 467 | test "also restarts all bound siblings" do 468 | Parent.initialize() 469 | 470 | child1 = start_child!(id: :child1, shutdown_group: :group1) 471 | child2 = start_child!(id: :child2, binds_to: [:child1]) 472 | child3 = start_child!(id: :child3, restart: :temporary, binds_to: [:child2]) 473 | child4 = start_child!(id: :child4, shutdown_group: :group1) 474 | child5 = start_child!(id: :child5, restart: :transient, binds_to: [:child2]) 475 | 476 | child6 = 477 | start_child!(id: :child6, ephemeral?: true, restart: :transient, binds_to: [:child2]) 478 | 479 | child7 = 480 | start_child!(id: :child7, ephemeral?: true, restart: :temporary, binds_to: [:child2]) 481 | 482 | child8 = start_child!(id: :child8) 483 | 484 | assert Parent.restart_child(:child4) == :ok 485 | 486 | refute child_pid!(:child1) == child1 487 | refute child_pid!(:child2) == child2 488 | refute child_pid!(:child3) == child3 489 | refute child_pid!(:child4) == child4 490 | refute child_pid!(:child5) == child5 491 | refute child_pid!(:child6) == child6 492 | refute child_pid!(:child7) == child7 493 | assert child_pid!(:child8) == child8 494 | end 495 | 496 | test "gradually retries child restart if the child fails to start" do 497 | Parent.initialize() 498 | 499 | start_child!(id: :child1) 500 | start_child!(id: :child2, binds_to: [:child1]) 501 | 502 | raise_on_child_start(:child1) 503 | Parent.restart_child(:child1) 504 | assert_receive {:EXIT, _pid, _} 505 | 506 | succeed_on_child_start(:child1) 507 | raise_on_child_start(:child2) 508 | 509 | assert handle_parent_message() == :ignore 510 | assert_receive {:EXIT, _pid, _} 511 | 512 | succeed_on_child_start(:child2) 513 | assert [%{id: :child1}, %{id: :child2}] = Parent.children() 514 | end 515 | 516 | test "fails if the parent is not initialized" do 517 | assert_raise 
RuntimeError, "Parent is not initialized", fn -> Parent.restart_child(1) end 518 | end 519 | end 520 | 521 | describe "child termination" do 522 | test "by default causes restart" do 523 | Parent.initialize() 524 | start_child!(id: :child) 525 | provoke_child_termination!(:child) 526 | assert Parent.child?(:child) 527 | end 528 | 529 | test "causes restart if a permanent child stops" do 530 | Parent.initialize() 531 | pid1 = start_child!(id: :child, meta: :meta, restart: :permanent) 532 | provoke_child_termination!(:child, reason: :shutdown) 533 | assert is_pid(child_pid!(:child)) 534 | refute child_pid!(:child) == pid1 535 | end 536 | 537 | test "causes restart when a transient child terminates abnormally" do 538 | Parent.initialize() 539 | start_child!(id: :child, restart: :transient) 540 | provoke_child_termination!(:child) 541 | assert Parent.child?(:child) 542 | end 543 | 544 | test "causes restart when a child is terminated due to a timeout" do 545 | Parent.initialize() 546 | start_child!(id: :child, timeout: 0) 547 | :ignore = handle_parent_message() 548 | assert Parent.child?(:child) 549 | end 550 | 551 | test "doesn't cause restart if a temporary child terminates" do 552 | Parent.initialize() 553 | start_child!(id: :child, restart: :temporary) 554 | provoke_child_termination!(:child) 555 | assert Parent.child_pid(:child) == {:ok, :undefined} 556 | end 557 | 558 | test "doesn't cause restart if a transient child terminates normally" do 559 | Parent.initialize() 560 | start_child!(id: :child, restart: :transient, start: {Task, :start_link, [fn -> :ok end]}) 561 | :ignore = handle_parent_message() 562 | assert Parent.child_pid(:child) == {:ok, :undefined} 563 | end 564 | 565 | test "doesn't cause restart when a child is terminated via `Parent` function" do 566 | Parent.initialize() 567 | start_child!(id: :child) 568 | Parent.shutdown_child(:child) 569 | 570 | refute_receive _ 571 | refute Parent.child?(:child) 572 | end 573 | 574 | test "also restarts 
temporary bound siblings" do 575 | Parent.initialize() 576 | start_child!(id: :child1, restart: :permanent) 577 | 578 | child2 = 579 | start_child!(id: :child2, restart: :temporary, ephemeral?: false, binds_to: [:child1]) 580 | 581 | child3 = 582 | start_child!(id: :child3, restart: :temporary, ephemeral?: true, binds_to: [:child1]) 583 | 584 | provoke_child_termination!(:child1) 585 | refute child_pid!(:child2) in [:undefined, child2] 586 | refute child_pid!(:child3) in [:undefined, child3] 587 | end 588 | 589 | test "causes bound siblings to be stopped regardless of their restart strategy if the terminated child is not restarted" do 590 | Parent.initialize() 591 | start_child!(id: :child1, restart: :temporary) 592 | start_child!(id: :child2, restart: :transient, binds_to: [:child1]) 593 | start_child!(id: :child3, restart: :permanent, binds_to: [:child1]) 594 | 595 | provoke_child_termination!(:child1) 596 | assert child_pid!(:child1) == :undefined 597 | assert child_pid!(:child2) == :undefined 598 | assert child_pid!(:child3) == :undefined 599 | end 600 | 601 | test "causes bound siblings to be removed regardless of their ephemeral status or restart strategy if the terminated child is not restarted" do 602 | Parent.initialize() 603 | start_child!(id: :child1, restart: :temporary, ephemeral?: true) 604 | start_child!(id: :child2, restart: :transient, binds_to: [:child1]) 605 | start_child!(id: :child3, restart: :permanent, binds_to: [:child1]) 606 | 607 | assert {:stopped_children, stopped_children} = provoke_child_termination!(:child1) 608 | assert Enum.sort(Map.keys(stopped_children)) == ~w/child1 child2 child3/a 609 | assert Parent.children() == [] 610 | end 611 | 612 | test "causes bound ephemeral siblings to be removed if a non-ephemeral terminated child is not restarted" do 613 | Parent.initialize() 614 | start_child!(id: :child1, restart: :temporary) 615 | start_child!(id: :child2, restart: :transient, ephemeral?: true, binds_to: [:child1]) 616 | 
start_child!(id: :child3, restart: :permanent, ephemeral?: true, binds_to: [:child1]) 617 | start_child!(id: :child4, restart: :permanent, ephemeral?: false, binds_to: [:child1]) 618 | 619 | assert {:stopped_children, stopped_children} = provoke_child_termination!(:child1) 620 | assert Enum.sort(Map.keys(stopped_children)) == ~w/child2 child3/a 621 | 622 | assert [%{id: :child1, pid: :undefined}, %{id: :child4, pid: :undefined}] = 623 | Parent.children() 624 | end 625 | 626 | test "of an anonymous child also takes down anonymous bound siblings" do 627 | Parent.initialize() 628 | 629 | child1 = start_child!(restart: :temporary) 630 | child2 = start_child!(restart: :temporary, binds_to: [child1]) 631 | child3 = start_child!(restart: :temporary, binds_to: [child2]) 632 | 633 | Process.monitor(child2) 634 | Process.monitor(child3) 635 | 636 | provoke_child_termination!(child1) 637 | 638 | assert_receive {:DOWN, _mref, :process, ^child2, :shutdown} 639 | assert_receive {:DOWN, _mref, :process, ^child3, :shutdown} 640 | end 641 | 642 | test "takes down the entire parent on too many restarts" do 643 | Parent.initialize(max_restarts: 2) 644 | 645 | start_child!(id: :child1) 646 | start_child!(id: :child2) 647 | start_child!(id: :child3) 648 | 649 | provoke_child_termination!(:child1) 650 | provoke_child_termination!(:child2) 651 | 652 | log = assert_parent_exit(fn -> provoke_child_termination!(:child3) end, :too_many_restarts) 653 | assert log =~ "[error] Too many restarts in parent process" 654 | assert Parent.children() == [] 655 | end 656 | 657 | test "takes down the entire parent on too many restarts of a single child" do 658 | Parent.initialize(max_restarts: :infinity) 659 | 660 | start_child!(id: :child1, max_restarts: 2, max_seconds: 1) 661 | start_child!(id: :child2) 662 | 663 | provoke_child_termination!(:child1) 664 | provoke_child_termination!(:child1) 665 | 666 | log = assert_parent_exit(fn -> provoke_child_termination!(:child1) end, :too_many_restarts) 667 | 
assert log =~ "[error] Too many restarts in parent process" 668 | assert Parent.children() == [] 669 | end 670 | 671 | test "doesn't stop parent if max_restarts of the child is infinity" do 672 | Parent.initialize(max_restarts: :infinity) 673 | start_child!(id: :child1, max_restarts: :infinity) 674 | 675 | provoke_child_termination!(:child1) 676 | provoke_child_termination!(:child1) 677 | provoke_child_termination!(:child1) 678 | provoke_child_termination!(:child1) 679 | end 680 | 681 | test "clears recorded restarts after the interval has passed" do 682 | Parent.initialize() 683 | 684 | start_child!(id: :child1, max_restarts: 2, max_seconds: 2) 685 | start_child!(id: :child2) 686 | 687 | provoke_child_termination!(:child1, at: :timer.seconds(0)) 688 | provoke_child_termination!(:child1, at: :timer.seconds(1)) 689 | provoke_child_termination!(:child1, at: :timer.seconds(2)) 690 | end 691 | 692 | test "correctly updates pid-based bindings when the stopped non-ephemeral child is not restarted" do 693 | Parent.initialize() 694 | 695 | child1 = 696 | start_child!( 697 | id: :child1, 698 | restart: :temporary, 699 | start: {Task, :start_link, [fn -> :ok end]} 700 | ) 701 | 702 | start_child!(id: :child2, binds_to: [child1]) 703 | start_child!(id: :child3, binds_to: [child1]) 704 | 705 | :ignore = handle_parent_message() 706 | 707 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 708 | assert Map.keys(stopped_children) == [:child1, :child2, :child3] 709 | end 710 | end 711 | 712 | describe "shutdown_all/1" do 713 | test "terminates all children in the opposite startup order irrespective of bindings" do 714 | Parent.initialize() 715 | 716 | child1 = start_child!(id: :child1, group: :group1) 717 | Process.monitor(child1) 718 | 719 | child2 = start_child!(id: :child2) 720 | Process.monitor(child2) 721 | 722 | child3 = start_child!(id: :child3, group: :group1) 723 | Process.monitor(child3) 724 | 725 | Parent.shutdown_all() 726 | refute_receive {:EXIT, _pid, _reason} 
727 | 728 | assert_receive {:DOWN, _mref, :process, pid1, _reason} 729 | assert_receive {:DOWN, _mref, :process, pid2, _reason} 730 | assert_receive {:DOWN, _mref, :process, pid3, _reason} 731 | 732 | assert [pid1, pid2, pid3] == [child3, child2, child1] 733 | end 734 | 735 | test "returns stopped_children that can be passed to return_children/1" do 736 | Parent.initialize() 737 | 738 | start_child!(id: :child1) 739 | start_child!(id: :child2) 740 | start_child!(id: :child3) 741 | 742 | stopped_children = Parent.shutdown_all() 743 | assert Parent.return_children(stopped_children) == :ok 744 | assert Enum.map(Parent.children(), & &1.id) == ~w/child1 child2 child3/a 745 | end 746 | 747 | test "fails if the parent is not initialized" do 748 | assert_raise RuntimeError, "Parent is not initialized", &Parent.shutdown_all/0 749 | end 750 | end 751 | 752 | describe "children/0" do 753 | test "returns child processes" do 754 | Parent.initialize() 755 | assert Parent.children() == [] 756 | 757 | child1 = start_child!(id: :child1, meta: :meta1) 758 | assert Parent.children() == [%{id: :child1, pid: child1, meta: :meta1}] 759 | 760 | child2 = start_child!(id: :child2, meta: :meta2) 761 | 762 | assert Parent.children() == [ 763 | %{id: :child1, pid: child1, meta: :meta1}, 764 | %{id: :child2, pid: child2, meta: :meta2} 765 | ] 766 | 767 | Parent.shutdown_child(:child1) 768 | assert Parent.children() == [%{id: :child2, pid: child2, meta: :meta2}] 769 | end 770 | 771 | test "fails if the parent is not initialized" do 772 | assert_raise RuntimeError, "Parent is not initialized", &Parent.children/0 773 | end 774 | end 775 | 776 | describe "num_children/0" do 777 | test "returns the number of child processes" do 778 | Parent.initialize() 779 | assert Parent.num_children() == 0 780 | 781 | start_child!(id: :child1) 782 | assert Parent.num_children() == 1 783 | 784 | start_child!() 785 | assert Parent.num_children() == 2 786 | 787 | Parent.shutdown_child(:child1) 788 | assert 
Parent.num_children() == 1 789 | end 790 | 791 | test "fails if the parent is not initialized" do 792 | assert_raise RuntimeError, "Parent is not initialized", &Parent.num_children/0 793 | end 794 | end 795 | 796 | describe "child?/1" do 797 | test "returns true for known children, false otherwise" do 798 | Parent.initialize() 799 | 800 | refute Parent.child?(:child1) 801 | refute Parent.child?(:child2) 802 | 803 | child1 = start_child!(id: :child1) 804 | child2 = start_child!(id: :child2) 805 | 806 | assert Parent.child?(:child1) 807 | assert Parent.child?(child1) 808 | 809 | assert Parent.child?(:child2) 810 | assert Parent.child?(child2) 811 | 812 | Parent.shutdown_child(:child1) 813 | refute Parent.child?(:child1) 814 | refute Parent.child?(child1) 815 | 816 | assert Parent.child?(:child2) 817 | assert Parent.child?(child2) 818 | end 819 | 820 | test "fails if the parent is not initialized" do 821 | assert_raise RuntimeError, "Parent is not initialized", fn -> Parent.child?(:foo) end 822 | end 823 | end 824 | 825 | describe "child_pid/1" do 826 | test "returns the pid of the given child, error otherwise" do 827 | Parent.initialize() 828 | 829 | child1 = start_child!(id: :child1) 830 | child2 = start_child!(id: :child2) 831 | 832 | assert Parent.child_pid(:child1) == {:ok, child1} 833 | assert Parent.child_pid(:child2) == {:ok, child2} 834 | assert Parent.child_pid(:unknown_child) == :error 835 | 836 | Parent.shutdown_child(:child1) 837 | assert Parent.child_pid(:child1) == :error 838 | assert Parent.child_pid(:child2) == {:ok, child2} 839 | end 840 | 841 | test "can be invoked while the child is being started" do 842 | Parent.initialize() 843 | test_pid = self() 844 | 845 | child1 = start_child!(id: :child1) 846 | 847 | start_child!( 848 | start: fn -> 849 | send(test_pid, {:child1, Parent.child_pid(:child1)}) 850 | Agent.start_link(fn -> :ok end) 851 | end 852 | ) 853 | 854 | assert_receive {:child1, {:ok, ^child1}} 855 | end 856 | 857 | test "fails if the 
parent is not initialized" do 858 | assert_raise RuntimeError, "Parent is not initialized", fn -> Parent.child_pid(:foo) end 859 | end 860 | end 861 | 862 | describe "child_id/1" do 863 | test "returns the id of the given child, error otherwise" do 864 | Parent.initialize() 865 | 866 | child1 = start_child!(id: :child1) 867 | child2 = start_child!(id: :child2) 868 | 869 | assert Parent.child_id(child1) == {:ok, :child1} 870 | assert Parent.child_id(child2) == {:ok, :child2} 871 | assert Parent.child_id(self()) == :error 872 | 873 | Parent.shutdown_child(:child1) 874 | assert Parent.child_id(child1) == :error 875 | assert Parent.child_id(child2) == {:ok, :child2} 876 | end 877 | 878 | test "fails if the parent is not initialized" do 879 | assert_raise RuntimeError, "Parent is not initialized", fn -> Parent.child_id(self()) end 880 | end 881 | end 882 | 883 | describe "child_meta/1" do 884 | test "returns the meta of the given child, error otherwise" do 885 | Parent.initialize() 886 | 887 | child1 = start_child!(id: :child1, meta: :meta1) 888 | child2 = start_child!(id: :child2, meta: :meta2) 889 | 890 | assert Parent.child_meta(:child1) == {:ok, :meta1} 891 | assert Parent.child_meta(child1) == {:ok, :meta1} 892 | 893 | assert Parent.child_meta(:child2) == {:ok, :meta2} 894 | assert Parent.child_meta(child2) == {:ok, :meta2} 895 | 896 | assert Parent.child_meta(:unknown_child) == :error 897 | 898 | Parent.shutdown_child(:child1) 899 | assert Parent.child_meta(:child1) == :error 900 | assert Parent.child_meta(:child2) == {:ok, :meta2} 901 | end 902 | 903 | test "fails if the parent is not initialized" do 904 | assert_raise RuntimeError, "Parent is not initialized", fn -> Parent.child_meta(:child) end 905 | end 906 | end 907 | 908 | describe "update_child_meta/2" do 909 | test "updates meta of the known child, fails otherwise" do 910 | Parent.initialize() 911 | 912 | start_child!(id: :child1, meta: 1) 913 | child2 = start_child!(id: :child2, meta: 2) 914 | 915 | 
Parent.update_child_meta(:child2, &(&1 + 1)) 916 | Parent.update_child_meta(child2, &(&1 + 1)) 917 | 918 | assert Parent.child_meta(:child1) == {:ok, 1} 919 | assert Parent.child_meta(:child2) == {:ok, 4} 920 | 921 | Parent.shutdown_child(:child1) 922 | assert Parent.update_child_meta(:child1, & &1) == :error 923 | end 924 | 925 | test "updates meta in registry, fails otherwise" do 926 | Parent.initialize(registry?: true) 927 | start_child!(id: :child1, meta: 1) 928 | Parent.update_child_meta(:child1, &(&1 + 1)) 929 | assert Parent.Client.child_meta(self(), :child1) == {:ok, 2} 930 | end 931 | 932 | test "doesn't affect meta of a reset child" do 933 | Parent.initialize() 934 | 935 | start_child!(id: :child, meta: 1) 936 | Parent.update_child_meta(:child, &(&1 + 1)) 937 | provoke_child_termination!(:child) 938 | 939 | assert Parent.child_meta(:child) == {:ok, 1} 940 | end 941 | 942 | test "fails if the parent is not initialized" do 943 | assert_raise RuntimeError, "Parent is not initialized", fn -> 944 | Parent.update_child_meta(:child, & &1) 945 | end 946 | end 947 | end 948 | 949 | describe "handle_message/1" do 950 | test "handles child termination" do 951 | Parent.initialize() 952 | child1 = start_child!(id: :child1, meta: :meta, restart: :temporary, ephemeral?: false) 953 | child2 = start_child!(id: :child2, meta: :meta, restart: :temporary, ephemeral?: true) 954 | 955 | GenServer.stop(child1) 956 | assert handle_parent_message() == :ignore 957 | assert Parent.child?(:child1) 958 | assert child_pid!(:child1) == :undefined 959 | 960 | GenServer.stop(child2) 961 | assert {:stopped_children, %{child2: _}} = handle_parent_message() 962 | refute Parent.child?(:child2) 963 | end 964 | 965 | test "terminates dependencies if a child stops" do 966 | Parent.initialize() 967 | 968 | {:ok, child1} = start_child(id: :child1, restart: :transient) 969 | {:ok, child2} = start_child(id: :child2, restart: :temporary, binds_to: [:child1]) 970 | {:ok, child3} = start_child(id: 
:child3, restart: :temporary, binds_to: [:child1]) 971 | {:ok, child4} = start_child(id: :child4, restart: :temporary) 972 | 973 | Enum.each([child2, child3], &Process.monitor/1) 974 | 975 | GenServer.stop(child1) 976 | 977 | assert handle_parent_message() == :ignore 978 | 979 | assert Enum.map(Parent.children(), &{&1.id, &1.pid}) == [ 980 | child1: :undefined, 981 | child2: :undefined, 982 | child3: :undefined, 983 | child4: child4 984 | ] 985 | 986 | assert_receive {:DOWN, _mref, :process, pid1, :shutdown} 987 | assert_receive {:DOWN, _mref, :process, pid2, :shutdown} 988 | assert [pid1, pid2] == [child3, child2] 989 | end 990 | 991 | test "handles child timeout by stopping the child" do 992 | Parent.initialize() 993 | start_child!(id: :child, restart: :temporary, meta: :meta, timeout: 0) 994 | handle_parent_message() 995 | assert Parent.child_pid(:child) == {:ok, :undefined} 996 | end 997 | 998 | test "handles supervisor calls" do 999 | Parent.initialize() 1000 | parent = self() 1001 | child = start_child!(id: :child) 1002 | 1003 | task = 1004 | Task.async(fn -> 1005 | assert :supervisor.which_children(parent) == [{:child, child, :worker, [Agent]}] 1006 | 1007 | assert :supervisor.count_children(parent) == 1008 | [active: 1, specs: 1, supervisors: 0, workers: 1] 1009 | 1010 | assert {:ok, %{id: :child}} = :supervisor.get_childspec(parent, :child) 1011 | assert {:ok, %{id: :child}} = :supervisor.get_childspec(parent, child) 1012 | assert :supervisor.get_childspec(parent, :unknown_child) == {:error, :not_found} 1013 | end) 1014 | 1015 | assert handle_parent_message() == :ignore 1016 | assert handle_parent_message() == :ignore 1017 | assert handle_parent_message() == :ignore 1018 | assert handle_parent_message() == :ignore 1019 | assert handle_parent_message() == :ignore 1020 | 1021 | Task.await(task) 1022 | end 1023 | 1024 | test "which_children correctly handles anonymous children" do 1025 | Parent.initialize() 1026 | parent = self() 1027 | child1 = 
start_child!() 1028 | child2 = start_child!(id: :child) 1029 | child3 = start_child!() 1030 | 1031 | task = 1032 | Task.async(fn -> 1033 | assert :supervisor.which_children(parent) == [ 1034 | {:undefined, child1, :worker, [Agent]}, 1035 | {:child, child2, :worker, [Agent]}, 1036 | {:undefined, child3, :worker, [Agent]} 1037 | ] 1038 | end) 1039 | 1040 | assert handle_parent_message() == :ignore 1041 | 1042 | Task.await(task) 1043 | end 1044 | 1045 | test "ignores unknown messages" do 1046 | Parent.initialize() 1047 | assert is_nil(Parent.handle_message({:EXIT, self(), :normal})) 1048 | assert is_nil(Parent.handle_message(:unknown_message)) 1049 | end 1050 | 1051 | test "fails if the parent is not initialized" do 1052 | assert_raise RuntimeError, "Parent is not initialized", fn -> 1053 | Parent.handle_message(:foo) 1054 | end 1055 | end 1056 | end 1057 | 1058 | describe "return_children/1" do 1059 | test "starts all stopped children preserving the shutdown order" do 1060 | Parent.initialize() 1061 | start_child!(id: :child1) 1062 | child2 = start_child!(id: :child2) 1063 | start_child!(id: :child3, binds_to: [:child1]) 1064 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 1065 | 1066 | assert Parent.return_children(stopped_children) == :ok 1067 | 1068 | assert [ 1069 | %{id: :child1, pid: child1}, 1070 | %{id: :child2, pid: ^child2}, 1071 | %{id: :child3, pid: child3} 1072 | ] = Parent.children() 1073 | 1074 | Process.monitor(child1) 1075 | Process.monitor(child2) 1076 | Process.monitor(child3) 1077 | 1078 | Parent.shutdown_all() 1079 | 1080 | assert_receive {:DOWN, _mref, :process, pid1, _reason} 1081 | assert_receive {:DOWN, _mref, :process, pid2, _reason} 1082 | assert_receive {:DOWN, _mref, :process, pid3, _reason} 1083 | 1084 | assert [pid1, pid2, pid3] == [child3, child2, child1] 1085 | end 1086 | 1087 | test "tries to restart non-started processes automatically" do 1088 | Parent.initialize() 1089 | 1090 | child1 = start_child!(id: :child1) 1091 | 
start_child!(id: :child2, binds_to: [child1]) 1092 | child3 = start_child!(id: :child3, binds_to: [child1], shutdown_group: :group1) 1093 | start_child!(id: :child4, shutdown_group: :group1) 1094 | start_child!(id: :child5, binds_to: [child3]) 1095 | start_child!(id: :child6, binds_to: [:child5], shutdown_group: :group1) 1096 | 1097 | # running this multiple times to make sure all bindings are correctly preserved 1098 | for _ <- 1..5 do 1099 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 1100 | 1101 | raise_on_child_start(:child5) 1102 | assert Parent.return_children(stopped_children) == :ok 1103 | assert_receive {:EXIT, _failed_child5, _} 1104 | 1105 | assert Enum.map(Parent.children(), & &1.id) == 1106 | ~w/child1 child2 child3 child4 child5 child6/a 1107 | 1108 | Enum.each(~w/child1 child2/a, &assert(is_pid(child_pid!(&1)))) 1109 | Enum.each(~w/child3 child4 child5 child6/a, &assert(child_pid!(&1) == :undefined)) 1110 | 1111 | succeed_on_child_start(:child5) 1112 | assert handle_parent_message() == :ignore 1113 | 1114 | Enum.each(~w/child3 child4 child5 child6/a, &assert(is_pid(child_pid!(&1)))) 1115 | end 1116 | end 1117 | 1118 | test "treats failed start of an ephemeral temporary child as a crash" do 1119 | Parent.initialize() 1120 | 1121 | start_child!(id: :child1) 1122 | start_child!(id: :child2, binds_to: [:child1], restart: :temporary, ephemeral?: true) 1123 | start_child!(id: :child3, shutdown_group: :group1) 1124 | start_child!(id: :child4, shutdown_group: :group1, binds_to: [:child2]) 1125 | start_child!(id: :child5, binds_to: [:child1]) 1126 | 1127 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 1128 | 1129 | raise_on_child_start(:child2) 1130 | assert Parent.return_children(stopped_children) == :ok 1131 | assert_receive {:EXIT, _, _} 1132 | 1133 | assert Enum.map(Parent.children(), & &1.id) == ~w/child1 child5/a 1134 | 1135 | assert {:stopped_children, stopped_children} = handle_parent_message() 1136 | assert 
Map.keys(stopped_children) == ~w/child2 child3 child4/a 1137 | end 1138 | 1139 | test "treats failed start of a non-ephemeral temporary child as a crash" do 1140 | Parent.initialize() 1141 | 1142 | start_child!(id: :child1) 1143 | start_child!(id: :child2, binds_to: [:child1], restart: :temporary, ephemeral?: false) 1144 | start_child!(id: :child3, shutdown_group: :group1) 1145 | start_child!(id: :child4, shutdown_group: :group1, binds_to: [:child2]) 1146 | start_child!(id: :child5, binds_to: [:child1]) 1147 | 1148 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 1149 | 1150 | raise_on_child_start(:child2) 1151 | assert Parent.return_children(stopped_children) == :ok 1152 | assert_receive {:EXIT, _, _} 1153 | 1154 | assert Enum.map(Parent.children(), & &1.id) == ~w/child1 child2 child3 child4 child5/a 1155 | Enum.each(~w/child2 child3 child4/a, &assert(Parent.child_pid(&1) == {:ok, :undefined})) 1156 | end 1157 | 1158 | test "is idempotent" do 1159 | Parent.initialize() 1160 | start_child!(id: :child1) 1161 | start_child!(id: :child2, binds_to: [:child1]) 1162 | {:ok, stopped_children} = Parent.shutdown_child(:child1) 1163 | assert Parent.return_children(stopped_children) == :ok 1164 | assert Parent.return_children(stopped_children) == :ok 1165 | assert [%{id: :child1}, %{id: :child2}] = Parent.children() 1166 | end 1167 | 1168 | test "records restart of a terminated child" do 1169 | Parent.initialize() 1170 | start_child!(id: :child1, restart: :temporary) 1171 | start_child!(id: :child2) 1172 | 1173 | start_child!( 1174 | id: :child3, 1175 | binds_to: [:child1], 1176 | restart: :temporary, 1177 | ephemeral?: true, 1178 | max_restarts: 1 1179 | ) 1180 | 1181 | {:stopped_children, stopped_children} = provoke_child_termination!(:child3, at: 0) 1182 | Parent.return_children(stopped_children) 1183 | 1184 | {:stopped_children, stopped_children} = provoke_child_termination!(:child3, at: 0) 1185 | 1186 | log = 1187 | assert_parent_exit( 1188 | fn -> 
Parent.return_children(stopped_children) end, 1189 | :too_many_restarts 1190 | ) 1191 | 1192 | assert log =~ "[error] Too many restarts in parent process" 1193 | assert Parent.children() == [] 1194 | end 1195 | 1196 | test "correctly returns pid-references dependencies" do 1197 | Parent.initialize() 1198 | 1199 | child1 = start_child!(shutdown_group: :group1) 1200 | child2 = start_child!(binds_to: [child1]) 1201 | start_child!(binds_to: [child2]) 1202 | start_child!(shutdown_group: :group1) 1203 | start_child!(binds_to: [child1]) 1204 | child6 = start_child!() 1205 | 1206 | {:ok, stopped_children} = Parent.shutdown_child(child1) 1207 | Parent.return_children(stopped_children) 1208 | assert Parent.num_children() == 6 1209 | 1210 | Parent.shutdown_child(hd(Parent.children()).pid) 1211 | assert [%{pid: ^child6}] = Parent.children() 1212 | end 1213 | end 1214 | 1215 | defp handle_parent_message, 1216 | do: Parent.handle_message(assert_receive _message) 1217 | 1218 | defp provoke_child_termination!(child_id, opts \\ []) do 1219 | now_ms = Keyword.get(opts, :at, 0) 1220 | Mox.stub(Parent.RestartCounter.TimeProvider.Test, :now_ms, fn -> now_ms end) 1221 | {:ok, pid} = Parent.child_pid(child_id) 1222 | Process.exit(pid, Keyword.get(opts, :reason, :shutdown)) 1223 | handle_parent_message() 1224 | end 1225 | 1226 | defp assert_parent_exit(fun, exit_reason) do 1227 | log = capture_log(fn -> assert catch_exit(fun.()) == exit_reason end) 1228 | assert_receive {:EXIT, _string_io_pid, :normal} 1229 | log 1230 | end 1231 | 1232 | defp start_child(overrides \\ []) do 1233 | overrides = Map.new(overrides) 1234 | id = Map.get(overrides, :id, nil) 1235 | succeed_on_child_start(id) 1236 | 1237 | Parent.start_child( 1238 | %{ 1239 | id: id, 1240 | start: 1241 | {Agent, :start_link, 1242 | [fn -> if id && :ets.lookup(__MODULE__, id) == [{id, true}], do: raise("error") end]} 1243 | }, 1244 | overrides 1245 | ) 1246 | end 1247 | 1248 | defp start_child!(overrides \\ []) do 1249 | {:ok, 
defmodule Periodic.LoggerTest do
  # Verifies the log lines produced once `Periodic.Logger` is installed for `:test_job`.
  # Runs with `async: false` because it reconfigures the global Logger level.
  use ExUnit.Case, async: false
  import Parent.CaptureLog
  import Periodic.Test
  import Periodic.TestHelper

  setup_all do
    Periodic.Logger.install(:test_job)
  end

  setup do
    # Use the :debug level while the test runs and put :warn back once it is done.
    Logger.configure(level: :debug)
    on_exit(fn -> Logger.configure(level: :warn) end)
    observe(:test_job)
  end

  test "start" do
    log = capture_log(&start_job!/0)
    assert log =~ ~r/Periodic\(:test_job\): job #PID<.+> started/
  end

  describe "finished" do
    test "normal" do
      log = finished_job_log(&finish_job/1)
      assert log =~ ~r/Periodic\(:test_job\): job #PID<.+> finished, duration=\d+us/
    end

    test "shutdown" do
      log = finished_job_log(&Process.exit(&1, :shutdown))
      assert log =~ ~r/Periodic\(:test_job\): job #PID<.+> shut down/
    end

    test "kill" do
      log = finished_job_log(&Process.exit(&1, :kill))
      assert log =~ ~r/Periodic\(:test_job\): job #PID<.+> killed/
    end

    test "crash" do
      log = finished_job_log(&send(&1, {:crash, :some_reason}))
      assert log =~ ~r/Periodic\(:test_job\): job #PID<.+> exited with reason :some_reason/
    end
  end

  test "skipped" do
    log =
      capture_log(fn ->
        {scheduler, _job} = start_job!(on_overlap: :ignore)
        tick(scheduler)
        assert_periodic_event(:test_job, :skipped, %{scheduler: ^scheduler})
      end)

    assert log =~ "skipped starting the job because the previous instance is still running"
  end

  test "stopped_previous" do
    log =
      capture_log(fn ->
        {scheduler, _job} = start_job!(on_overlap: :stop_previous)
        tick(scheduler)
      end)

    assert log =~ "killed previous job instance, because the new job is about to be started"
  end

  # Starts a job, terminates it by invoking `stop_job` with the job pid, waits for the
  # `:finished` event of that job, and returns everything logged in the meantime.
  defp finished_job_log(stop_job) do
    capture_log(fn ->
      {_scheduler, job} = start_job!()
      stop_job.(job)
      assert_periodic_event(:test_job, :finished, %{job: ^job})
    end)
  end
end
describe "on_overlap" do
  test "ignore" do
    {scheduler, running_job} = start_job!(on_overlap: :ignore)

    # Ticking while the first job is still running must skip the new job.
    tick(scheduler)

    assert_periodic_event(
      :test_job,
      :skipped,
      %{scheduler: ^scheduler, still_running: ^running_job}
    )

    refute_periodic_event(:test_job, :started, %{scheduler: ^scheduler})

    # Once the first job is done, the next tick starts a new one.
    finish_job(running_job)
    tick(scheduler)
    assert_periodic_event(:test_job, :started, %{scheduler: ^scheduler, job: _job})
  end

  test "stop_previous" do
    {scheduler, previous_job} = start_job!(on_overlap: :stop_previous)

    monitor_ref = Process.monitor(previous_job)

    # Ticking while the previous job is still running kills it and starts a new job.
    tick(scheduler)
    assert_receive {:DOWN, ^monitor_ref, :process, ^previous_job, :killed}

    assert_periodic_event(
      :test_job,
      :stopped_previous,
      %{scheduler: ^scheduler, pid: ^previous_job}
    )

    assert_periodic_event(:test_job, :started, %{scheduler: ^scheduler})
  end
end
describe "job shutdown" do
  # Each test stops the supervised scheduler while a job is running and checks the exit
  # reason with which the job (or, for :infinity, the scheduler) goes down.

  test "timeout when job doesn't trap exits" do
    {_scheduler, job} = start_job!(job_shutdown: 10, trap_exit?: false)
    job_ref = Process.monitor(job)
    stop_supervised(:test_job)
    assert_receive {:DOWN, ^job_ref, :process, ^job, :shutdown}
  end

  test "timeout when job traps exits" do
    {_scheduler, job} = start_job!(job_shutdown: 10, trap_exit?: true)
    job_ref = Process.monitor(job)
    stop_supervised(:test_job)
    assert_receive {:DOWN, ^job_ref, :process, ^job, :killed}
  end

  test "brutal_kill" do
    {_scheduler, job} = start_job!(job_shutdown: :brutal_kill, trap_exit?: true)
    job_ref = Process.monitor(job)
    stop_supervised(:test_job)
    assert_receive {:DOWN, ^job_ref, :process, ^job, :killed}
  end

  test "infinity" do
    {scheduler, job} = start_job!(job_shutdown: :infinity, trap_exit?: true)

    scheduler_ref = Process.monitor(scheduler)

    # Invoking asynchronously because this code blocks. Since the code is invoked from another
    # process, we have to use GenServer.stop.
    Task.start_link(fn -> GenServer.stop(scheduler) end)

    # The scheduler must stay alive for as long as the job is running.
    refute_receive {:DOWN, ^scheduler_ref, :process, ^scheduler, _}

    send(job, :finish)
    assert_receive {:DOWN, ^scheduler_ref, :process, ^scheduler, _}
  end
end
defmodule Periodic.TestHelper do
  # Helpers shared by the Periodic tests: they start a manually ticked scheduler under the
  # test supervisor and drive the instrumented job it runs.
  import Periodic.Test
  import ExUnit.Assertions

  # Starts a supervised `Periodic` scheduler and returns the value of `start_supervised!/1`.
  #
  # Defaults (id/telemetry id `:test_job`, `every: 1`, manual mode, instrumented job) can be
  # overridden through `opts`. The `:trap_exit?` option decides whether the job traps exits.
  def start_scheduler!(opts \\ []) do
    trap_exit? = Keyword.get(opts, :trap_exit?, false)

    scheduler_opts =
      Keyword.merge(
        [
          id: :test_job,
          telemetry_id: :test_job,
          every: 1,
          mode: :manual,
          run: instrumented_job(trap_exit?)
        ],
        opts
      )

    ExUnit.Callbacks.start_supervised!({Periodic, scheduler_opts})
  end

  # Starts a scheduler, ticks it once, waits for the `:started` event, and returns
  # `{scheduler, job}`.
  def start_job!(opts \\ []) do
    scheduler = start_scheduler!(opts)
    tick(scheduler)
    assert_periodic_event(:test_job, :started, %{scheduler: ^scheduler, job: started_job})
    {scheduler, started_job}
  end

  # Asks the job to finish and waits (up to 100 ms) until it is down. Returns `:ok`.
  def finish_job(job) do
    monitor_ref = Process.monitor(job)
    send(job, :finish)
    assert_receive {:DOWN, ^monitor_ref, :process, ^job, _reason}, 100
    :ok
  end

  # Builds the job function. The job reports `{:started, job_pid}` to the process which
  # built the function, and then blocks until it is told to finish or to crash.
  defp instrumented_job(trap_exit?) do
    owner = self()

    fn ->
      Process.flag(:trap_exit, trap_exit?)
      send(owner, {:started, self()})

      receive do
        :finish -> :ok
        {:crash, reason} -> exit(reason)
      end
    end
  end
end