├── src
    ├── sups_app.erl
    ├── sups.app.src
    ├── sups.erl
    ├── sups_worker_sup.erl
    ├── sups_db_sup.erl
    ├── sups_supersup.erl
    ├── sups_worker.erl
    ├── sups_db_worker.erl
    └── sups_lib.erl
├── rebar.config
├── .gitignore
├── test
    ├── prop_sups.erl
    └── sups_statem.erl
├── README.md
└── LICENSE


/src/sups_app.erl:
--------------------------------------------------------------------------------
1 | -module(sups_app).
2 | -export([start/2, stop/1]).
3 | 
4 | %% @doc Application start callback: boots the top-level supervisor and
5 | %% returns its start result as-is. Both arguments are ignored.
6 | start(_StartType, _StartArgs) ->
7 |     sups_supersup:start_link().
5 | 
6 | %% @doc Application stop callback; there is nothing to clean up.
7 | stop(_State) ->
8 |     ok.
7 | 
8 | 


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {plugins, [rebar3_proper]}.
2 | 
3 | {profiles, [
4 |     {test, [
5 |         {erl_opts, [nowarn_export_all, {parse_transform, lager_transform}]},
6 |         {deps, [proper, lager]}
7 |     ]}
8 | ]}.
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .rebar3
 2 | _*
 3 | .eunit
 4 | *.o
 5 | *.beam
 6 | *.plt
 7 | *.swp
 8 | *.swo
 9 | .erlang.cookie
10 | ebin
11 | log
12 | erl_crash.dump
13 | .rebar
14 | logs
15 | _build
16 | .idea
17 | *.iml
18 | rebar3.crashdump
19 | 


--------------------------------------------------------------------------------
/src/sups.app.src:
--------------------------------------------------------------------------------
 1 | {application, sups,
 2 |  [{description, "An OTP library"},
 3 |   {vsn, "0.1.0"},
 4 |   {registered, []},
 5 |   {mod, {sups_app, []}},
 6 |   {applications,
 7 |    [kernel,
 8 |     stdlib
 9 |    ]},
10 |   {env,[]},
11 |   {modules, []},
12 | 
13 |   {maintainers, []},
14 |   {licenses, ["Apache 2.0"]},
15 |   {links, []}
16 |  ]}.
17 | 


--------------------------------------------------------------------------------
/src/sups.erl:
--------------------------------------------------------------------------------
 1 | -module(sups).
 2 | 
 3 | %% API exports
 4 | -export([]).
 5 | 
 6 | %%====================================================================
 7 | %% API functions
 8 | %%====================================================================
 9 | 
10 | 
11 | %%====================================================================
12 | %% Internal functions
13 | %%====================================================================
14 | 


--------------------------------------------------------------------------------
/src/sups_worker_sup.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_worker_sup).
 2 | -export([start_link/0, init/1]).
 3 | -behaviour(supervisor).
 4 | 
 5 | %% @doc Starts the worker supervisor, unregistered and linked to the
 6 | %% caller, with this module as the callback module.
 7 | start_link() ->
 8 |     supervisor:start_link(?MODULE, []).
 6 | 
 7 | %% @doc Supervisor callback. Declares a one_for_one supervisor that
 8 | %% tolerates 5 restarts per 10 seconds and owns a single permanent
 9 | %% `sups_worker' child.
10 | init([]) ->
11 |     SupFlags = #{strategy => one_for_one, intensity => 5, period => 10},
12 |     Worker = #{id => worker1,
13 |                start => {sups_worker, start_link, []},
14 |                restart => permanent,
15 |                modules => [sups_worker]},
16 |     {ok, {SupFlags, [Worker]}}.
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/src/sups_db_sup.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_db_sup).
 2 | -export([start_link/0, init/1]).
 3 | -behaviour(supervisor).
 4 | 
 5 | %% @doc Starts the database supervisor, unregistered and linked to the
 6 | %% caller, with this module as the callback module.
 7 | start_link() ->
 8 |     supervisor:start_link(?MODULE, []).
 6 | 
 7 | %% @doc Supervisor callback. Tags the supervisor process with `db' and
 8 | %% declares a one_for_one supervisor (10 restarts per second) owning a
 9 | %% single permanent `sups_db_worker' child.
10 | init([]) ->
11 |     %% The tag lives in the process dictionary so that the `tagged' /
12 |     %% `not_tagged' filters described in the README can see it.
13 |     put(sups_tags, [db]),
14 |     SupFlags = #{strategy => one_for_one, intensity => 10, period => 1},
15 |     DbWorker = #{id => worker1,
16 |                  start => {sups_db_worker, start_link, []},
17 |                  restart => permanent,
18 |                  type => worker,
19 |                  modules => [sups_db_worker]},
20 |     {ok, {SupFlags, [DbWorker]}}.
16 | 
17 | 


--------------------------------------------------------------------------------
/src/sups_supersup.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_supersup).
 2 | -export([start_link/0, init/1]).
 3 | -behaviour(supervisor).
 4 | 
 5 | %% @doc Starts the application's root supervisor, unregistered and
 6 | %% linked to the caller, with this module as the callback module.
 7 | start_link() ->
 8 |     supervisor:start_link(?MODULE, []).
 6 | 
 7 | %% @doc Supervisor callback for the root of the tree. Runs the database
 8 | %% supervisor and the worker supervisor side by side under a one_for_one
 9 | %% strategy that tolerates 10 restarts per second.
10 | init([]) ->
11 |     DbSup = #{id => db_sup,
12 |               start => {sups_db_sup, start_link, []},
13 |               restart => permanent,
14 |               type => supervisor,
15 |               modules => [sups_db_sup]},
16 |     %% No explicit `restart' key here: the supervisor default applies.
17 |     WorkersSup = #{id => workers_sup,
18 |                    start => {sups_worker_sup, start_link, []},
19 |                    type => supervisor,
20 |                    modules => [sups_worker_sup]},
21 |     Flags = #{strategy => one_for_one, intensity => 10, period => 1},
22 |     {ok, {Flags, [DbSup, WorkersSup]}}.
23 | 


--------------------------------------------------------------------------------
/src/sups_worker.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_worker).
 2 | -behaviour(gen_server).
 3 | -export([start_link/0]).
 4 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
 5 | 
 6 | %% @doc Starts an unregistered worker linked to the caller.
 7 | start_link() -> gen_server:start_link(?MODULE, [], []).
 8 | 
 9 | %% @doc gen_server callback. Posts the first `req' message to itself so
10 | %% the request loop in handle_info/2 starts as soon as init returns.
11 | %% The server keeps no state (`undefined').
12 | init([]) ->
13 |     erlang:send(self(), req),
14 |     {ok, undefined}.
12 | 
13 | %% @doc gen_server callback. This worker exposes no synchronous API, so
14 | %% every call is answered with `{error, unknown_call}'.
15 | %% Replying matters: returning `noreply' without ever calling
16 | %% gen_server:reply/2 would leave the caller blocked until its timeout.
17 | handle_call(_Request, _From, State) ->
18 |     {reply, {error, unknown_call}, State}.
15 | 
16 | %% @doc gen_server callback. Casts are accepted and ignored.
17 | handle_cast(_Msg, Unchanged) -> {noreply, Unchanged}.
18 | 
19 | %% @doc gen_server callback driving the request loop.
20 | %% On `req' the worker issues one blocking request to `sups_db_worker'
21 | %% and then schedules the next one. Any other message is dropped.
22 | handle_info(req, State) ->
23 |     case sups_db_worker:req(req, infinity) of
24 |         {ok, _} ->
25 |             %% Served: queue the next request right away to keep load up.
26 |             self() ! req;
27 |         {error, disconnected} ->
28 |             %% Database is down: retry after a short pause instead of
29 |             %% spinning on an error that is returned immediately.
30 |             erlang:send_after(50, self(), req)
31 |     end,
32 |     {noreply, State};
33 | handle_info(_Unexpected, State) ->
34 |     %% Drain stray messages rather than crashing with function_clause,
35 |     %% matching what sups_db_worker:handle_info/2 does.
36 |     {noreply, State}.
26 | 
27 | %% @doc gen_server callback. Nothing to release on shutdown.
28 | terminate(_Reason, _State) -> ok.
29 | 
30 | %    case sups_db_worker:req(req, infinity) of
31 | %        {error, disconnected} -> ignore;
32 | %        {ok,_} -> ok % good! request went through!
33 | %    end,


--------------------------------------------------------------------------------
/src/sups_db_worker.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_db_worker).
 2 | -behaviour(gen_server).
 3 | -export([start_link/0, req/2]).
 4 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
 5 | 
 6 | %% @doc Starts the database worker linked to the caller and registers
 7 | %% it locally under the module name, which req/2 relies on.
 8 | start_link() ->
 9 |     ServerName = {local, ?MODULE},
10 |     gen_server:start_link(ServerName, ?MODULE, [], []).
 8 | 
 9 | %% @doc Sends `Req' to the registered database worker and waits up to
10 | %% `Timeout' for the answer: `{ok, Req}' when connected,
11 | %% `{error, disconnected}' otherwise.
12 | req(Req, Timeout) ->
13 |     Request = {req, Req},
14 |     gen_server:call(?MODULE, Request, Timeout).
11 | 
12 | %% @doc gen_server callback. Tags the process with `db' in its process
13 | %% dictionary (the tag format the README's filters look for) and starts
14 | %% in the `connected' state.
15 | init([]) ->
16 |     _ = put(sups_tags, [db]),
17 |     InitialState = connected,
18 |     {ok, InitialState}.
15 | 
16 | %% @doc gen_server callback. The state is the connection status.
17 | %% `disconnect' and `connect' switch the status and reply `ok'.
18 | %% `{req, Req}' replies `{error, disconnected}' when disconnected; when
19 | %% connected it blocks the server for 500 ms (presumably to simulate a
20 | %% slow backend) and then replies `{ok, Req}'.
21 | handle_call(disconnect, _Caller, _Status) ->
22 |     {reply, ok, disconnected};
23 | handle_call(connect, _Caller, _Status) ->
24 |     {reply, ok, connected};
25 | handle_call({req, Payload}, _Caller, connected) ->
26 |     timer:sleep(500),
27 |     {reply, {ok, Payload}, connected};
28 | handle_call({req, _Payload}, _Caller, disconnected) ->
29 |     {reply, {error, disconnected}, disconnected}.
25 | 
26 | %% @doc gen_server callback. Casts are accepted and ignored.
27 | handle_cast(_Msg, Status) -> {noreply, Status}.
28 | 
29 | %% @doc gen_server callback. Out-of-band messages are dropped.
30 | handle_info(_Msg, Status) -> {noreply, Status}.
31 | 
32 | %% @doc gen_server callback. Nothing to release on shutdown.
33 | terminate(_Reason, _Status) -> ok.
34 | 


--------------------------------------------------------------------------------
/test/prop_sups.erl:
--------------------------------------------------------------------------------
 1 | -module(prop_sups).
 2 | -include_lib("proper/include/proper.hrl").
 3 | 
 4 | %% Property: for any command sequence generated from `sups_statem',
 5 | %% running it against a freshly started `sups' application ends with
 6 | %% result `ok'. Sequence lengths are collected in buckets of 10.
 7 | prop_check_tree() ->
 8 |     ?FORALL(Commands, commands(sups_statem),
 9 |         begin
10 |             %% Setup
11 |             silence_logs(),
12 |             {ok, Started} = application:ensure_all_started(sups),
13 |             %% Exercise
14 |             {History, State, Result} = run_commands(sups_statem, Commands),
15 |             %% Teardown
16 |             lists:foreach(fun application:stop/1, Started),
17 |             %% Reporting
18 |             Bucket = bucket(length(Commands), 10),
19 |             ?WHENFAIL(io:format("History: ~p~nState: ~p~nResult: ~p~n",
20 |                                 [History, State, Result]),
21 |                       collect(Bucket, Result =:= ok))
22 |         end).
20 | 
21 | %% Returns the `{Low, High}' bounds of the bucket of width `M' that `N'
22 | %% falls into, e.g. bucket(25, 10) is {20, 30}.
23 | bucket(N, M) ->
24 |     Low = (N div M) * M,
25 |     {Low, Low + M}.
24 | 
25 | %% Starts lager with an empty handler list so nothing is logged during
26 | %% the test run. Returns the result of starting lager.
27 | silence_logs() ->
28 |     _ = application:load(lager),
29 |     _ = application:set_env(lager, handlers, []),
30 |     application:ensure_all_started(lager).


--------------------------------------------------------------------------------
/test/sups_statem.erl:
--------------------------------------------------------------------------------
 1 | -module(sups_statem).
 2 | -include_lib("proper/include/proper.hrl").
 3 | -compile(export_all).
 4 | -define(APPS, [sups]).
 5 | 
 6 | %% Model state before any command has run; `undefined' means the
 7 | %% supervision tree snapshot has not been taken yet.
 8 | initial_state() ->
 9 |     undefined.
 7 | 
 8 | %% Command generator. With no state yet, the only command is taking the
 9 | %% initial snapshot. Afterwards it picks between a fault injection on the
10 | %% database and killing a random process that is not tagged `db'.
11 | command(undefined) ->
12 |     {call, sups_lib, init_state, [?APPS]};
13 | command(State) ->
14 |     FaultInjection = {call, sups_lib, mock_success,
15 |                       [State, fun mock_db_call/0, fun unmock_db_call/0, ?APPS]},
16 |     RandomKill = {call, sups_lib, mark_as_dead,
17 |                   [State, non_neg_integer(), [{not_tagged, db}], ?APPS]},
18 |     oneof([FaultInjection, RandomKill]).
17 | 
18 | %% `init_state' is only valid while the state is still `undefined';
19 | %% `mock_success' and `mark_as_dead' are only valid once it is set.
20 | %% Everything else is rejected.
21 | precondition(undefined, {call, _, Fun, _}) ->
22 |     Fun =:= init_state;
23 | precondition(_State, {call, _, Fun, _}) ->
24 |     Fun =:= mock_success orelse Fun =:= mark_as_dead;
25 | precondition(_State, _Call) ->
26 |     false.
26 | 
27 | %% The initial snapshot always passes. The other two commands delegate
28 | %% to the matching `sups_lib' validator, comparing the tree held before
29 | %% the call with the tree and death list the call returned.
30 | postcondition(_State, {call, _, init_state, _}, _Snapshot) ->
31 |     true;
32 | postcondition({Before, _}, {call, _, mock_success, _}, {After, Deaths}) ->
33 |     sups_lib:validate_mock_success(Before, After, Deaths);
34 | postcondition({Before, _}, {call, _, mark_as_dead, _}, {After, Deaths}) ->
35 |     sups_lib:validate_mark_as_dead(Before, After, Deaths).
33 | 
34 | %% The result of every command becomes the new model state.
35 | %% `init_state' is only accepted from the `undefined' state.
36 | next_state(undefined, Result, {call, _, init_state, _}) ->
37 |     Result;
38 | next_state(_Previous, Result, {call, _, Fun, _})
39 |   when Fun =:= mock_success; Fun =:= mark_as_dead ->
40 |     Result.
40 | 
41 | 
42 | %% This is actually using a stub because the demo didn't quite like me flipping
43 | %% the switch super hard on a central process through meck and lotsa code loading.
44 | %% Fault injection: switches the database worker to `disconnected' and
45 | %% returns how long (in milliseconds) the fault should last.
46 | mock_db_call() ->
47 |     _ = gen_server:call(sups_db_worker, disconnect, infinity),
48 |     SleepMs = 100,
49 |     SleepMs.
47 | 
48 | %% Ends the fault injection by switching the database worker back to
49 | %% `connected'.
50 | unmock_db_call() ->
51 |     _ = gen_server:call(sups_db_worker, connect, infinity),
52 |     ok.
51 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | sups
 2 | =====
 3 | 
 4 | Experimental code library to be used with PropEr or Quickcheck to validate that an OTP application is properly implemented to match the structure of its supervision tree.
 5 | 
 6 | Basically, in a scenario where the supervision structure encodes the expected failure semantics of a program, this library can be used to do fault-injection and failure simulation to see that the failures taking place within the program actually respect the model exposed by the supervision tree. If random process kills or simulated faults end up killing unexpected supervision subtrees, it is assumed that the caller processes have weaknesses in how they handle events.
 7 | 
 8 | This documentation is still a work in progress.
 9 | 
10 | Calls
11 | -----
12 | 
13 | The API needs rework to be a bit more transparent. This doc needs rework to be a bit more helpful.
14 | 
15 | - `sups_lib:init_state()` and `sups_lib:init_state(WhiteList)`: takes a snapshot of the supervision tree of currently running applications (only those listed in `WhiteList`, a list of application names, when one is given). Should be called before any other function of this library in a PropEr model to give it a baseline data set against which to run.
16 | - `mark_as_dead(State, N, Filters, WhiteList)` where `N` should be an integer picked randomly by the test framework. See the _Filters_ section for filters. This function kills a random process in the supervision tree, and makes guesses (based on the tree structure) as to what processes should be expected to die along with it.
17 | - `mock_success(State, MockFun, UnmockFun, WhiteList)` takes two functions: the first one of the form `fun() -> DoSomeMocking, IntegerValue end`, where you can set up any mocking you want, and `IntegerValue` tells the system how long to sleep (in milliseconds) before calling the unmocking function (`fun() -> Whatever end`), returning the system to normal.
18 | - `validate_mark_as_dead(OldTree, NewTree, DeadList) -> Bool` (TODO: rework to use `State`) can be used as a postcondition to validate that the right processes are living or dead in the application.
19 | - `validate_mock_success(OldTree, NewTree, DeadList) -> Bool` (TODO: rework to use `State`) can be used as a postcondition to validate that no unexpected processes died during fault injection.
20 | 
21 | Other functions are exported in `sups_lib` to let you implement custom validation.
22 | 
23 | Filters
24 | -------
25 | 
26 | Zero or more filters can be used in a list:
27 | 
28 | - `{named, Atom}`: only kill processes with that given name (in the native registry)
29 | - `{not_named, Atom}`: only kill processes aside from a known named one
30 | - `{tagged, Term}`: only kill processes that have an entry of the form `put(sups_tags, [Term])` in their process dictionary
31 | - `{not_tagged, Term}`: only kill processes that don't have an entry of the form `put(sups_tags, [Term])` in their process dictionary
32 | 
33 | How It Works
34 | ------------
35 | 
36 | By building a supervision tree data structure with all annotations, we can create an integer `N` through a regular PropEr or QuickCheck generator that is applied to the tree to denote a specific node. The count starts depth-first, from the right-most child to the left-most child (meaning that, with default shrinking rules, we start by killing newer processes rather than older and more critical ones).
37 | 
38 | This numeric value is adapted according to filters and whatnot, and since it relies on the shape of the tree rather than the processes it contains, it should allow proper Shrinking to work fine.
39 | 
40 | On a process kill, we analyze the structure of the tree and supervision structure, maintain a list of processes we know should have died, and use it to resolve what the actual tree should be doing as a model.
41 | 
42 | Example
43 | -------
44 | TODO
45 | 
46 | For now, the test code for this library serves as the example (run it with `rebar3 proper`).
47 | 
48 | Caveats
49 | -------
50 | 
51 | Currently, the system does not track nor model unexpected worker faults in remote subtrees (only local ones), and so those may end up impacting tolerance rates of other supervisors and lower the accuracy of the model. Not too sure if this becomes a problem in practice or not.
52 | 
53 | The system must be running under constant simulation load to be realistic.
54 | 
55 | The sleeping / waiting timer for propagation is a bit ad hoc and requires tweaking.
56 | 
57 | The library has not yet seen enough testing with real-world apps.
58 | 
59 | Roadmap
60 | -------
61 | 
62 | - Fix arguments to functions
63 | - Fix app/demo structure
64 | - Rename mocking functions to be related to fault injection
65 | - Write tests instead of just running them
66 | - See if this holds up in real world projects


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    Copyright 2018, Fred Hebert <mononcqc@ferd.ca>.
179 | 
180 |    Licensed under the Apache License, Version 2.0 (the "License");
181 |    you may not use this file except in compliance with the License.
182 |    You may obtain a copy of the License at
183 | 
184 |        http://www.apache.org/licenses/LICENSE-2.0
185 | 
186 |    Unless required by applicable law or agreed to in writing, software
187 |    distributed under the License is distributed on an "AS IS" BASIS,
188 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189 |    See the License for the specific language governing permissions and
190 |    limitations under the License.
191 | 
192 | 


--------------------------------------------------------------------------------
/src/sups_lib.erl:
--------------------------------------------------------------------------------
  1 | -module(sups_lib).
  2 | -export([find_supervisors/0, find_supervisors/1, init_state/0, init_state/1, extract_dead/1,
  3 |          mark_as_dead/4, mock_success/4,
  4 |          validate_mark_as_dead/3, validate_mock_success/3,
  5 |          dead_as_expected/2, sups_still_living/3]).
  6 | 
  7 | %% Supervisor flags recorded for each supervisor in the tree.
  8 | -type strategy() :: one_for_one | simple_one_for_one | rest_for_one | one_for_all.
  9 | -type intensity() :: non_neg_integer().
 10 | -type period() :: pos_integer().
 11 | %% Restart type attached to each entry of a supervision tree.
 12 | -type restart() :: permanent | transient | temporary. 
 13 | %% Key/value annotation carried by a worker or supervisor node.
 14 | -type attr() :: {term(), term()}.
 15 | %% Leaf node: a pid tagged `worker' or `non_otp_sup', plus its annotations.
 16 | -type worker() :: {worker | non_otp_sup, pid(), [attr()]}.
 17 | %% Supervisor node: strategy, pid, {intensity, period}, children, annotations.
 18 | -type sup() :: {strategy(), pid(), {intensity(), period()}, [suptree()], [attr()]}.
 19 | -type suptree() :: [{restart(), worker() | sup()}].
 20 | -type app() :: atom().
 21 | %% NOTE(review): find_supervisors/1 pairs each app with a single
 22 | %% `[{permanent, Node}]' list, which looks like `suptree()' rather than
 23 | %% `[suptree()]' -- confirm the intended nesting against dig_sup.
 24 | -type apptree() :: [{app(), [suptree()]}].
 25 | %% Recorded death; extract_dead/1 only treats `dead' entries as definitely dead.
 26 | -type death_event() :: {dead|child_dead, pid(), stamp()}.
 27 | -type stamp() :: integer().
 28 | -type mockfun() :: fun(() -> pos_integer()). % returns sleep time
 29 | -type unmockfun() :: fun(() -> ok). 
 30 | %% Process selection filter; see the Filters section of the README.
 31 | -type filter() :: {named|not_named, atom()} | {tagged|not_tagged, term()}.
 22 | 
 23 | -export_type([apptree/0]).
 24 | 
 25 | %%%%%%%%%%%%%%
 26 | %%% PUBLIC %%%
 27 | %%%%%%%%%%%%%%
 28 | 
 29 | %% @doc Snapshot the supervision trees of every application in the
 30 | %% running system (same as an empty whitelist).
 31 | -spec find_supervisors() -> apptree().
 32 | find_supervisors() ->
 33 |     find_supervisors([]).
 32 | 
 33 | %% @doc Snapshot the supervision trees of the running system, keeping
 34 | %% only the applications named in `Whitelist'. An empty whitelist keeps
 35 | %% every application.
 36 | -spec find_supervisors([atom()]) -> apptree().
 37 | find_supervisors(Whitelist) ->
 38 |     Wanted = fun(App) ->
 39 |                  Whitelist =:= [] orelse lists:member(App, Whitelist)
 40 |              end,
 41 |     [{App, [{permanent, dig_sup(RootPid)}]}
 42 |      || {App, RootPid} <- root_sups(), Wanted(App)].
 41 | 
 42 | %% @doc Builds the initial state for a PropEr statem test: a snapshot
 43 | %% of every application's supervision tree and an empty death list.
 44 | -spec init_state() -> {apptree(), []}.
 45 | init_state() ->
 46 |     init_state([]).
 46 | 
 47 | %% @doc Builds the initial state for a PropEr statem test, restricted
 48 | %% to a whitelist of applications to damage and test: a snapshot of
 49 | %% their supervision trees and an empty death list.
 50 | -spec init_state([atom()]) -> {apptree(), []}.
 51 | init_state(WhiteList) ->
 52 |     Snapshot = find_supervisors(WhiteList),
 53 |     {Snapshot, []}.
 51 | 
 52 | %% @doc Collects, as a set for quick membership checks, the pids of the
 53 | %% death events marked `dead' (i.e. definitely dead). Every other event
 54 | %% is skipped.
 55 | -spec extract_dead([death_event()]) -> sets:set(pid()).
 56 | extract_dead(List) ->
 57 |     DeadPid = fun({dead, Pid, _}) -> {true, Pid};
 58 |                  (_) -> false
 59 |               end,
 60 |     sets:from_list(lists:filtermap(DeadPid, List)).
 56 | 
 57 | %% @doc Kills one process of the application tree and returns the new
 58 | %% tree together with the updated list of deaths.
 59 | %%
 60 | %% Arguments: the current `{Tree, Deaths}' state, a random number `N'
 61 | %% identifying what should die, the filters restricting which processes
 62 | %% are candidates, and a whitelist of applications to look into.
 63 | %% The call proceeds as follows:
 64 | %%
 65 | %% 1. count the processes in the tree (`M')
 66 | %% 2. number them 0..M based on position (implicit)
 67 | %% 3. mark the pid or supervisor at M-N rem M as dead (workers first)
 68 | %% 4. propagate expected status to other supervisors based on tolerance
 69 | %% 5. kill the actual process
 70 | %% 6. wait a few milliseconds for propagation (arbitrary)
 71 | %% 7. take a snapshot of the program tree and compare with the old one.
 72 | %% @end
 73 | %% @TODO make the millisecond wait for propagation more solid
 74 | -spec mark_as_dead({apptree(), [death_event()]}, non_neg_integer(), [filter()], [atom()]) ->
 75 |         {apptree(), [death_event()]}.
 76 | mark_as_dead({Tree, _Deaths} = State, N, Filters, Whitelist) when is_list(Tree) ->
 77 |     %% Step 1 happens here; the remaining steps are done by mark_as_dead/5.
 78 |     ProcCount = count_procs(Tree, Filters),
 79 |     mark_as_dead(State, N, ProcCount, Filters, Whitelist).
 77 | 
 78 | %% @doc Injects a temporary fault: runs `Mock', sleeps for the number
 79 | %% of milliseconds it returned, runs `Unmock' to revert it, then takes a
 80 | %% fresh snapshot of the whitelisted applications.
 81 | %% A healthy supervision tree should come back with no supervisor
 82 | %% failures in it. The death list is returned unchanged.
 83 | -spec mock_success({apptree(), [death_event()]}, mockfun(), unmockfun(), [atom()]) ->
 84 |         {apptree(), [death_event()]}.
 85 | mock_success({Tree, Deaths}, Mock, Unmock, Whitelist) when is_list(Tree) ->
 86 |     timer:sleep(Mock()),
 87 |     Unmock(),
 88 |     {find_supervisors(Whitelist), Deaths}.
 91 | 
 92 | %% @doc Recommended validation helper for `mark_as_dead' function;
 93 | %% checks that the processes that were expected to die are actually gone,
 94 | %% and that the supervisors in unrelated subtrees are unaffected. This should
 95 | %% capture unhandled expected faults in subtrees.
 96 | %% Outputs the old tree, the new tree, and the expected missing processes in
 97 | %% case a counter-example.
 98 | -spec validate_mark_as_dead(apptree(), apptree(), [death_event()]) -> boolean().
 99 | validate_mark_as_dead(OldTree, NewTree, NewDeaths) ->
100 |     MustBeMissing = extract_dead(NewDeaths),
101 |     Res = dead_as_expected(NewTree, MustBeMissing)
102 |     andalso sups_still_living(OldTree, NewTree, MustBeMissing),
103 |     case Res of
104 |         true ->
105 |             true;
106 |         false ->
107 |             io:format("Old: ~p~nNew: ~p~nDead: ~p~n",
108 |                       [OldTree, NewTree, sets:to_list(MustBeMissing)]),
109 |             false
110 |     end.
111 | 
%% @doc Recommended validation helper for the `mock_success' function;
%% checks that every supervisor of the old tree that is not recorded as dead
%% in `NewDeaths' can still be found in the new tree. A missing supervisor
%% reveals a massive failure following a fault injection that should have
%% been survivable.
-spec validate_mock_success(apptree(), apptree(), [death_event()]) -> boolean().
validate_mock_success(OldTree, NewTree, NewDeaths) ->
    %% A successful call should not add any death, so the set below only
    %% covers what was already known to be dead.
    sups_still_living(OldTree, NewTree, extract_dead(NewDeaths)).
121 | 
%% @doc Walks a supervision tree model and returns `true' only if none of
%% the pids in `Set' appear in it, whether as worker or as supervisor, at
%% any depth. `noproc' entries are skipped.
-spec dead_as_expected(apptree(), sets:set(pid())) -> boolean().
dead_as_expected(Tree, Set) ->
    Unlisted = fun(Pid) -> not sets:is_element(Pid, Set) end,
    lists:all(
      fun({_Restart, noproc}) ->
              true;
         ({_Restart, {_Type, Pid, _Attrs}}) -> % worker
              Unlisted(Pid);
         ({_Restart, {_, Pid, _, Children, _Attrs}}) -> % sup
              Unlisted(Pid) andalso dead_as_expected(Children, Set);
         ({_App, Sup}) when is_list(Sup) -> % app
              dead_as_expected(Sup, Set)
      end,
      Tree).
136 | 
%% @doc Compares an old and a newer supervision tree against the set of pids
%% expected to be dead: every supervisor pid of the old tree that is not in
%% that set must still be present among the supervisors of the new tree.
-spec sups_still_living(apptree(), apptree(), sets:set(pid())) -> boolean().
sups_still_living(Old, New, ShouldBeDead) ->
    Before = supervisor_pids(Old),
    After = supervisor_pids(New),
    Survivors = Before -- sets:to_list(ShouldBeDead),
    %% valid as long as no expected survivor is missing from the new tree
    not lists:any(fun(Pid) -> not lists:member(Pid, After) end, Survivors).
147 | 
148 | %%%%%%%%%%%%%%%
149 | %%% PRIVATE %%%
150 | %%%%%%%%%%%%%%%
151 | 
152 | %%% DIG WITHIN A SUPERVISOR
%% @private Describes the process `SupPid' from the state returned by
%% `sys:get_state/1':
%% - an 11-element `state' tuple yields
%%   `{Strategy, Pid, {Intensity, Period}, Children, Attrs}', the children
%%   being dug recursively (presumably this is the layout of OTP's internal
%%   supervisor state record; to be confirmed against the OTP release in use);
%% - any other state yields `{non_otp_supervisor, Pid, Attrs}';
%% - `noproc' is returned when the call exits because the process is gone.
dig_sup(SupPid) ->
    try sys:get_state(SupPid) of
        {state, _Name, RestartStrategy, ChildSpecs, _Dynamics,
         MaxRestarts, MaxPeriod, _Restarts, _DynamicRestarts,
         _Mod, _Args} ->
            SupAttrs = dig_attrs(SupPid),
            Kids = dig_children(ChildSpecs, SupPid),
            {RestartStrategy, SupPid, {MaxRestarts, MaxPeriod}, Kids, SupAttrs};
        _UnknownState ->
            {non_otp_supervisor, SupPid, dig_attrs(SupPid)}
    catch
        exit:{noproc,_} -> noproc
    end.
165 | 
%% @private Describes the children of the supervisor `Parent' from the child
%% list found in its state. A list made of a single child record whose pid is
%% `undefined' is handled as a simple_one_for_one template: the actual
%% children are then asked to the supervisor itself and all get the restart
%% type of the template. Any other list is described record by record.
dig_children([{child, undefined, _Name, _MFA, Restart, _Kill, ChildType, _Mods}], Parent)
        when ChildType =:= worker; ChildType =:= supervisor ->
    Running = supervisor:which_children(Parent),
    Describe = case ChildType of
                   worker -> fun(Pid) -> {worker, Pid, dig_attrs(Pid)} end;
                   supervisor -> fun(Pid) -> handle_dig_result(dig_sup(Pid)) end
               end,
    [{Restart, Describe(Pid)} || {_, Pid, _, _} <- Running];
dig_children(Children, _Parent) ->
    dig_children_(Children).
175 | 
%% @private Turns each child record of a supervisor state into a
%% `{RestartType, Description}' pair, keeping the order of the list: workers
%% are described by their pid and attributes, supervisors are dug recursively.
dig_children_(Children) ->
    lists:map(
      fun({child, Pid, _Name, _MFA, Restart, _Kill, worker, _Mods}) ->
              {Restart, {worker, Pid, dig_attrs(Pid)}};
         ({child, Pid, _Name, _MFA, Restart, _Kill, supervisor, _Mods}) ->
              {Restart, handle_dig_result(dig_sup(Pid))}
      end,
      Children).
181 | 
%% @private Normalizes the result of `dig_sup/1' before it is stored in a
%% tree: the `non_otp_supervisor' tag is shortened to `non_otp_sup', anything
%% else (including `noproc') is passed through untouched.
handle_dig_result(Result) ->
    case Result of
        {non_otp_supervisor, Pid, Attrs} -> {non_otp_sup, Pid, Attrs};
        _ -> Result
    end.
185 | 
%% @private find process attributes that can be used to filter processes in
%% or out of the kill process:
%% - `{name, Name}' when the process has a registered name;
%% - one `{tag, Tag}' per element of the list stored under the `sups_tags'
%%   key of its process dictionary.
%% A process that is already gone has no attributes: `process_info/2' returns
%% `undefined' for it, which can happen when it exits between the moment its
%% supervisor listed it and the moment it is inspected. An empty list is
%% returned in that case instead of crashing the whole snapshot, in line with
%% the way `dig_sup/1' tolerates `noproc'.
-spec dig_attrs(pid()) -> [attr()].
dig_attrs(Pid) ->
    case process_info(Pid, [registered_name, dictionary]) of
        undefined ->
            [];
        [{_, Name}, {_, PDict}] ->
            %% an unregistered process reports `[]' as its name
            [{name, Name} || Name =/= []]
            ++ [{tag, T} || T <- proplists:get_value(sups_tags, PDict, [])]
    end.
193 | 
%% @private Lists `{App, Pid}' for the root supervisor of the running
%% applications. Starting from the pid that `application:info/0' reports for
%% each running application, follow its links to the process whose initial
%% call is `application_master:start_it/4', then keep the processes linked to
%% that one which `proc_lib' identifies as supervisors. Entries of the
%% running list that hold no pid are ignored.
root_sups() ->
    Running = proplists:get_value(running, application:info()),
    [{App, Sup} ||
          {App, Master} <- Running,
          is_pid(Master),
          {links, MasterLinks} <- [process_info(Master, links)],
          Inner <- MasterLinks,
          {_,{application_master,start_it,4}} <- [process_info(Inner, initial_call)],
          {links, InnerLinks} <- [process_info(Inner, links)],
          Sup <- InnerLinks,
          {supervisor,_,_} <- [proc_lib:translate_initial_call(Sup)]].
205 | 
206 | 
207 | %%% MARK AS DEAD COMPLEX STUFF %%%
208 | 
%% @private mark_as_dead continuation, called once the number of candidate
%% processes (`Count') is known.
%% With no candidate the state is returned as is. Otherwise the target is
%% picked from `N', its death is propagated through the model, the process
%% is killed for real, and a new snapshot of the whitelisted applications is
%% returned along with the new death events put in front of the older ones.
%% @TODO: fix the logging for the last process left maybe
-spec mark_as_dead({apptree(), [death_event()]}, non_neg_integer(), non_neg_integer(), [filter()], [atom()]) ->
        {apptree(), [death_event()]}.
mark_as_dead(State, _, 0, _, _) ->
    io:format("Null case, supervisor tree is gone or only root left~n", []),
    State;
mark_as_dead({Tree, Deaths}, N, Count, Filters, Whitelist) ->
    M = Count-1,
    %% 2. mark them with numbers 0..M based on position (implicit)
    %% 3. mark the pid or supervisor at M - (N rem Count) as dead, so that
    %%    small values of N pick the last positions first.
    %%    The modulo must be taken over the number of positions (Count, that
    %%    is M+1): `N rem M' fails with badarith when a single candidate is
    %%    left (M = 0) and can never select position 0.
    ChosenN = M - (N rem Count),
    %% 4. propagate expected status to other supervisors based on tolerance
    {Pid, NewDeaths} = propagate_death(Tree, Deaths, ChosenN, Filters),
    %% 5. kill the actual process
    kill_and_wait(Pid), % should this be conditional in case a proc choice failed?
    %% 6. wait a few milliseconds for propagation: 150ms per expected death
    %%    and 75ms per supervisor notified of a dead child, capped at 1s
    DeadSleep = lists:sum([case Dead of  % TODO: tweak
                            dead -> 150;
                            child_dead -> 75
                           end || {Dead, _, _} <- NewDeaths]),
    timer:sleep(min(DeadSleep, 1000)), % very tolerant sups may be killed at random anyway
    %% 7. take a snapshot of the program tree and compare them
    NewTree = find_supervisors(Whitelist),
    {NewTree, NewDeaths ++ Deaths}.
234 | 
%% @private Counts the processes of a supervision tree whose attributes
%% satisfy all the filters. Workers and supervisors are counted at any depth;
%% `noproc' entries are skipped and the root supervisor of an application is
%% never counted itself, only what sits under it.
-spec count_procs(apptree(), [filter()]) -> non_neg_integer().
count_procs(Tree, Filters) ->
    Own = fun(Attrs) ->
              case filter_attrs(Filters, Attrs) of
                  true -> 1;
                  false -> 0
              end
          end,
    lists:sum(lists:map(
      fun({_Restart, noproc}) ->
              %% This happens somehow
              0;
         ({_Restart, {_Strategy, _Pid, _Tolerance, Kids, Attrs}}) ->
              Own(Attrs) + count_procs(Kids, Filters);
         ({_Restart, {_Type, _Pid, Attrs}}) ->
              Own(Attrs);
         ({App, [{_, {_, _, _, Kids, _Attrs}}]}) when is_atom(App) ->
              count_procs(Kids, Filters)
      end,
      Tree)).
255 | 
%% @private Returns `true' when a process' attributes satisfy every filter
%% of the list (which is always the case for an empty list).
-spec filter_attrs([filter()], [attr()]) -> boolean().
filter_attrs(Filters, Attrs) ->
    Rejects = fun(Filter) -> not filter(Filter, Attrs) end,
    not lists:any(Rejects, Filters).

%% @private Checks a single filter against a list of attributes:
%% `tagged'/`named' require the matching `{tag, _}'/`{name, _}' attribute to
%% be present, `not_tagged'/`not_named' require it to be absent.
filter({tagged, Tag}, Attrs) ->
    lists:member({tag, Tag}, Attrs);
filter({named, Name}, Attrs) ->
    lists:member({name, Name}, Attrs);
filter({not_tagged, Tag}, Attrs) ->
    not lists:member({tag, Tag}, Attrs);
filter({not_named, Name}, Attrs) ->
    not lists:member({name, Name}, Attrs).
269 | 
270 | 
%% @private Sends a `kill' exit signal to `Pid' and returns `ok' once the
%% monitor set beforehand has reported the process as down. Raises
%% `{timeout, {kill, Pid}}' if no such report shows up within 5 seconds.
kill_and_wait(Pid) ->
    Timeout = 5000,
    MonitorRef = erlang:monitor(process, Pid),
    exit(Pid, kill),
    receive
        {'DOWN', MonitorRef, process, Pid, _Reason} ->
            ok
    after Timeout ->
        error({timeout, {kill, Pid}})
    end.
280 | 
%% @private
%% Take the app tree, the deaths seen so far, and the integer position of
%% the process that has been targeted. Walk the tree, decrementing the
%% position for every worker or non-root supervisor that is passed, until it
%% reaches 0; the first process found from there whose attributes satisfy
%% the filters is the target. Nothing is killed here: the pid of the target
%% is returned for the caller to kill, along with the death events expected
%% from that kill once propagated up the supervision tree according to the
%% model. The returned events do not include the ones passed in `Deaths'.
%% When the walk runs out of processes, `{not_in_tree, N}' is returned
%% instead, `N' being what is left of the position.
%% @end
-spec propagate_death(apptree(), [death_event()], non_neg_integer(), [filter()]) ->
    {pid(), [death_event()]} | {not_in_tree, non_neg_integer()}.
%% kill shots
propagate_death([{_Restart, {_Type, Pid, Attrs}}|T], Deaths, 0, Filters) ->
    %% worker at the targeted position: take it if the filters allow it,
    %% otherwise fall through to the next candidate
    case filter_attrs(Filters, Attrs) of
        true -> {Pid, [{dead, Pid, stamp()}]};
        false -> propagate_death(T, Deaths, 0, Filters)
    end;
propagate_death([{_Restart, {Strategy, Pid, Tolerance, Children, Attrs}}|T], Deaths, 0, Filters) ->
    %% supervisor at the targeted position: killing it takes down its whole
    %% subtree; if it is filtered out, look for the target among its children
    case filter_attrs(Filters, Attrs) of
        true ->
            {Pid, [{dead, Pid, stamp()} | recursive_all_dead(Children)]};
        false ->
            sup_propagation(Pid, Strategy, Tolerance, Children, T, Deaths, 0, Filters)
    end;
%% propagation steps
propagate_death([], _Deaths, N, _Filters) ->
    %% proc not found
    {not_in_tree, N};
propagate_death([{_Restart, noproc} | T], Deaths, N, Filters) ->
    %% skip process as non-existing
    propagate_death(T, Deaths, N, Filters);
propagate_death([{_Restart, {Strategy, Pid, Tolerance, Children, _Attrs}}|T], Deaths, N, Filters) ->
    %% supervisor (not the target). Propagate the kill signal, and if it comes
    %% back up to us and a child (direct or not) was the target, propagate
    %% the death to other siblings or even ourselves
    sup_propagation(Pid, Strategy, Tolerance, Children, T, Deaths, N-1, Filters);
propagate_death([{_Restart, {_Atom, _Pid, _Attrs}}|T], Deaths, N, Filters) ->
    %% non-target worker
    propagate_death(T, Deaths, N-1, Filters);
propagate_death([{App, [{_,{Strategy,Pid,Tolerance,Children,_Attrs}}]}|T], Deaths, N, Filters) when is_atom(App) ->
    %% Skip to the next app. Since we represent the root process of the app, we may
    %% need to do propagation of our own.
    sup_propagation(Pid, Strategy, Tolerance, Children, T, Deaths, N, Filters).
321 | 
%% @private Looks for the target among the children of the supervisor
%% `SupPid'. When it is found there, the supervisor gets to react to the
%% death of its child; otherwise the search goes on through `Siblings' with
%% what is left of the position counter.
sup_propagation(SupPid, Strategy, Tolerance, Children, Siblings, Deaths, Index, Filters) ->
    case propagate_death(Children, Deaths, Index, Filters) of
        {Victim, SubDeaths} when is_pid(Victim) ->
            handle_child_death(SupPid, Victim, Strategy, Tolerance, SubDeaths, Deaths, Children);
        {not_in_tree, Remaining} ->
            propagate_death(Siblings, Deaths, Remaining, Filters)
    end.
329 | 
%% @private Act as a supervisor and apply a modeled version of the various
%% restart strategies to children:
%% - if a child of a one_for_one/sofo supervisor dies, none of its siblings should die
%% - if a child of a rest_for_one supervisor dies, the siblings started after it should die
%% - if a child of a one_for_all supervisor dies, all its siblings die.
%% - whenever a direct child dies, add the count to the parent ({child_dead, SupPid, Stamp})
%%
%% `Pid' is the supervisor, `KillPid' the process that will actually be
%% killed (returned untouched), `NewDeaths' the events coming from the
%% subtree and `Deaths' the events of the previous kills. If the supervisor
%% has seen more than `Intensity' children die since `Period' seconds ago, it
%% is marked dead itself along with its direct children.
-spec handle_child_death(pid(), pid(), strategy(), {intensity(), period()},
                         [death_event()], [death_event()], apptree()) -> {pid(), [death_event()]}.
handle_child_death(Pid, KillPid, Strategy, {Intensity, Period}, NewDeaths, Deaths, Children) ->
    Now = stamp(),
    Deadline = Now-Period,
    ChildPids = get_child_pids(Children),
    %% direct children that the subtree reported as dead, with their stamp
    DeadChildren = [{P, S} || {dead, P, S} <- NewDeaths,
                              lists:member(P, ChildPids)],
    %% one restart is charged to *this* supervisor for each of them
    DeadPids = [{child_dead, Pid, S} || {_, S} <- DeadChildren],
    %% Should the supervisor die, or just some of its children?
    ShouldDie = Intensity < length(qualifying_deaths(Pid, Deadline, DeadPids++Deaths)),
    CurrentDeaths = if ShouldDie ->
                        [{dead, Pid, Now} | all_dead(Children)]
                    ;  not ShouldDie ->
                        %% the strategy needs to know *which* children died,
                        %% an information the child_dead events do not carry
                        ShutdownPids = propagate(Strategy, [P || {P, _} <- DeadChildren], Children),
                        dedupe_append([ShutdownPids, DeadPids, NewDeaths])
                    end,
    {KillPid, CurrentDeaths}.

%% @private implement the propagation strategy on a list of children: given
%% the pids of the direct children that died, return the death events of the
%% siblings the supervisor shuts down as a consequence. The dead children
%% themselves are already accounted for by the caller.
-spec propagate(strategy(), [pid()], [worker()|sup()]) -> [death_event()].
propagate(_, [], _) ->
    % no dead children
    [];
propagate(one_for_all, _, Children) ->
    all_dead(Children);
propagate(rest_for_one, [DeadPid], Children) ->
    %% The children are in reverse order (most recently started first), so
    %% the siblings started after the dead child are the ones found before
    %% it in the list.
    lists:takewhile(fun({dead, Pid, _}) -> Pid =/= DeadPid end, all_dead(Children));
propagate(T, _Dead, _) when T =:= one_for_one; T =:= simple_one_for_one ->
    %% one_for_one and simple_one_for_one: siblings remain as-is
    [].
367 | 
%% @private Concatenates lists of events while skipping duplicate entries,
%% since those can interfere with the frequency counts. Two events are
%% duplicates when they share the same tag and pid, whatever their stamp;
%% only the instance that comes last is kept.
-spec dedupe_append([[death_event()]]) -> [death_event()].
dedupe_append([]) ->
    [];
dedupe_append([[] | Lists]) ->
    dedupe_append(Lists);
dedupe_append([[Event = {Tag, Pid, _} | Later] | Lists]) ->
    Remaining = [Later | Lists],
    SameEvent = fun({Type, P, _}) -> {Type, P} == {Tag, Pid};
                   (_) -> false
                end,
    SeenLater = lists:any(fun(List) -> lists:any(SameEvent, List) end, Remaining),
    case SeenLater of
        true -> dedupe_append(Remaining);
        false -> [Event | dedupe_append(Remaining)]
    end.
383 | 
%% @private Builds a `dead' event for each direct child of a child list, all
%% carrying the same stamp taken at the time of the call.
-spec all_dead([worker()|sup()]) -> [death_event()].
all_dead(Children) ->
    Now = stamp(),
    lists:map(fun(Pid) -> {dead, Pid, Now} end, get_child_pids(Children)).
389 | 
%% @private Builds a `dead' event for each direct or indirect child of a
%% child list, all carrying the same stamp taken at the time of the call.
-spec recursive_all_dead([worker()|sup()]) -> [death_event()].
recursive_all_dead(Children) ->
    Now = stamp(),
    lists:map(fun(Pid) -> {dead, Pid, Now} end, get_subtree_pids(Children)).
395 | 
%% @private Keeps the `child_dead' events recorded against the supervisor
%% `Pid' that happened on or after a deadline, in their original order.
-spec qualifying_deaths(pid(), stamp(), [death_event()]) -> [death_event()].
qualifying_deaths(Pid, Deadline, Deaths) ->
    lists:filter(
      fun({child_dead, P, S}) -> P =:= Pid andalso S >= Deadline;
         (_) -> false
      end,
      Deaths).
401 | 
%% @private Current monotonic time, in seconds. The granularity must be the
%% same as what supervisors use on their own (seconds) for the filtering of
%% deaths by period to work.
-spec stamp() -> stamp().
stamp() ->
    Native = erlang:monotonic_time(),
    erlang:convert_time_unit(Native, native, second).
406 | 
%% @private Lists the pids of the direct children found in a process' child
%% list, workers and supervisors alike, in the order of the list. `noproc'
%% entries are left out.
-spec get_child_pids([worker()|sup()]) -> [pid()].
get_child_pids(Children) ->
    lists:filtermap(
      fun({_, noproc}) -> false;
         ({_, {_, Pid, _, _, _}}) -> {true, Pid};
         ({_, {_, Pid, _}}) -> {true, Pid}
      end,
      Children).
414 | 
%% @private Lists the pids of all the direct or indirect children found in a
%% process' child list. A supervisor comes right before its own subtree;
%% `noproc' entries are left out.
-spec get_subtree_pids([worker()|sup()]) -> [pid()].
get_subtree_pids(Children) ->
    lists:flatmap(
      fun({_, noproc}) -> [];
         ({_, {_, Pid, _}}) -> [Pid];
         ({_, {_, Pid, _, Kids, _}}) -> [Pid | get_subtree_pids(Kids)]
      end,
      Children).
423 | 
%% @private Lists the pids of all the supervisors found at any depth of a
%% tree, which may be a process' child list or a list of `{App, Children}'
%% entries. Workers and `noproc' entries are left out; a supervisor comes
%% right before the supervisors of its own subtree.
-spec supervisor_pids([worker()|sup()]) -> [pid()].
supervisor_pids(Tree) ->
    lists:flatmap(
      fun({_, noproc}) -> [];
         ({_, {_, _, _}}) -> [];
         ({_, {_, Pid, _, Kids, _}}) -> [Pid | supervisor_pids(Kids)];
         ({_, Sup}) when is_list(Sup) -> supervisor_pids(Sup)
      end,
      Tree).


--------------------------------------------------------------------------------