├── .gitignore
├── test
    ├── src
    │   ├── clusterer_test.hrl
    │   ├── clusterer_utils.erl
    │   ├── clusterer_test.erl
    │   ├── clusterer_node.erl
    │   ├── clusterer_interpreter.erl
    │   └── clusterer_program.erl
    └── README.txt
├── rabbitmq-server.patch
├── Makefile
├── src
    ├── rabbit_clusterer_sup.erl
    ├── rabbit_clusterer_comms_sup.erl
    ├── rabbit_clusterer.erl
    ├── rabbit_clusterer_utils.erl
    ├── rabbit_clusterer_comms.erl
    ├── rabbit_clusterer_config.erl
    ├── rabbit_clusterer_coordinator.erl
    └── rabbit_clusterer_transitioner.erl
├── CONTRIBUTING.md
├── priv
    └── schema
    │   └── rabbitmq_clusterer.schema
├── CODE_OF_CONDUCT.md
├── rabbitmq-components.mk
├── README.md
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
 1 | .sw?
 2 | .*.sw?
 3 | *.beam
 4 | .erlang.mk/
 5 | cover/
 6 | deps/
 7 | doc/
 8 | ebin/
 9 | logs/
10 | plugins/
11 | 
12 | rabbitmq_clusterer.d
13 | 


--------------------------------------------------------------------------------
/test/src/clusterer_test.hrl:
--------------------------------------------------------------------------------
 1 | -record(state, { seed,
 2 |                  node_count,
 3 |                  nodes,
 4 |                  config,
 5 |                  valid_config,
 6 |                  active_config
 7 |                }).
 8 | 
 9 | -record(node, { name,
10 |                 port,
11 |                 state,
12 |                 pid
13 |               }).
14 | 
15 | -record(config, { version,
16 |                   nodes,
17 |                   gospel }).
18 | 
19 | -record(step, { modify_node_instrs,
20 |                 modify_config_instr,
21 |                 existential_node_instr,
22 |                 final_state }).
23 | 


--------------------------------------------------------------------------------
/rabbitmq-server.patch:
--------------------------------------------------------------------------------
 1 | diff -r e9637021f623 scripts/rabbitmq-server
 2 | --- a/scripts/rabbitmq-server   Thu Aug 01 15:36:09 2013 +0100
 3 | +++ b/scripts/rabbitmq-server   Tue Aug 06 11:15:37 2013 +0100
 4 | @@ -59,7 +59,7 @@
 5 |  
 6 |  RABBITMQ_START_RABBIT=
 7 |  [ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT=" -noinput"
 8 | -[ "x" = "x$RABBITMQ_NODE_ONLY" ] && RABBITMQ_START_RABBIT="$RABBITMQ_START_RABBIT -s rabbit boot "
 9 | +[ "x" = "x$RABBITMQ_NODE_ONLY" ] && RABBITMQ_START_RABBIT="$RABBITMQ_START_RABBIT -s rabbit_clusterer boot -pa ${RABBITMQ_PLUGINS_DIR}/rabbitmq_clusterer.ez/rabbitmq_clusterer-1.0.0/ebin"
10 |  
11 |  case "$(uname -s)" in
12 |    CYGWIN*) # we make no attempt to record the cygwin pid; rabbitmqctl wait
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PROJECT = rabbitmq_clusterer
 2 | PROJECT_DESCRIPTION = Declarative RabbitMQ clustering
 3 | PROJECT_MOD = rabbit_clusterer
 4 | PROJECT_APP_EXTRA_KEYS = {broker_version_requirements, ["3.6.0", "3.7.0"]}
 5 | 
 6 | BUILD_DEPS = rabbit_common rabbit
 7 | 
 8 | DEP_EARLY_PLUGINS = rabbit_common/mk/rabbitmq-early-plugin.mk
 9 | DEP_PLUGINS = rabbit_common/mk/rabbitmq-plugin.mk
10 | 
11 | # FIXME: Use erlang.mk patched for RabbitMQ, while waiting for PRs to be
12 | # reviewed and merged.
13 | 
14 | ERLANG_MK_REPO = https://github.com/rabbitmq/erlang.mk.git
15 | ERLANG_MK_COMMIT = rabbitmq-tmp
16 | 
17 | include rabbitmq-components.mk
18 | include erlang.mk
19 | 
20 | # --------------------------------------------------------------------
21 | # Testing.
22 | # --------------------------------------------------------------------
23 | 
24 | # The clusterer test suite was never finished
25 | # and is currently disabled.
26 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_sup.erl:
--------------------------------------------------------------------------------
 1 | %% The contents of this file are subject to the Mozilla Public License 
 2 | %% Version 1.1 (the "License"); you may not use this file except in 
 3 | %% compliance with the License. You may obtain a copy of the License at 
 4 | %% https://www.mozilla.org/MPL/1.1/ 
 5 | %%
 6 | %% Software distributed under the License is distributed on an "AS IS" 
 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
 8 | %% License for the specific language governing rights and limitations 
 9 | %% under the License. 
10 | %%
11 | %% The Original Code is RabbitMQ. 
12 | %%
13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
15 | %% Pivotal Software, Inc. All Rights Reserved.
16 | 
17 | -module(rabbit_clusterer_sup).
18 | 
19 | -behaviour(supervisor2).
20 | 
21 | -export([start_link/0, init/1]).
22 | 
23 | %% Starts the Clusterer supervisor; init/1 below defines its children.
24 | start_link() -> supervisor2:start_link(?MODULE, []).
25 | 
26 | %% Both children are intrinsic; {one_for_all, 0, 1} tolerates no restarts.
27 | init([]) ->
28 |     CommsSup = {rabbit_clusterer_comms_sup,
29 |         {rabbit_clusterer_comms_sup, start_link, []},
30 |         intrinsic, infinity, supervisor, [rabbit_clusterer_comms_sup]},
31 |     Coordinator = {rabbit_clusterer_coordinator,
32 |         {rabbit_clusterer_coordinator, start_link, []},
33 |         intrinsic, 16#ffffffff, worker, [rabbit_clusterer_coordinator]},
34 |     {ok, {{one_for_all, 0, 1}, [CommsSup, Coordinator]}}.


--------------------------------------------------------------------------------
/src/rabbit_clusterer_comms_sup.erl:
--------------------------------------------------------------------------------
 1 | %% The contents of this file are subject to the Mozilla Public License 
 2 | %% Version 1.1 (the "License"); you may not use this file except in 
 3 | %% compliance with the License. You may obtain a copy of the License at 
 4 | %% https://www.mozilla.org/MPL/1.1/ 
 5 | %%
 6 | %% Software distributed under the License is distributed on an "AS IS" 
 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
 8 | %% License for the specific language governing rights and limitations 
 9 | %% under the License. 
10 | %%
11 | %% The Original Code is RabbitMQ. 
12 | %%
13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
15 | %% Pivotal Software, Inc. All Rights Reserved.
16 | 
17 | -module(rabbit_clusterer_comms_sup).
18 | 
19 | -behaviour(supervisor).
20 | 
21 | -export([start_link/0, start_comms/0]).
22 | 
23 | -export([init/1]).
24 | 
25 | -define(SERVER, ?MODULE).
26 | 
27 | %% Starts the comms supervisor, registered locally as ?SERVER.
28 | start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []).
29 | 
30 | %% Starts one comms child and returns the Token from its start result.
31 | start_comms() ->
32 |     {ok, _Pid, Token} = supervisor:start_child(?SERVER, []),
33 |     {ok, Token}.
34 | 
35 | %%----------------------------------------------------------------------------
36 | 
37 | init([]) ->
38 |     Comms = {comms, {rabbit_clusterer_comms, start_link, []},
39 |              temporary, 16#ffffffff, worker, [rabbit_clusterer_comms]},
40 |     {ok, {{simple_one_for_one, 10, 10}, [Comms]}}.


--------------------------------------------------------------------------------
/test/README.txt:
--------------------------------------------------------------------------------
 1 | Testing Plan 3.
 2 | 
 3 | # Plan 1 was:
 4 | program <- dsl
 5 | run program
 6 | verify result.
 7 | 
 8 | The problem was that generating a valid program required complex linear types,
 9 | making it infeasible. I then went to plan 2:
10 | 
11 | # Plan 2 was:
12 | one process per node. Up to one config process per node.
13 | Iterative expansion of program with minimal coordination and state by
14 | essentially allowing node processes to do whatever they want to their
15 | node.
16 | 
17 | Problem here is that the node processes can't really validate the
18 | action they attempted to apply to their node as their node is
19 | influenced by everyone else too. Eg you start a node, but you can't
20 | even expect to find later that it's up because some other node process
21 | may have applied a config to their node which turns our node off.
22 | 
23 | General process was to ask everyone to "observe and pick your next
24 | action", then "apply action". Repeat.
25 | 
26 | # Plan 3 is:
27 | Sort of a variation on Plan 2, but with much greater coordination.
28 | 
29 | Driver asks all nodes for their instruction. Node processes select based
30 | on current known state of their node. Driver uses all this to predict
31 | the next stable state. Driver allows all node processes to proceed,
32 | which they do. We then continuously poll all nodes until they're
33 | stable - i.e. pending_shutdown, off, or ready. At that point, we
34 | compare global state with predicted state.
35 | 
36 | Now there are large areas which will not be tested by plan 3
37 | (i.e. application of changes during times of flux), but it's the
38 | sanest approach to testing yet, and maybe implementable.
39 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Overview
 2 | 
 3 | RabbitMQ projects use pull requests to discuss, collaborate on and accept code contributions.
 4 | Pull requests are the primary place to discuss code changes.
 5 | 
 6 | ## How to Contribute
 7 | 
 8 | The process is fairly standard:
 9 | 
10 |  * Fork the repository or repositories you plan on contributing to
11 |  * Clone [RabbitMQ umbrella repository](https://github.com/rabbitmq/rabbitmq-public-umbrella)
12 |  * `cd umbrella`, `make co`
13 |  * Create a branch with a descriptive name in the relevant repositories
14 |  * Make your changes, run tests, commit with a [descriptive message](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html), push to your fork
15 |  * Submit pull requests with an explanation of what has been changed and **why**
16 |  * Submit a filled out and signed [Contributor Agreement](https://github.com/rabbitmq/ca#how-to-submit) if needed (see below)
17 |  * Be patient. We will get to your pull request eventually
18 | 
19 | If what you are going to work on is a substantial change, please first ask the core team
20 | for their opinion on the [RabbitMQ mailing list](https://groups.google.com/forum/#!forum/rabbitmq-users).
21 | 
22 | 
23 | ## Code of Conduct
24 | 
25 | See [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md).
26 | 
27 | 
28 | ## Contributor Agreement
29 | 
30 | If you want to contribute a non-trivial change, please submit a signed copy of our
31 | [Contributor Agreement](https://github.com/rabbitmq/ca#how-to-submit) around the time
32 | you submit your pull request. This will make it much easier (in some cases, possible)
33 | for the RabbitMQ team at Pivotal to merge your contribution.
34 | 
35 | 
36 | ## Where to Ask Questions
37 | 
38 | If something isn't clear, feel free to ask on our [mailing list](https://groups.google.com/forum/#!forum/rabbitmq-users).
39 | 


--------------------------------------------------------------------------------
/priv/schema/rabbitmq_clusterer.schema:
--------------------------------------------------------------------------------
 1 | %% External config file path; rejected unless it is the only clusterer.* key.
 2 | {mapping, "clusterer.config", "rabbitmq_clusterer.config",
 3 |     [{datatype, string}, {validators, ["file_accessible"]}]}.
 4 | 
 5 | {translation, "rabbitmq_clusterer.config",
 6 | fun(Conf) ->
 7 |     case cuttlefish:conf_get("clusterer.config", Conf, undefined) of
 8 |         Path when is_list(Path) ->
 9 |             case cuttlefish_variable:filter_by_prefix("clusterer", Conf) of
10 |                 [{["clusterer", "config"], Path}] -> Path;
11 |                 _ -> cuttlefish:invalid("Config for clusterer defined in "++
12 |                                         Path ++ " file. " ++
13 |                                         "All other clusterer configurations should be removed")
14 |             end;
15 |         _ -> []
16 |     end
17 | end}.
18 | {mapping, "clusterer.version", "rabbitmq_clusterer.config.version",
19 |     [{datatype, integer}]}.
20 | 
21 | {mapping, "clusterer.nodes.$node", "rabbitmq_clusterer.config.nodes",
22 |     [{datatype, atom}]}.
23 | {mapping, "clusterer.nodes.ram.$node", "rabbitmq_clusterer.config.nodes",
24 |     [{datatype, atom}]}.
25 | {mapping, "clusterer.nodes.disk.$node", "rabbitmq_clusterer.config.nodes",
26 |     [{datatype, atom}]}.
27 | {mapping, "clusterer.nodes.disc.$node", "rabbitmq_clusterer.config.nodes",
28 |     [{datatype, atom}]}.
29 | 
30 | %% Builds the {Node, Type} list. Keys are matched exactly: filtering on the
31 | %% "clusterer.nodes" prefix alone also returns the ram/disk/disc subkeys, and
32 | %% ram entries must be unwrapped from their {Key, Node} pairs.
33 | {translation, "rabbitmq_clusterer.config.nodes",
34 | fun(Conf) ->
35 |     All  = cuttlefish_variable:filter_by_prefix("clusterer.nodes", Conf),
36 |     Disk = [N || {["clusterer", "nodes", _], N} <- All] ++
37 |            [N || {["clusterer", "nodes", "disk", _], N} <- All] ++
38 |            [N || {["clusterer", "nodes", "disc", _], N} <- All],
39 |     Ram  = [N || {["clusterer", "nodes", "ram", _], N} <- All],
40 |     [{N, disk} || N <- Disk] ++ [{N, ram} || N <- Ram]
41 | end}.
42 | {mapping, "clusterer.gospel", "rabbitmq_clusterer.config.gospel",
43 |     [{datatype, {enum, [reset]}}]}.
44 | 
45 | {mapping, "clusterer.gospel.node", "rabbitmq_clusterer.config.gospel",
46 |     [{datatype, atom}]}.
47 | 
48 | %% "clusterer.gospel = reset" wins; otherwise clusterer.gospel.node is used.
49 | {translation, "rabbitmq_clusterer.config.gospel",
50 | fun(Conf) ->
51 |     case cuttlefish:conf_get("clusterer.gospel", Conf, undefined) of
52 |         reset  -> reset;
53 |         _Other -> {node, cuttlefish:conf_get("clusterer.gospel.node", Conf)}
54 |     end
55 | end}.
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, and in the interest of fostering an open
 4 | and welcoming community, we pledge to respect all people who contribute through reporting
 5 | issues, posting feature requests, updating documentation, submitting pull requests or
 6 | patches, and other activities.
 7 | 
 8 | We are committed to making participation in this project a harassment-free experience for
 9 | everyone, regardless of level of experience, gender, gender identity and expression,
10 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age,
11 | religion, or nationality.
12 | 
13 | Examples of unacceptable behavior by participants include:
14 | 
15 |  * The use of sexualized language or imagery
16 |  * Personal attacks
17 |  * Trolling or insulting/derogatory comments
18 |  * Public or private harassment
19 |  * Publishing others' private information, such as physical or electronic addresses,
20 |    without explicit permission
21 |  * Other unethical or unprofessional conduct
22 | 
23 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
24 | commits, code, wiki edits, issues, and other contributions that are not aligned to this
25 | Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors
26 | that they deem inappropriate, threatening, offensive, or harmful.
27 | 
28 | By adopting this Code of Conduct, project maintainers commit themselves to fairly and
29 | consistently applying these principles to every aspect of managing this project. Project
30 | maintainers who do not follow or enforce the Code of Conduct may be permanently removed
31 | from the project team.
32 | 
33 | This Code of Conduct applies both within project spaces and in public spaces when an
34 | individual is representing the project or its community.
35 | 
36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
37 | contacting a project maintainer at [info@rabbitmq.com](mailto:info@rabbitmq.com). All complaints will
38 | be reviewed and investigated and will result in a response that is deemed necessary and
39 | appropriate to the circumstances. Maintainers are obligated to maintain confidentiality
40 | with regard to the reporter of an incident.
41 | 
42 | This Code of Conduct is adapted from the
43 | [Contributor Covenant](https://contributor-covenant.org), version 1.3.0, available at
44 | [contributor-covenant.org/version/1/3/0/](https://contributor-covenant.org/version/1/3/0/)
45 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer.erl:
--------------------------------------------------------------------------------
 1 | %% The contents of this file are subject to the Mozilla Public License 
 2 | %% Version 1.1 (the "License"); you may not use this file except in 
 3 | %% compliance with the License. You may obtain a copy of the License at 
 4 | %% https://www.mozilla.org/MPL/1.1/ 
 5 | %%
 6 | %% Software distributed under the License is distributed on an "AS IS" 
 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
 8 | %% License for the specific language governing rights and limitations 
 9 | %% under the License. 
10 | %%
11 | %% The Original Code is RabbitMQ. 
12 | %%
13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
15 | %% Pivotal Software, Inc. All Rights Reserved.
16 | 
17 | -module(rabbit_clusterer).
18 | 
19 | -behaviour(application).
20 | 
21 | -export([boot/0]).
22 | 
23 | -export([apply_config/0, apply_config/1,   %% for 'rabbitmqctl eval ...'
24 |          status/0, status/1]).
25 | 
26 | -export([start/2, stop/1]).
27 | 
28 | %%----------------------------------------------------------------------------
29 | 
30 | %% Starts the rabbitmq_clusterer application, then asks the coordinator to
31 | %% begin coordination. Crashes with badmatch unless both steps return ok.
32 | boot() ->
33 |     ok = application:start(rabbitmq_clusterer),
34 |     ok = rabbit_clusterer_coordinator:begin_coordination().
35 | 
36 | %% apply_config allows cluster configs to be dynamically applied to a
37 | %% running system. Currently that's best done by rabbitmqctl eval, but
38 | %% may be improved in the future.
39 | apply_config() -> apply_config(undefined).
40 | 
41 | apply_config(Config) -> rabbit_clusterer_coordinator:apply_config(Config).
42 | 
43 | status() -> status(node()).
44 | 
45 | %% Prints Node's Clusterer status, as reported by the coordinator. The preboot
46 | %% line takes no arguments: formatting "" with ~p used to print a stray [].
47 | status(Node) ->
48 |     {Format, Args} =
49 |         case rabbit_clusterer_coordinator:request_status(Node) of
50 |             preboot -> {"Clusterer is pre-booting.~n", []};
51 |             {Config, booting} ->
52 |                 {"Clusterer is booting Rabbit into cluster configuration: "
53 |                  "~n~s~n", [format_config(Config)]};
54 |             {Config, ready} ->
55 |                 {"Rabbit is running in cluster configuration: ~n~s~n"
56 |                  "Running nodes: ~p~n",
57 |                  [format_config(Config), rabbit_mnesia:cluster_nodes(running)]};
58 |             {Config, {transitioner, join}} ->
59 |                 {"Clusterer is trying to join into cluster configuration: "
60 |                  "~n~s~n", [format_config(Config)]};
61 |             {Config, {transitioner, rejoin}} ->
62 |                 {"Clusterer is trying to rejoin cluster configuration: ~n~s~n",
63 |                  [format_config(Config)]}
64 |         end,
65 |     io:format(Format, Args).
66 | 
67 | %% Renders a config for display; an undefined config renders as "".
68 | format_config(undefined) -> "";
69 | format_config(Config) ->
70 |     rabbit_misc:format(
71 |       "~p", [tl(rabbit_clusterer_config:to_proplist(undefined, Config))]).
72 | %%----------------------------------------------------------------------------
73 | 
74 | %% Application callbacks; start/2 accepts only (normal, []).
75 | start(normal, []) -> rabbit_clusterer_sup:start_link().
76 | 
77 | stop(_State) -> ok.


--------------------------------------------------------------------------------
/test/src/clusterer_utils.erl:
--------------------------------------------------------------------------------
  1 | -module(clusterer_utils).
  2 | 
  3 | -export([set_config/2,
  4 |          store_node/2,
  5 |          set_node_state/2,
  6 |          contains_node/2,
  7 |          make_config_active/1,
  8 |          localise_program/2]).
  9 | 
 10 | -include("clusterer_test.hrl").
 11 | 
 12 | %%----------------------------------------------------------------------------
 13 | 
 14 | %% Stores Config; it also becomes valid_config if non-empty and new or newer.
 15 | set_config(Config, State) ->
 16 |     State1 = State #state { config = Config },
 17 |     case {Config, State1 #state.valid_config} of
 18 |         {#config { nodes = [_|_] }, undefined} ->
 19 |             State1 #state { valid_config = Config };
 20 |         {#config { nodes = [_|_], version = V }, #config { version = VV }}
 21 |           when V > VV -> State1 #state { valid_config = Config };
 22 |         _ -> State1
 23 |     end.
 24 | store_node(Node = #node { name = Name }, State = #state { nodes = Nodes }) ->
 25 |     State #state { nodes = orddict:store(Name, Node, Nodes) }.
 26 | 
 27 | %% A ready node that Config does not contain is modelled as off.
 28 | set_node_state(Node = #node { name = Name, state = ready }, Config) ->
 29 |     case contains_node(Name, Config) of
 30 |         true  -> Node;
 31 |         false -> Node #node { state = off }
 32 |     end;
 33 | set_node_state(Node = #node { }, _Config) -> Node.
 34 | contains_node(_Node, undefined)                 -> false;
 35 | contains_node(Node,  #config { nodes = Nodes }) -> orddict:is_key(Node, Nodes).
 36 | %% Promotes the valid config to active. Because we know that the valid
 37 | %% config is only applied to nodes which are involved in the config,
 38 | %% modelling the propagation is easy: each node's state is recomputed
 39 | %% against it.
 40 | make_config_active(State = #state { nodes        = Nodes,
 41 |                                     valid_config = Valid = #config { } }) ->
 42 |     Settle = fun (_Name, Node) -> set_node_state(Node, Valid) end,
 43 |     State #state { nodes         = orddict:map(Settle, Nodes),
 44 |                    active_config = Valid }.
 45 | 
 46 | %% Localises a whole program (initial state plus every step) against Host.
 47 | localise_program({InitialState, Steps}, Host) ->
 48 |     LocalState = localise_state(InitialState, Host),
 49 |     {LocalState, [localise_step(Step, Host) || Step <- Steps]}.
 50 | 
 51 | localise_step(#step { modify_node_instrs     = NodeInstrs,
 52 |                       modify_config_instr    = ConfigInstr,
 53 |                       existential_node_instr = ExistentialInstr,
 54 |                       final_state            = FinalState }, Host) ->
 55 |     Localise = fun (Instr) -> localise_instr(Instr, Host) end,
 56 |     #step { modify_node_instrs     = [Localise(I) || I <- NodeInstrs],
 57 |             modify_config_instr    = Localise(ConfigInstr),
 58 |             existential_node_instr = Localise(ExistentialInstr),
 59 |             final_state            = localise_state(FinalState, Host) }.
 60 | %% Localises the node name(s) carried by one instruction against Host;
 61 | %% instructions that carry no node name are returned unchanged.
 62 | localise_instr(noop, _Host) ->
 63 |     noop;
 64 | localise_instr({config_version_to, _Ver} = Instr, _Host) ->
 65 |     Instr;
 66 | localise_instr({config_shutdown_timeout_to, _ST} = Instr, _Host) ->
 67 |     Instr;
 68 | localise_instr({config_gospel_to, reset} = Instr, _Host) ->
 69 |     Instr;
 70 | localise_instr({config_gospel_to, {node, Name}}, Host) ->
 71 |     {config_gospel_to, {node, localise_name(Name, Host)}};
 72 | localise_instr({create_node, Name, Port}, Host) ->
 73 |     {create_node, localise_name(Name, Host), Port};
 74 | %% Instructions that name a node and also carry a config.
 75 | localise_instr({Action, Name, Config}, Host)
 76 |   when Action =:= apply_config_to_node; Action =:= start_node_with_config ->
 77 |     {Action, localise_name(Name, Host), localise_config(Config, Host)};
 78 | %% Instructions that name a single node and nothing else.
 79 | localise_instr({Action, Name}, Host)
 80 |   when Action =:= stop_node; Action =:= start_node; Action =:= reset_node;
 81 |        Action =:= delete_node; Action =:= config_remove_node;
 82 |        Action =:= config_add_node ->
 83 |     {Action, localise_name(Name, Host)}.
 84 | 
 85 | %% Keeps the first part of NodeName but pairs it with Host.
 86 | localise_name(NodeName, Host) ->
 87 |     {Node, _OldHost} = rabbit_nodes:parts(NodeName),
 88 |     rabbit_nodes:make({Node, Host}).
 89 | %% Localises the node keys and any gospel node of a config; undefined is kept.
 90 | localise_config(undefined, _Host) ->
 91 |     undefined;
 92 | localise_config(Config = #config { nodes = Nodes, gospel = Gospel }, Host) ->
 93 |     Rename = fun (Name) -> localise_name(Name, Host) end,
 94 |     Config #config {
 95 |       nodes  = orddict:from_list([{Rename(Name), Value} ||
 96 |                                      {Name, Value} <- orddict:to_list(Nodes)]),
 97 |       gospel = case Gospel of
 98 |                    {node, Name} -> {node, Rename(Name)};
 99 |                    reset        -> reset
100 |                end }.
101 | %% Localises the node dictionary (keys and records) and all three configs.
102 | localise_state(State = #state { nodes         = Nodes,
103 |                                 config        = Config,
104 |                                 valid_config  = VConfig,
105 |                                 active_config = AConfig }, Host) ->
106 |     LocalNodes = [{localise_name(Name, Host), localise_node(Node, Host)} ||
107 |                      {Name, Node} <- orddict:to_list(Nodes)],
108 |     State #state { nodes         = orddict:from_list(LocalNodes),
109 |                    config        = localise_config(Config, Host),
110 |                    valid_config  = localise_config(VConfig, Host),
111 |                    active_config = localise_config(AConfig, Host) }.
112 | 
113 | %% Only the name field changes; the rest of the node record is kept.
114 | localise_node(Node = #node { name = Name }, Host) ->
115 |     Node #node { name = localise_name(Name, Host) }.
116 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_utils.erl:
--------------------------------------------------------------------------------
  1 | %% The contents of this file are subject to the Mozilla Public License 
  2 | %% Version 1.1 (the "License"); you may not use this file except in 
  3 | %% compliance with the License. You may obtain a copy of the License at 
  4 | %% https://www.mozilla.org/MPL/1.1/ 
  5 | %%
  6 | %% Software distributed under the License is distributed on an "AS IS" 
  7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
  8 | %% License for the specific language governing rights and limitations 
  9 | %% under the License. 
 10 | %%
 11 | %% The Original Code is RabbitMQ. 
 12 | %%
 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
 15 | %% Pivotal Software, Inc. All Rights Reserved.
 16 | 
 17 | -module(rabbit_clusterer_utils).
 18 | 
 19 | -export([stop_mnesia/0,
 20 |          stop_rabbit/0,
 21 |          start_rabbit_async/0,
 22 |          boot_rabbit_async/0,
 23 |          make_mnesia_singleton/1,
 24 |          eliminate_mnesia_dependencies/1,
 25 |          configure_cluster/2]).
 26 | 
 27 | %%----------------------------------------------------------------------------
 28 | 
 29 | -define(PRE_SLEEP, 10000). %% 10 seconds
 30 | 
 31 | %% Stops mnesia; crashes with badmatch unless mnesia:stop/0 returns stopped.
 32 | stop_mnesia() ->
 33 |     stopped = mnesia:stop(),
 34 |     ok.
 35 | 
 36 | ensure_start_mnesia() -> ok = mnesia:start().
 37 | 
 38 | %% Stops the rabbit application; rabbit not being started counts as success.
 39 | %% Any other result is handed back to the caller unchanged.
 40 | stop_rabbit() ->
 41 |     case application:stop(rabbit) of
 42 |         {error, {not_started, rabbit}} -> ok;
 43 |         Result                         -> Result
 44 |     end.
 45 | start_rabbit_async() -> ok = spawn_starter(fun rabbit:start/0).
 46 | boot_rabbit_async()  -> ok = spawn_starter(fun rabbit:boot/0).
 47 | 
 48 | %% Runs Fun in a new process; a failure is logged, then reported as failed.
 49 | spawn_starter(Fun) ->
 50 |     spawn(fun () -> run_starter(Fun) end),
 51 |     ok.
 52 | 
 53 | run_starter(Fun) ->
 54 |     try
 55 |         ok = Fun(),
 56 |         rabbit_clusterer_coordinator:rabbit_booted()
 57 |     catch
 58 |         Class:Reason ->
 59 |             error_logger:error_msg(
 60 |               "Clusterer failed to start Rabbit: ~p:~p~n", [Class, Reason]),
 61 |             rabbit_clusterer_coordinator:rabbit_boot_failed()
 62 |     end.
 63 | %% true: delete everything under the mnesia dir and reset the cluster status.
 64 | make_mnesia_singleton(true) ->
 65 |     %% With mnesia not running, we can't call
 66 |     %% rabbit_mnesia:force_reset() because that tries to read in the
 67 |     %% cluster status files from the mnesia directory which might not
 68 |     %% exist if we're a completely fresh node. So we just do the rest
 69 |     %% manually.
 70 |     error_logger:info_msg("Clusterer Resetting Rabbit~n"),
 71 |     ok = rabbit_mnesia:ensure_mnesia_dir(),
 72 |     MnesiaFiles = filelib:wildcard(rabbit_mnesia:dir() ++ "/*"),
 73 |     ok = rabbit_file:recursive_delete(MnesiaFiles),
 74 |     ok = rabbit_node_monitor:reset_cluster_status();
 75 | make_mnesia_singleton(false) ->
 76 |     %% Note that this is wrong: in this case we actually want to
 77 |     %% eliminate everyone who isn't in our cluster - i.e. everyone
 78 |     %% mnesia thinks we're currently clustered with. However, due to
 79 |     %% limitations with del_table_copy (i.e. mnesia must not be
 80 |     %% running on remote node; it must be running on our node), this
 81 |     %% is difficult to orchestrate: it's easiest done by the
 82 |     %% eliminated nodes doing an RPC to us. But there are still cases
 83 |     %% where that may not work out correctly. However, this scenario
 84 |     %% can only occur when a cluster is being split up into other
 85 |     %% clusters. For MVP and this project, we don't consider that a
 86 |     %% use case, so we're going to just ignore this problem for the
 87 |     %% time being.
 88 |     eliminate_mnesia_dependencies([]).
 89 | 
 90 | eliminate_mnesia_dependencies(NodesToDelete) ->
 91 |     ok = rabbit_mnesia:ensure_mnesia_dir(),
 92 |     ok = ensure_start_mnesia(),
 93 |     %% rabbit_table:force_load() does not error if
 94 |     %% mnesia:force_load_table errors(!) Thus we can safely run this
 95 |     %% even in clean state - i.e. one where neither the schema nor any
 96 |     %% tables actually exist.
 97 |     ok = rabbit_table:force_load(),
 98 |     case rabbit_table:is_present() of
 99 |         true  -> ok = rabbit_table:wait_for_replicated();
100 |         false -> ok
101 |     end,
102 |     %% del_table_copy has to be done after the force_load but is also
103 |     %% usefully idempotent.
104 |     [{atomic,ok} = mnesia:del_table_copy(schema, Node) || Node <- NodesToDelete],
105 |     ok = remove_from_cluster_status(NodesToDelete),
106 |     ok = stop_mnesia(),
107 |     %% We had to force load in case we had to delete any schemas. But
108 |     %% once we've stopped mnesia (and we have to because rabbit
109 |     %% upgrades expect to find mnesia stopped), mnesia seems to forget
110 |     %% that it's been force_loaded and thus should now really behave
111 |     %% as if it's the master. Consequently we have to touch the
112 |     %% force_load file in the mnesia dir which rabbit_mnesia then
113 |     %% finds and does another force load when rabbit actually boots.
114 |     ForceLoadFile = filename:join(rabbit_mnesia:dir(), "force_load"),
115 |     ok = rabbit_file:write_file(ForceLoadFile, <<"">>).
116 | 
117 | configure_cluster(Nodes, MyNodeType) ->
118 |     case application:load(rabbit) of
119 |         {error, {already_loaded, rabbit}} -> ok;
120 |         ok                                -> ok
121 |     end,
122 |     ok = application:set_env(rabbit, cluster_nodes, {Nodes, MyNodeType}).
123 | 
124 | %% Drops Nodes from the stored cluster status, or resets it if unreadable.
125 | remove_from_cluster_status(Nodes) ->
126 |     try
127 |         {All, Disc, Running} = rabbit_node_monitor:read_cluster_status(),
128 |         Pruned = {All -- Nodes, Disc -- Nodes, Running -- Nodes},
129 |         ok = rabbit_node_monitor:write_cluster_status(Pruned)
130 |     catch
131 |         {error, {corrupt_or_missing_cluster_files, _Stat, _Run}} ->
132 |             ok = rabbit_node_monitor:reset_cluster_status()
133 |     end.


--------------------------------------------------------------------------------
/test/src/clusterer_test.erl:
--------------------------------------------------------------------------------
  1 | -module(clusterer_test).
  2 | 
  3 | -export([test/1, test/2, test_program/1]).
  4 | 
  5 | -include("clusterer_test.hrl").
  6 | 
  7 | %%----------------------------------------------------------------------------
  8 | %%
  9 | %% Testing the Clusterer
 10 | %%
 11 | %% Testing the Clusterer is challenging given the level of
 12 | %% concurrency, eventually-consistent design, and the vast number of
 13 | %% scenarios and modifications possible. The approach we take here is
 14 | %% to deterministically generate programs which are then filtered and
 15 | %% interpreted. These programs describe how to construct up to one
 16 | %% cluster and operations upon the cluster.
 17 | %%
 18 | %% A program is a sequence of steps from a given starting state. Each
 19 | %% step contains up to one instruction for each node, up to one
 20 | %% instruction for modifying the cluster config, and up to one
 21 | %% instruction for creating or deleting a node. All instructions are
 22 | %% independent thus all the instructions within a step can be run in
 23 | %% parallel. For example, a step can not contain both a "switch node X
 24 | %% on" and a "delete node X" instruction. During program generation we
 25 | %% capture the expected state of all the nodes at the end of each
 26 | %% step. When we come to interpret the program, we use this expected
 27 | %% state to compare to what we observe of the Real World. If we reach
 28 | %% the end of the program and no divergence is observed then the
 29 | %% program passed successfully.
 30 | %%
 31 | %% Program generation is driven by a seed. When the seed is reduced to
 32 | %% 0, there is no more entropy available, and so program generation
 33 | %% halts. At each step, the set of available instructions to choose
 34 | %% from is highly dependent on the predicted state of the nodes at
 35 | %% this point. Having constructed a list of viable instructions at a
 36 | %% given point in the program, the seed is used to select the
 37 | %% instruction. The list will always include the 'noop' instruction
 38 | %% (but will never be just the 'noop' instruction), and the seed
 39 | %% modulo the length of the list of valid instructions is used to
 40 | %% select which instruction is chosen. The new seed is the current
 41 | %% seed divided by the length of that list and is passed to the next
 42 | %% stage of instruction selection. Thus the entropy of the seed is
 43 | %% slowly reduced: it is consumed by selecting instructions from
 44 | %% lists. This strategy means every seed will result in a unique
 45 | %% program (though isomorphisms of various degrees are possible), and
 46 | %% that a given seed will always generate the same program. In other
 47 | %% words, the seed is equivalent to the perfect compression of the
 48 | %% program (proof left as an exercise to the reader...).
 49 | %%
 50 | %% Program generation is heavily constrained to avoid generating
 51 | %% programs where there are multiple possible valid outcomes, and to
 52 | %% ensure we never have more than a single cluster running. The latter
 53 | %% is essential to keep modelling of the nodes feasible.
 54 | %%
 55 | %% Having generated a program we then filter it to test whether or not
 56 | %% it contains any interesting aspects. Currently we just look for
 57 | %% programs which contain more than a single node running at any given
 58 | %% point in time, but more elaborate filters are possible. Having
 59 | %% selected a program to run, it is then passed to the interpreter.
 60 | %%
 61 | %% Some aspects of the interpreter are remarkably similar to the
 62 | %% program generation itself: in the program generator we have to
 63 | %% model changes to nodes and cluster configs and certainly much of
 64 | %% the mechanism for making changes to the cluster configs is very
 65 | %% similar in the interpreter. For nodes, we have one process per node
 66 | %% to allow the possibility of changes to the nodes themselves for
 67 | %% each step actually occurring in parallel. In this regard, given the
 68 | %% program doesn't really specify scheduling of instructions beyond
 69 | %% "concurrently", multiple different runs of the same program may
 70 | %% result in different instructions being evaluated at different
 71 | %% times. However, the point of the Clusterer (and the constraints of
 72 | %% the program generation) is that it should be eventually consistent:
 73 | %% the outcome of each step should be independent of the individual
 74 | %% scheduling of instructions within a step.
 75 | %%
 76 | %% To detect divergence we have to be aware that a cluster of nodes
 77 | %% may take a short period of time to stabilise at a new cluster
 78 | %% config, to turn off, etc. The strategy we adopt is not fool-proof,
 79 | %% but it'll do. We wait for all nodes to be stable in some way
 80 | %% (i.e. off, reset, on or pending_shutdown, but not booting). We then
 81 | %% wait a short amount of time and ask them all again for their
 82 | %% state. If they're all still stable and they're all still stable in
 83 | %% the same way, then we declare the cluster is stable and that we can
 84 | %% actually check this state for divergence. At this point we compare
 85 | %% their stable states and the cluster configs they're running with
 86 | %% the state our program generation predicted for each step and fail
 87 | %% if any divergence is detected.
 88 | %%
 89 | %%----------------------------------------------------------------------------
 90 | 
 91 | %% Run the programs for seeds 0..Limit-1 (NB Limit itself is excluded).
 92 | test(Limit) when 0 < Limit ->
 93 |     FirstSeed = 0, test(FirstSeed, Limit).
 94 | 
 95 | %% Run the programs for seeds From..To-1; needs a distributed local node.
 96 | test(From, To) when From < To ->
 97 |     ThisNode = node(),
 98 |     if ThisNode =:= 'nonode@nohost' -> {error, must_be_distributed_node};
 99 |        true -> {_Name, Host} = rabbit_nodes:parts(ThisNode),
100 |                io:format("Passed programs: ["),
101 |                test_sequence(Host, To, From, 0) end.
102 | 
103 | %% Run seeds Seed..Limit-1 in order; stops at, and returns, the first failure.
104 | test_sequence(_Host, Limit, Limit, Passed) ->
105 |     io:format("].~n~p programs were ran and passed~n", [Passed]);
106 | test_sequence(Host, Limit, Seed, Passed) ->
107 |     case test_program(Host, Seed) of
108 |         {_Prog, ok} ->
109 |             io:format("~p,", [Seed]),
110 |             test_sequence(Host, Limit, Seed + 1, Passed + 1);
111 |         skip -> test_sequence(Host, Limit, Seed + 1, Passed);
112 |         {Prog, Failure} ->
113 |             io:format("~nError encountered with program ~p:~n~n~p~n~n~p~n",
114 |                       [Seed, Prog, Failure]), Failure end.
115 | 
116 | %% Run a single program, given either its seed or the program itself.
117 | test_program(Seed) when is_integer(Seed) ->
118 |     {_Name, Host} = rabbit_nodes:parts(node()), test_program(Host, Seed);
119 | test_program({#state {}, Steps} = Nomadic) when is_list(Steps) ->
120 |     {_Name, Host} = rabbit_nodes:parts(node()),
121 |     Localised = clusterer_utils:localise_program(Nomadic, Host),
122 |     {Nomadic, clusterer_interpreter:run_program(Localised)}.
123 | 
124 | %% Generate the program for Seed; run it on Host unless the filter skips it.
125 | test_program(Host, Seed) ->
126 |     Nomadic = clusterer_program:generate_program(new_state(Seed)),
127 |     case filter_program(Nomadic) of
128 |         run  -> Localised = clusterer_utils:localise_program(Nomadic, Host),
129 |                 {Nomadic, clusterer_interpreter:run_program(Localised)};
130 |         skip -> skip end.
131 | 
132 | %%----------------------------------------------------------------------------
133 | 
134 | %% Starting state for program generation: no nodes yet and an empty,
135 | %% version 0 config whose gospel is reset.
136 | new_state(Seed) ->
137 |     EmptyConfig = #config { version = 0, nodes = [], gospel = reset },
138 |     #state { seed          = Seed,
139 |              node_count    = 0,
140 |              nodes         = orddict:new(),
141 |              config        = EmptyConfig,
142 |              valid_config  = undefined,
143 |              active_config = undefined }.
144 | 
145 | %% Decide whether a generated program is interesting enough to run. For
146 | %% now the only filter is two_ready/1; more elaborate ones may follow.
147 | filter_program(Program) ->
148 |     case two_ready(Program) of
149 |         false -> skip;
150 |         true  -> run end.
151 | 
152 | %% True if some step is predicted to end with more than one ready node.
153 | %% Kept free of output: it runs for every seed, inside test/2's progress line.
154 | two_ready({_InitialState, Steps}) ->
155 |     lists:any(fun (#step { final_state = #state { nodes = Nodes } }) ->
156 |                       length([ready || {_Name, #node { state = ready }}
157 |                                            <- orddict:to_list(Nodes)]) > 1 end, Steps).
158 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_comms.erl:
--------------------------------------------------------------------------------
  1 | %% The contents of this file are subject to the Mozilla Public License 
  2 | %% Version 1.1 (the "License"); you may not use this file except in 
  3 | %% compliance with the License. You may obtain a copy of the License at 
  4 | %% https://www.mozilla.org/MPL/1.1/ 
  5 | %%
  6 | %% Software distributed under the License is distributed on an "AS IS" 
  7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
  8 | %% License for the specific language governing rights and limitations 
  9 | %% under the License. 
 10 | %%
 11 | %% The Original Code is RabbitMQ. 
 12 | %%
 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
 15 | %% Pivotal Software, Inc. All Rights Reserved.
 16 | 
 17 | -module(rabbit_clusterer_comms).
 18 | 
 19 | -behaviour(gen_server).
 20 | 
 21 | -export([start_link/0, stop/1,
 22 |          multi_call/3, multi_cast/3,
 23 |          lock_nodes/2, lock/2, unlock/2]).
 24 | 
 25 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
 26 |          terminate/2, code_change/3]).
 27 | 
 28 | -record(state, { token, locked_by, locking }).
 29 | 
 30 | -define(TARGET, rabbit_clusterer_coordinator).
 31 | 
 32 | %% In general the comms process exists to perform blocking calls to
 33 | %% other nodes, without causing the main coordinator process to
 34 | %% block. Thus the communication between the coordinator and the comms
 35 | %% is always async even if the comms process goes on to do blocking
 36 | %% communication with other nodes. Thus we explain the existence of
 37 | %% multi_call.
 38 | %%
 39 | %% Once we have multi_call and we care about message arrival order, we
 40 | %% have to have multi_cast too so that messages arrive in the same
 41 | %% order they were sent.
 42 | %%
 43 | %% We also push the locking API in here. This is rather more complex
 44 | %% and is only used by the rejoin transitioner, where it is also
 45 | %% documented. But essentially the comms pid is the lock, and the lock
 46 | %% is taken by some other pid, which the lock monitors. Should the pid
 47 | %% that holds the lock die, the lock is released.
 48 | 
 49 | %% Start a comms process; the extra {Pid, Ref} token identifies this instance.
 50 | start_link() ->
 51 |     Tag = make_ref(), {ok, Comms} = gen_server:start_link(?MODULE, [Tag], []),
 52 |     {ok, Comms, {Comms, Tag}}.
 53 | 
 54 | stop({Comms, _Ref}) -> %% async: the comms process then stops with reason normal
 55 |     ok = gen_server:cast(Comms, stop).
 56 | 
 57 | multi_call(Nodes, Msg, {Comms, _Ref}) ->
 58 |     %% Deliberately a cast: the caller must not block (that could
 59 |     %% deadlock). The comms process later casts {comms, Token, Result}
 60 |     %% back to the calling process.
 61 |     Caller = self(), ok = gen_server:cast(Comms, {multi_call, Caller, Nodes, Msg}).
 62 | 
 63 | multi_cast(Nodes, Msg, {Comms, _Ref}) ->
 64 |     %% Routed through the comms process so that abcasts and multi_calls
 65 |     %% share one sender and therefore arrive in the order they were
 66 |     %% issued.
 67 |     Request = {multi_cast, Nodes, Msg}, ok = gen_server:cast(Comms, Request).
 68 | 
 69 | %% Public API: asynchronously start locking Nodes (must be non-empty).
 70 | lock_nodes([_|_] = Nodes, {Comms, _Ref}) ->
 71 |     Requester = self(), ok = gen_server:cast(Comms, {lock_nodes, Requester, Nodes}).
 72 | 
 73 | %% Passed through from the coordinator: Locker asks to take this lock.
 74 | lock(Locker, {Comms, _Ref}) ->
 75 |     Request = {lock, Locker}, ok = gen_server:cast(Comms, Request).
 76 | 
 77 | %% Passed through from the coordinator: Locker gives this lock back.
 78 | unlock(Locker, {Comms, _Ref}) ->
 79 |     Request = {unlock, Locker}, ok = gen_server:cast(Comms, Request).
 80 | 
 81 | %%----------------------------------------------------------------------------
 82 | 
 83 | %% Initially nobody holds this lock and no lock_nodes request is in progress.
 84 | init([Ref]) ->
 85 |     Token = {self(), Ref},
 86 |     {ok, #state { token = Token, locked_by = undefined, locking = undefined }}.
 87 | 
 88 | handle_call(Request, Caller, State) -> %% no synchronous calls are supported
 89 |     {stop, {unhandled_call, Request, Caller}, State}.
 90 | 
 91 | handle_cast({multi_call, ReplyTo, Nodes, Msg}, %% blocking call made on ReplyTo's behalf
 92 |             State = #state { token = Token }) ->
 93 |     %% 'infinity' does not cause it to wait for badnodes to become
 94 |     %% good.
 95 |     Result = gen_server:multi_call(Nodes, ?TARGET, Msg, infinity),
 96 |     gen_server:cast(ReplyTo, {comms, Token, Result}), %% result goes back asynchronously
 97 |     {noreply, State};
 98 | 
 99 | handle_cast({multi_cast, Nodes, Msg}, State) ->
100 |     abcast = gen_server:abcast(Nodes, ?TARGET, Msg),
101 |     {noreply, State};
102 | 
103 | handle_cast({lock_nodes, ReplyTo, Nodes}, %% only matches while no lock_nodes is in progress
104 |             State = #state { locking = undefined }) ->
105 |     true = lists:member(node(), Nodes), %% ASSERTION
106 |     %% Of course, all of this has to be async too...
107 |     [First|_] = SortedNodes = lists:usort(Nodes), %% locked one at a time, in sorted order
108 |     [erlang:monitor(process, {?TARGET, N}) || N <- SortedNodes], %% DOWNs: see handle_info/2
109 |     gen_server:cast({?TARGET, First}, {lock, self()}),
110 |     {noreply, State #state { locking = {[], SortedNodes, ReplyTo} }}; %% {Locked, ToLock, ReplyTo}
111 | 
112 | handle_cast({lock_ok, Node},
113 |             State = #state { locking = {_Locked, [Node], ReplyTo}, %% Node was the last to lock
114 |                              token   = Token }) ->
115 |     gen_server:cast(ReplyTo, {comms, Token, lock_ok}),
116 |     {noreply, State #state { locking = undefined }};
117 | handle_cast({lock_ok, Node},
118 |             State = #state { locking = {Locked, [Node,Next|ToLock], ReplyTo} }) -> %% more to lock
119 |     gen_server:cast({?TARGET, Next}, {lock, self()}),
120 |     {noreply, State #state { locking = {[Node|Locked], [Next|ToLock], ReplyTo} }};
121 | 
122 | handle_cast({lock_rejected, Node},
123 |             State = #state { locking = {Locked, [Node|_ToLock], ReplyTo},
124 |                              token   = Token }) ->
125 |     gen_server:cast(ReplyTo, {comms, Token, lock_rejected}),
126 |     abcast = gen_server:abcast(Locked, ?TARGET, {unlock, self()}), %% undo the locks taken so far
127 |     {noreply, State #state { locking = undefined }};
128 | 
129 | handle_cast({lock, Locker}, State = #state { locked_by = undefined }) ->
130 |     gen_server:cast(Locker, {lock_ok, node()}),
131 |     erlang:monitor(process, Locker), %% so the lock is released if Locker dies
132 |     {noreply, State #state { locked_by = Locker }};
133 | handle_cast({lock, Locker}, State) -> %% already locked by someone
134 |     gen_server:cast(Locker, {lock_rejected, node()}),
135 |     {noreply, State};
136 | 
137 | handle_cast({unlock, Locker}, State = #state { locked_by = Locker }) ->
138 |     {noreply, State #state { locked_by = undefined }};
139 | handle_cast({unlock, _Locker}, State) ->
140 |     %% Potential race between the DOWN and the unlock might well mean
141 |     %% that the DOWN gets here first, thus we unlock ourselves. At
142 |     %% that point we're free to be locked by someone else. Later on,
143 |     %% the unlock from the DOWN'd process gets here. Thus we don't
144 |     %% attempt to make any assertions about only receiving an unlock
145 |     %% from X when locked by X. Also, this could be an unlock coming
146 |     %% from a remote node which was originally for a lock held by an
147 |     %% older comms which has since been killed off.
148 |     {noreply, State};
149 | 
150 | handle_cast(stop, State) ->
151 |     {stop, normal, State};
152 | 
153 | handle_cast(Msg, State) -> %% anything else is unexpected: stop
154 |     {stop, {unhandled_cast, Msg}, State}.
155 | 
156 | 
157 | %% A monitored coordinator ({?TARGET, Node}) went down: treat the node as
158 | %% dealt with so an in-progress lock_nodes request can carry on without it.
159 | handle_info({'DOWN', _MRef, process, {?TARGET, Node}, _Info},
160 |             State = #state { locking = Locking, token = Token }) ->
161 |     NewLocking =
162 |         case Locking of
163 |             undefined -> undefined;
164 |             {_, [Node], Requester} -> %% it was the last node to lock
165 |                 gen_server:cast(Requester, {comms, Token, lock_ok}),
166 |                 undefined;
167 |             {Done, [Node | [Next | _] = Rest], Requester} ->
168 |                 gen_server:cast({?TARGET, Next}, {lock, self()}),
169 |                 {[Node | Done], Rest, Requester};
170 |             {Done, Pending, Requester} -> %% not the node being locked now
171 |                 {Done -- [Node], Pending -- [Node], Requester}
172 |         end,
173 |     {noreply, State #state { locking = NewLocking }};
174 | %% The process holding this lock died: release the lock.
175 | handle_info({'DOWN', _MRef, process, Holder, _Info}, State = #state { locked_by = Holder }) ->
176 |     {noreply, State #state { locked_by = undefined }};
177 | handle_info({'DOWN', _MRef, process, _Pid, _Info}, State) ->
178 |     {noreply, State};
179 | handle_info(Other, State) -> {stop, {unhandled_info, Other}, State}.
180 | 
181 | %% Nothing to clean up on termination.
182 | terminate(_Reason, _State) -> ok.
183 | 
184 | %% The state is carried over unchanged on code change.
185 | code_change(_OldVsn, State, _Extra) -> {ok, State}.
186 | 


--------------------------------------------------------------------------------
/test/src/clusterer_node.erl:
--------------------------------------------------------------------------------
  1 | -module(clusterer_node).
  2 | 
  3 | -export([observe_stable_state/1,
  4 |          start_link/2, delete/1,
  5 |          reset/1, start/1, start_with_config/2, apply_config/2, stop/1,
  6 |          exit/1]).
  7 | 
  8 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
  9 |          terminate/2, code_change/3]).
 10 | 
 11 | -include("clusterer_test.hrl").
 12 | 
 13 | -record(node_state, { name, name_str, port }).
 14 | 
 15 | -define(IS_NODE_OFF(R), R =:= noconnection; R =:= nodedown; R =:= noproc).
 16 | -define(SLEEP, timer:sleep(250)).
 17 | 
 18 | %%----------------------------------------------------------------------------
 19 | 
 20 | %% Ask every node process for its current state and wait for all replies.
 21 | %% Returns {stable, Orddict} of Name -> state, or not_stable if any node
 22 | %% reported false.
 23 | observe_stable_state([]) ->
 24 |     {stable, orddict:new()};
 25 | observe_stable_state(Pids) ->
 26 |     Tag = make_ref(),
 27 |     Request = {stable_state, Tag, self()},
 28 |     lists:foreach(fun (Pid) -> gen_server:cast(Pid, Request) end, Pids),
 29 |     Replies = [receive {stable_state, Tag, Name, Reply} -> {Name, Reply} end
 30 |                || _ <- Pids],
 31 |     Unstable = lists:any(fun ({_Name, Reply}) -> Reply =:= false end, Replies),
 32 |     if Unstable -> not_stable;
 33 |        true     -> {stable, orddict:from_list(lists:usort(Replies))} end.
 34 | 
 35 | %%----------------------------------------------------------------------------
 36 | 
 37 | start_link(Name, Port) when is_atom(Name), is_integer(Port) -> %% unregistered server
 38 |     InitArgs = [Name, Port], gen_server:start_link(?MODULE, InitArgs, []).
 39 | 
 40 | delete(NodePid) -> gen_server:cast(NodePid, delete). %% async; the process then stops
 41 | 
 42 | reset(NodePid) -> gen_server:cast(NodePid, reset). %% async; the node must be off
 43 | 
 44 | start(NodePid) -> gen_server:cast(NodePid, start). %% async; the node must be off
 45 | 
 46 | start_with_config(NodePid, Config) -> %% async; Config is written to a file first
 47 |     Request = {start_with_config, Config}, gen_server:cast(NodePid, Request).
 48 | 
 49 | apply_config(NodePid, Config) -> gen_server:cast(NodePid, {apply_config, Config}). %% async
 50 | 
 51 | stop(NodePid) -> gen_server:cast(NodePid, stop). %% async; the node must be running
 52 | 
 53 | exit(NodePid) -> gen_server:call(NodePid, exit, infinity). %% blocks until done
 54 | 
 55 | %%----------------------------------------------------------------------------
 56 | 
 57 | %% The node must not be running yet (asserted); start from a clean database.
 58 | init([Name, Port]) ->
 59 |     NameStr = atom_to_list(Name),
 60 |     PortStr = rabbit_misc:format("~p", [Port]),
 61 |     NodeState = #node_state { name = Name, name_str = NameStr, port = PortStr },
 62 |     pang = net_adm:ping(Name), %% ASSERTION
 63 |     ok = clean_db(NodeState), {ok, NodeState}.
 64 | 
 65 | %% exit: stop the node, wait until it no longer answers pings, clean its
 66 | %% database, then reply ok and terminate. Any other call is an error.
 67 | handle_call(exit, _From, #node_state { name = Name } = Node) ->
 68 |     ok = run_cmd("stop-node", Node),
 69 |     ok = await_death(Name), ok = clean_db(Node),
 70 |     {stop, normal, ok, Node};
 71 | handle_call(Request, Caller, Node) -> {stop, {unhandled_call, Request, Caller}, Node}.
 72 | 
 73 | handle_cast(delete, State = #node_state { name = Name }) -> %% clean up, then stop this process
 74 |     pang = net_adm:ping(Name), %% ASSERTION: the node must be off
 75 |     ok = clean_db(State),
 76 |     {stop, normal, State};
 77 | handle_cast(reset, State = #node_state { name = Name }) ->
 78 |     pang = net_adm:ping(Name), %% ASSERTION: the node must be off
 79 |     ok = clean_db(State),
 80 |     {noreply, State};
 81 | handle_cast(start, State = #node_state { name = Name }) ->
 82 |     pang = net_adm:ping(Name), %% ASSERTION: the node must be off
 83 |     ok = run_bg_cmd("run", "-noinput", State),
 84 |     ok = await_life(Name), %% block until the node answers pings
 85 |     {noreply, State};
 86 | handle_cast(stop, State = #node_state { name = Name }) ->
 87 |     pong = net_adm:ping(Name), %% ASSERTION: the node must be running
 88 |     ok = run_cmd("stop-node", State),
 89 |     ok = await_death(Name), %% block until the node stops answering pings
 90 |     {noreply, State};
 91 | handle_cast({start_with_config, Config},
 92 |             State = #node_state { name = Name, name_str = NameStr }) ->
 93 |     ok = store_external_cluster_config(NameStr, Config), %% write the config file first
 94 |     ok = run_bg_cmd("run", "-rabbitmq_clusterer config \\\\\\\"" ++
 95 |                          external_config_file(NameStr) ++ "\\\\\\\" -noinput",
 96 |                      State),
 97 |     ok = await_life(Name),
 98 |     {noreply, State};
 99 | handle_cast({apply_config, Config},
100 |             State = #node_state { name = Name, name_str = NameStr }) ->
101 |     pong = net_adm:ping(Name), %% ASSERTION: the node must be running
102 |     ok = store_external_cluster_config(NameStr, Config),
103 |     ok = ctl("eval 'rabbit_clusterer:apply_config(\"" ++
104 |                  external_config_file(NameStr) ++ "\").'", State),
105 |     {noreply, State};
106 | handle_cast({stable_state, Ref, From}, %% report this node's state; false means "not stable"
107 |             State = #node_state { name = Name, name_str = NameStr}) ->
108 |     Result =
109 |         try
110 |             case rabbit_clusterer_coordinator:request_status(Name) of
111 |                 preboot                      -> false;
112 |                 {Config,  {transitioner, _}} -> {ready, convert(Config)}; %% NOTE(review): counted as ready - confirm
113 |                 {_Config, booting}           -> false;
114 |                 { Config, ready}             -> {ready, convert(Config)}
115 |             end
116 |         catch
117 |             exit:{R, _}      when ?IS_NODE_OFF(R) -> %% node is down: reset or off
118 |                 case is_reset(NameStr) of
119 |                     true  -> reset;
120 |                     false -> off
121 |                 end;
122 |             exit:{{R, _}, _} when ?IS_NODE_OFF(R) -> %% same, with a nested exit reason
123 |                 case is_reset(NameStr) of
124 |                     true  -> reset;
125 |                     false -> off
126 |                 end;
127 |             _Class:_Reason -> %% any other failure is reported as not stable
128 |                 false
129 |         end,
130 |     From ! {stable_state, Ref, Name, Result},
131 |     {noreply, State};
132 | handle_cast(Msg, State) -> %% anything else is unexpected: stop
133 |     {stop, {unhandled_cast, Msg}, State}.
134 | 
135 | %% No raw messages are expected; any that arrives stops the server.
136 | handle_info(Unexpected, State) -> {stop, {unhandled_info, Unexpected}, State}.
137 | 
138 | %% Nothing to clean up on termination.
139 | terminate(_Reason, _State) -> ok.
140 | 
141 | %% The state is carried over unchanged on code change.
142 | code_change(_OldVsn, State, _Extra) -> {ok, State}.
143 | 
144 | %% Block until Name stops answering pings.
145 | await_death(Name) -> await(Name, pong, pang).
146 | 
147 | %% Block until Name starts answering pings.
148 | await_life(Name) -> await(Name, pang, pong).
149 | 
150 | %% Poll net_adm:ping(Name), sleeping between attempts while it yields
151 | %% Again; return ok once it yields Return.
152 | await(Name, Again, Return) ->
153 |     Pinged = net_adm:ping(Name),
154 |     if Pinged =:= Again  -> ?SLEEP, await(Name, Again, Return);
155 |        Pinged =:= Return -> ok end.
156 | 
157 | %% Translate a clusterer config into the test suite's #config record, with
158 | %% nodes as an orddict of Name -> disc | ram.
159 | convert(ClustererConfig) ->
160 |     Version = rabbit_clusterer_config:version(ClustererConfig),
161 |     AllNames = rabbit_clusterer_config:nodenames(ClustererConfig),
162 |     DiscNames = rabbit_clusterer_config:disc_nodenames(ClustererConfig),
163 |     Gospel = rabbit_clusterer_config:gospel(ClustererConfig),
164 |     Disc = [{Name, disc} || Name <- DiscNames],
165 |     Ram = [{Name, ram} || Name <- AllNames -- DiscNames],
166 |     #config { version = Version, gospel = Gospel,
167 |               nodes   = orddict:from_list(Disc ++ Ram) }.
168 | 
169 | makefile_dir() -> %% parent of the directory that code:which(rabbit) lives in
170 |     RabbitDir = filename:dirname(code:which(rabbit)), filename:join(RabbitDir, "..").
171 | 
172 | %% RABBITMQ_MNESIA_DIR if set; otherwise rabbitmq-<NameStr>-mnesia under
173 | %% TMPDIR, falling back to /tmp.
174 | mnesia_dir(NameStr) when is_list(NameStr) ->
175 |     Leaf = "rabbitmq-" ++ NameStr ++ "-mnesia",
176 |     case {os:getenv("RABBITMQ_MNESIA_DIR"), os:getenv("TMPDIR")} of
177 |         {false, Tmp} -> filename:join(if Tmp =:= false -> "/tmp"; true -> Tmp end, Leaf);
178 |         {Explicit, _} -> Explicit end.
179 | 
180 | %% A node counts as reset when its mnesia directory is missing or empty.
181 | is_reset(NameStr) when is_list(NameStr) ->
182 |     case file:list_dir(mnesia_dir(NameStr)) of
183 |         {ok, []}        -> true;
184 |         {ok, _Entries}  -> false;
185 |         {error, enoent} -> true
186 |     end.
187 | 
188 | external_config_file(NameStr) when is_list(NameStr) -> %% path derived from the mnesia dir
189 |     lists:append(mnesia_dir(NameStr), "-external-cluster.config").
190 | 
191 | %% Run `rabbitmqctl -n NameStr Action` and assert that it exited with 0
192 | %% (the exit status is echoed as the last line of the command output).
193 | ctl(Action, #node_state { name_str = NameStr }) ->
194 |     Ctl = filename:join(makefile_dir(), "scripts/rabbitmqctl"),
195 |     Cmd = lists:flatten([Ctl, " -n '", NameStr, "' ", Action, " ; echo $?"]),
196 |     OutputLines = string:tokens(os:cmd(Cmd), "\n"),
197 |     "0" = hd(lists:reverse(OutputLines)), %% ASSERTION
198 |     ok.
199 | 
200 | %% Run `make -C <makefile dir> Action` for this node (name and port are
201 | %% passed via RABBITMQ_NODENAME / RABBITMQ_NODE_PORT) and assert that it
202 | %% exited with 0; the exit status is echoed as the last output line.
203 | run_cmd(Action, #node_state { name_str = NameStr, port = Port }) ->
204 |     %% os:getenv/1 yields false when MAKE is unset, which would put an atom
205 |     %% in the command and crash os:cmd/1, so fall back to plain "make".
206 |     Make = case os:getenv("MAKE") of
207 |                false -> "make";
208 |                Value -> Value
209 |            end,
210 |     Cmd = lists:flatten(["RABBITMQ_NODENAME=", NameStr,
211 |                          " RABBITMQ_NODE_PORT=", Port,
212 |                          " ", Make, " -C ", makefile_dir(),
213 |                          " ", Action, " ; echo $?"]),
214 |     "0" = lists:last(string:tokens(os:cmd(Cmd), "\n")), %% ASSERTION
215 |     ok.
216 | 
217 | %% Start `make -C <makefile dir> Action` for this node in the background
218 | %% (via setsid and a trailing &), passing StartArgs in
219 | %% RABBITMQ_SERVER_START_ARGS and appending stdout/stderr to log files
220 | %% named after the node's mnesia dir. The command's result is not checked.
221 | run_bg_cmd(Action, StartArgs, #node_state { name_str = NameStr, port = Port }) ->
222 |     %% os:getenv/1 yields false when MAKE is unset, which would put an atom
223 |     %% in the command and crash os:cmd/1, so fall back to plain "make".
224 |     Make = case os:getenv("MAKE") of
225 |                false -> "make";
226 |                Value -> Value
227 |            end,
228 |     Log = mnesia_dir(NameStr),
229 |     Cmd = lists:flatten(["RABBITMQ_NODENAME=", NameStr,
230 |                          " RABBITMQ_NODE_PORT=", Port,
231 |                          " RABBITMQ_SERVER_START_ARGS=\"", StartArgs,
232 |                          "\" setsid ", Make, " -C ", makefile_dir(),
233 |                          " ", Action,
234 |                          " 1>> ", Log, "-stdout.log",
235 |                          " 2>> ", Log, "-stderr.log &"]),
236 |     os:cmd(Cmd),
237 |     ok.
238 | 
239 | %% Run the cleandb make target, then delete the node's cluster config file;
240 | %% a missing file is fine, any other delete error is returned.
241 | clean_db(#node_state { name_str = NameStr } = Node) ->
242 |     ok = run_cmd("cleandb", Node),
243 |     ConfigFile = mnesia_dir(NameStr) ++ "-cluster.config",
244 |     case file:delete(ConfigFile) of {error, enoent} -> ok;
245 |                                     Other           -> Other end.
246 | 
247 | store_external_cluster_config(NameStr, Config) when is_list(NameStr) ->
248 |     Path = external_config_file(NameStr), %% written as a single proplist term
249 |     ok = rabbit_file:write_term_file(Path, [to_proplist(Config)]).
250 | 
251 | %% Fold Fun(FieldName, Pos, Acc) over the #config fields; Pos is the field's
252 | %% position in the record tuple (from 2, as element 1 is the record tag).
253 | field_fold(Fun, Init) ->
254 |     Fields = record_info(fields, config), Positions = lists:seq(2, length(Fields) + 1),
255 |     lists:foldl(fun ({F, P}, Acc) -> Fun(F, P, Acc) end, Init, lists:zip(Fields, Positions)).
256 | 
257 | %% #config record -> [{FieldName, Value}], in reverse field order.
258 | to_proplist(#config {} = Config) ->
259 |     Pair = fun (Field, Pos, Pairs) -> [{Field, element(Pos, Config)} | Pairs] end,
260 |     field_fold(Pair, []).
261 | 


--------------------------------------------------------------------------------
/test/src/clusterer_interpreter.erl:
--------------------------------------------------------------------------------
  1 | -module(clusterer_interpreter).
  2 | 
  3 | -export([run_program/1]).
  4 | 
  5 | -include("clusterer_test.hrl").
  6 | 
  7 | -define(SLEEP, timer:sleep(500)).
  8 | 
  9 | %%----------------------------------------------------------------------------
 10 | 
 11 | run_program({StartState, Steps}) -> %% a program is {InitialState, Steps}
 12 |     run_program(Steps, StartState).
 13 | 
 14 | %% Interpret the steps in order. Each step is run from the current state;
 15 | %% the state it achieves must agree with the generator's prediction and
 16 | %% then with what is observed of the real nodes once they are stable.
 17 | run_program([], FinalState) ->
 18 |     ok = tidy(FinalState);
 19 | run_program([Step | Rest], Current) ->
 20 |     Predicted = Step #step.final_state,
 21 |     Ran = run_step(Step #step { final_state = Current }),
 22 |     Achieved = Ran #step.final_state,
 23 |     case check_convergence(Predicted, Achieved) of
 24 |         {error, Divergence} ->
 25 |             ok = tidy(Achieved),
 26 |             {error, Divergence, Step};
 27 |         ok ->
 28 |             Observed = observe_stable_state(Achieved),
 29 |             case compare_state(Achieved, Observed) of
 30 |                 {ok, Next} -> run_program(Rest, Next);
 31 |                 Failure    -> Failure end end.
 32 | 
 33 | run_step(Step) -> %% three stages, each fed the previous stage's result
 34 |     Step1 = run_modify_nodes(Step), run_modify_config(run_existential_node(Step1)).
 35 | 
 36 | %% Shut down every node process in the state, one after another.
 37 | tidy(#state { nodes = Nodes }) ->
 38 |     Pids = [Pid || {_Name, #node { pid = Pid }} <- orddict:to_list(Nodes)],
 39 |     lists:foreach(fun clusterer_node:exit/1, Pids).
 40 | 
 41 | %%----------------------------------------------------------------------------
 42 | 
 43 | %% Compare the generator's predicted state with the state the interpreter
 44 | %% achieved. The three configs must match exactly and both states must
 45 | %% name the same nodes in the same states (achieved nodes also carry pids).
 46 | %% Returns ok or {error, Reason} for the first divergence found.
 47 | check_convergence(#state { nodes         = PredNodes,
 48 |                            config        = Config,
 49 |                            valid_config  = Valid,
 50 |                            active_config = Active },
 51 |                   #state { nodes         = AchiNodes,
 52 |                            config        = Config,
 53 |                            valid_config  = Valid,
 54 |                            active_config = Active }) ->
 55 |     PredNames = orddict:fetch_keys(PredNodes),
 56 |     AchiNames = orddict:fetch_keys(AchiNodes),
 57 |     if PredNames =/= AchiNames ->
 58 |             {error, {node_divergence, PredNames, AchiNames}};
 59 |        true ->
 60 |             orddict:fold(
 61 |               fun (_Name, _Node, {error, _} = Err) ->
 62 |                       Err;
 63 |                   (Name, #node { name = Name, state = Achi }, ok) ->
 64 |                       #node { name = Name, state = Pred } =
 65 |                           orddict:fetch(Name, PredNodes),
 66 |                       if Pred =:= Achi -> ok;
 67 |                          true -> {error, {node_state_divergence, Name, Achi, Pred}}
 68 |                       end
 69 |               end, ok, AchiNodes)
 70 |     end;
 71 | check_convergence(Pred, Achi) ->
 72 |     {error, {config_divergence, Pred, Achi}}.
 73 | 
 74 | %% Poll the node processes until two consecutive observations, taken a
 75 | %% sleep apart, are both stable and identical; return that observation.
 76 | observe_stable_state(State = #state { nodes = Nodes }) ->
 77 |     Pids = [Pid || {_Name, #node { pid = Pid }} <- orddict:to_list(Nodes)],
 78 |     First = clusterer_node:observe_stable_state(Pids),
 79 |     ?SLEEP, %% sleep whether or not the first observation was stable
 80 |     case First of
 81 |         {stable, Seen} ->
 82 |             case clusterer_node:observe_stable_state(Pids) of
 83 |                 {stable, Seen} -> Seen; _ -> observe_stable_state(State) end;
 84 |         _ -> observe_stable_state(State) end.
 85 | 
 86 | compare_state(State = #state { nodes         = Nodes,
 87 |                                active_config = AConfig }, StableState) ->
 88 |     case {orddict:fetch_keys(Nodes), orddict:fetch_keys(Nodes)} of
 89 |         {Eq, Eq} ->
 90 |             Result =
 91 |                 orddict:fold(
 92 |                   fun (_Name, _Node, {error, _} = Err) ->
 93 |                           Err;
 94 |                       (Name, Node = #node { name = Name, state = NS }, Acc) ->
 95 |                           Observed = orddict:fetch(Name, StableState),
 96 |                           case {NS, Observed} of
 97 |                               {off, off} ->
 98 |                                   orddict:store(Name, Node, Acc);
 99 |                               {reset, reset} ->
100 |                                   orddict:store(Name, Node, Acc);
101 |                               {ready, {ready, AConfig}} ->
102 |                                   orddict:store(Name, Node, Acc);
103 |                               {_, _} = DivergenceSt ->
104 |                                   {error, {node_state_divergence, DivergenceSt}}
105 |                           end
106 |                   end, orddict:new(), Nodes),
107 |             case Result of
108 |                 {error, _} = Err -> Err;
109 |                 Nodes1           -> {ok, State #state { nodes = Nodes1 }}
110 |             end;
111 |         {_, _} = DivergenceNodes ->
112 |             {error, {nodes_divergence, DivergenceNodes}}
113 |     end.
114 | 
115 | %%----------------------------------------------------------------------------
116 | 
%% Executes the step's node-modification instructions, last list
%% element first, threading the state through
%% run_modify_node_instr/2, and records the resulting state as the
%% step's final_state.
run_modify_nodes(Step = #step { modify_node_instrs = NodeInstrs,
                                final_state        = Before }) ->
    After = lists:foldl(fun (Instr, Acc) ->
                                run_modify_node_instr(Instr, Acc)
                        end, Before, lists:reverse(NodeInstrs)),
    Step #step { final_state = After }.
121 | 
%% Executes one node-modification instruction against the real node
%% process and returns the updated #state{}.
%%
%% Every clause asserts (by pattern match) that the node is in a state
%% from which the instruction is legal and that the clusterer_node call
%% answers ok; a violated assertion crashes the caller.
run_modify_node_instr(noop, State) ->
    State;
run_modify_node_instr({reset_node, Name}, State = #state { nodes = Nodes }) ->
    Node = orddict:fetch(Name, Nodes),
    #node { state = off, pid = Pid } = Node, %% ASSERTION: must be off
    ok = clusterer_node:reset(Pid),
    clusterer_utils:store_node(Node #node { state = reset }, State);
run_modify_node_instr({start_node, Name},
                      State = #state { nodes         = Nodes,
                                       active_config = AConfig }) ->
    Node = orddict:fetch(Name, Nodes),
    #node { state = NS, pid = Pid } = Node,
    true = lists:member(NS, [off, reset]), %% ASSERTION
    ok = clusterer_node:start(Pid),
    Started = clusterer_utils:set_node_state(
                Node #node { state = ready }, AConfig),
    clusterer_utils:store_node(Started, State);
run_modify_node_instr({start_node_with_config, Name, VConfig},
                      State = #state { nodes        = Nodes,
                                       valid_config = VConfig }) ->
    %% The head only matches when the instruction carries exactly the
    %% state's valid_config.
    Node = orddict:fetch(Name, Nodes),
    #node { state = NS, pid = Pid } = Node,
    true = lists:member(NS, [off, reset]), %% ASSERTION
    ok = clusterer_node:start_with_config(Pid, VConfig),
    Stored = clusterer_utils:store_node(Node #node { state = ready }, State),
    clusterer_utils:make_config_active(Stored);
run_modify_node_instr({apply_config_to_node, Name, VConfig},
                      State = #state { nodes        = Nodes,
                                       valid_config = VConfig }) ->
    Node = orddict:fetch(Name, Nodes),
    #node { state = ready, pid = Pid } = Node, %% ASSERTION: must be ready
    ok = clusterer_node:apply_config(Pid, VConfig),
    Stored = clusterer_utils:store_node(Node, State),
    clusterer_utils:make_config_active(Stored);
run_modify_node_instr({stop_node, Name}, State = #state { nodes = Nodes }) ->
    Node = orddict:fetch(Name, Nodes),
    #node { state = ready, pid = Pid } = Node, %% ASSERTION: must be ready
    ok = clusterer_node:stop(Pid),
    clusterer_utils:store_node(Node #node { state = off }, State).
154 | 
155 | %%----------------------------------------------------------------------------
156 | 
%% Executes the step's existential (create/delete node) instruction and
%% records the resulting state as the step's final_state.
run_existential_node(Step = #step { existential_node_instr = Instr,
                                    final_state            = Before }) ->
    Step #step { final_state = run_existential_node_instr(Instr, Before) }.
161 | 
%% Executes one existential instruction and returns the updated state.
%%
%%   noop                      - state returned untouched
%%   {create_node, Name, Port} - starts a node process via
%%                               clusterer_node:start_link/2 and stores
%%                               a node record in state reset; Name
%%                               must not be known yet
%%   {delete_node, Name}       - calls clusterer_node:delete/1 on the
%%                               node's pid and drops the record; the
%%                               node must be reset or off
run_existential_node_instr(noop, State) ->
    State;
run_existential_node_instr({create_node, Name, Port},
                           State = #state { nodes = Nodes }) ->
    false = orddict:is_key(Name, Nodes), %% ASSERTION
    {ok, Pid} = clusterer_node:start_link(Name, Port),
    Created = #node { name  = Name,
                      port  = Port,
                      state = reset,
                      pid   = Pid },
    State #state { nodes = orddict:store(Name, Created, Nodes) };
run_existential_node_instr({delete_node, Name},
                           State = #state { nodes = Nodes }) ->
    #node { state = NS, pid = Pid } = orddict:fetch(Name, Nodes),
    true = lists:member(NS, [reset, off]), %% ASSERTION
    ok = clusterer_node:delete(Pid),
    Remaining = orddict:erase(Name, Nodes),
    State #state { nodes = Remaining }.
179 | 
180 | %%----------------------------------------------------------------------------
181 | 
%% Executes the step's config-modification instruction and records the
%% resulting state as the step's final_state.
run_modify_config(Step = #step { modify_config_instr = Instr,
                                 final_state         = Before }) ->
    Step #step { final_state = run_modify_config_instr(Instr, Before) }.
186 | 
%% Applies one config-modification instruction to the config held in
%% the state and hands the new config to clusterer_utils:set_config/2,
%% whose result is returned.
%%
%%   noop                       - state returned untouched
%%   {config_version_to, V}     - V must be greater than the current
%%                                version
%%   {config_gospel_to, V}      - V must differ from the current gospel
%%   {config_add_node, Name}    - Name must be a known node that is not
%%                                yet in the config; it is added as disc
%%   {config_remove_node, Name} - Name must be in the config and must
%%                                not be the gospel node
run_modify_config_instr(noop, State) ->
    State;
run_modify_config_instr({config_version_to, V},
                        State = #state { config = Config =
                                             #config { version = Current } })
  when Current < V ->
    Config1 = Config #config { version = V },
    clusterer_utils:set_config(Config1, State);
run_modify_config_instr({config_gospel_to, V},
                        State = #state { config = Config =
                                             #config { gospel = Current } })
  when Current =/= V ->
    Config1 = Config #config { gospel = V },
    clusterer_utils:set_config(Config1, State);
run_modify_config_instr({config_add_node, Name},
                        State = #state { nodes = Nodes,
                                         config = Config =
                                             #config { nodes = Members } }) ->
    true  = orddict:is_key(Name, Nodes),   %% ASSERTION
    false = orddict:is_key(Name, Members), %% ASSERTION
    Config1 = Config #config { nodes = orddict:store(Name, disc, Members) },
    clusterer_utils:set_config(Config1, State);
run_modify_config_instr({config_remove_node, Name},
                        State = #state { config = Config =
                                             #config { nodes  = Members,
                                                       gospel = Gospel } }) ->
    %% A node may be deleted while it is still named in the config (it
    %% only has to be off/reset), so there is deliberately no check
    %% that Name is a key of the state's node dictionary.
    true = Gospel =/= {node, Name},       %% ASSERTION
    true = orddict:is_key(Name, Members), %% ASSERTION
    Config1 = Config #config { nodes = orddict:erase(Name, Members) },
    clusterer_utils:set_config(Config1, State).
218 | 


--------------------------------------------------------------------------------
/test/src/clusterer_program.erl:
--------------------------------------------------------------------------------
  1 | -module(clusterer_program).
  2 | 
  3 | -export([generate_program/1]).
  4 | 
  5 | -include("clusterer_test.hrl").
  6 | 
  7 | -define(BASE_PORT, 10000).
  8 | 
  9 | %%----------------------------------------------------------------------------
 10 | 
 11 | generate_program(InitialState = #state {}) ->
 12 |     {InitialState, generate_steps([], InitialState)}.
 13 | 
 14 | generate_steps(Steps, #state { seed = 0 }) ->
 15 |     lists:reverse(Steps);
 16 | generate_steps(Steps, State) ->
 17 |     Step = generate_step(State),
 18 |     generate_steps([Step | Steps], Step #step.final_state).
 19 | 
 20 | generate_step(State) ->
 21 |     %% We want to avoid any dependencies between instructions within a
 22 |     %% step - i.e. they must all be able to be exec'd in parallel. To
 23 |     %% enforce that we generate the instructions in a particular
 24 |     %% order: 1) modify an existing node; 2) modify config; 3) create
 25 |     %% new node. However, "create new node" can also include "delete
 26 |     %% node" and we need to ensure that if we delete a node it is not
 27 |     %% also used in another instruction in the same step. Thus we do
 28 |     %% the existential instruction first, but it can result in a
 29 |     %% "delayed" instruction for creation that is exec'd at the end.
 30 |     Step = #step { modify_node_instrs     = [],
 31 |                    modify_config_instr    = noop,
 32 |                    existential_node_instr = noop,
 33 |                    final_state            = State },
 34 |     Step1 = step_if_seed(fun generate_existential_node_instructions/1, Step),
 35 |     Step2 = step_if_seed(fun generate_modify_node_instructions/1, Step1),
 36 |     Step3 = step_if_seed(fun generate_modify_config_instructions/1, Step2),
 37 |     eval_delayed_existential_instruction(Step3).
 38 | 
 39 | generate_modify_node_instructions(
 40 |   Step = #step { final_state = State = #state { nodes = Nodes } }) ->
 41 |     {NodeInstrs, State1} =
 42 |         orddict:fold(
 43 |           fun (_Name, _Node, {Instrs, StateN = #state { seed = 0 }}) ->
 44 |                   {Instrs, StateN};
 45 |               (Name, Node = #node { name = Name }, {Instrs, StateN}) ->
 46 |                   {NodeInstrFun, StateN1} =
 47 |                       choose_one_noop2(
 48 |                         lists:flatten(
 49 |                           modify_node_instructions(Node, StateN)), StateN),
 50 |                   {NodeInstr, StateN2} = NodeInstrFun(Node, StateN1),
 51 |                   {[NodeInstr | Instrs], StateN2}
 52 |           end, {[], State}, Nodes),
 53 |     Step #step { modify_node_instrs = NodeInstrs, final_state = State1 }.
 54 | 
 55 | generate_modify_config_instructions(
 56 |   Step = #step { final_state = State = #state { nodes  = Nodes,
 57 |                                                 config = Config } }) ->
 58 |     #config { nodes = ConfigNodes, gospel = Gospel } = Config,
 59 |     {InstrFun, State1} =
 60 |         choose_one_noop1(
 61 |           lists:flatten([fun update_version_instr/1,
 62 |                          case ConfigNodes of
 63 |                              [] -> [];
 64 |                              _  -> fun change_gospel_instr/1
 65 |                          end,
 66 |                          case orddict:size(Nodes) > orddict:size(ConfigNodes) of
 67 |                              true  -> [fun add_node_to_config_instr/1];
 68 |                              false -> []
 69 |                          end,
 70 |                          case orddict:size(ConfigNodes) > 0 andalso
 71 |                              [Gospel] =/=
 72 |                              [{node,N} || N <- orddict:fetch_keys(ConfigNodes)]
 73 |                          of
 74 |                              true  -> [fun remove_node_from_config_instr/1];
 75 |                              false -> []
 76 |                          end]), State),
 77 |     step_if_seed(fun (Step1 = #step { final_state = State2 }) ->
 78 |                          {ModifyConfigInstr, State3} = InstrFun(State2),
 79 |                          Step1 #step { modify_config_instr = ModifyConfigInstr,
 80 |                                        final_state         = State3 }
 81 |                  end, Step #step { final_state = State1 }).
 82 | 
 83 | generate_existential_node_instructions(
 84 |   Step = #step { final_state = State = #state { nodes = Nodes }}) ->
 85 |     {InstrFun, State1} =
 86 |         choose_one_noop1(
 87 |           lists:flatten(
 88 |             [fun create_node_fun_instr/1, %% this one is delayed
 89 |              case orddict:size( %% can only delete if we have some off nodes
 90 |                     orddict:filter(fun (_Name, #node { state = NS }) ->
 91 |                                            NS =:= reset orelse NS =:= off
 92 |                                    end, Nodes)) of
 93 |                  0 -> [];
 94 |                  _ -> [fun delete_node_instr/1]
 95 |              end]), State),
 96 |     step_if_seed(fun (Step1 = #step { final_state = State2 }) ->
 97 |                          {ExistNodeInstr, State3} = InstrFun(State2),
 98 |                          Step1 #step { existential_node_instr = ExistNodeInstr,
 99 |                                        final_state            = State3 }
100 |                  end, Step #step { final_state = State1 }).
101 | 
102 | %%----------------------------------------------------------------------------
103 | 
%% Returns the (possibly nested) list of instruction generators that
%% are legal for a node, depending on the node's state. Callers are
%% expected to flatten the result.
%%
%%   off   - reset; start (if the active config is in use by a ready
%%           node); start with the valid config (if that config names
%%           this node and differs from the active one)
%%   reset - start with the valid config (same condition as above);
%%           start (if the active config is in use and names this node)
%%   ready - stop; apply the valid config (if it is a #config{} that
%%           differs from the active one)
modify_node_instructions(#node { name = Name, state = off },
                         State = #state { valid_config  = VConfig,
                                          active_config = AConfig }) ->
    StartChoice = case is_config_active(State) of
                      true  -> [fun start_node_instr/2];
                      false -> []
                  end,
    %% For simplicity a node is only started with the new config when
    %% that config actually uses the node.
    StartWithConfigChoice =
        case clusterer_utils:contains_node(Name, VConfig) andalso
            AConfig =/= VConfig of
            true  -> [fun start_node_with_config_instr/2];
            false -> []
        end,
    [fun reset_node_instr/2, StartChoice, StartWithConfigChoice];
modify_node_instructions(#node { name = Name, state = reset },
                         State = #state { valid_config  = VConfig,
                                          active_config = AConfig }) ->
    StartWithConfigChoice =
        case clusterer_utils:contains_node(Name, VConfig) andalso
            AConfig =/= VConfig of
            true  -> [fun start_node_with_config_instr/2];
            false -> []
        end,
    StartChoice = case is_config_active(State) andalso
                      clusterer_utils:contains_node(Name, AConfig) of
                      true  -> [fun start_node_instr/2];
                      false -> []
                  end,
    [StartWithConfigChoice, StartChoice];
modify_node_instructions(#node { state = ready },
                         #state { valid_config  = VConfig,
                                  active_config = AConfig }) ->
    ApplyChoice = case VConfig of
                      #config {} when VConfig =/= AConfig ->
                          [fun apply_config_instr/2];
                      _ ->
                          []
                  end,
    [fun stop_node_instr/2, ApplyChoice].
140 | 
141 | %%----------------------------------------------------------------------------
142 | 
%% Produces a {config_version_to, V + 1} instruction, where V is the
%% current config version, together with the state obtained by passing
%% the bumped config to clusterer_utils:set_config/2.
update_version_instr(
  State = #state { config = Config = #config { version = V } }) ->
    Next = V + 1,
    Bumped = Config #config { version = Next },
    {{config_version_to, Next}, clusterer_utils:set_config(Bumped, State)}.
148 | 
%% Produces a {config_gospel_to, Value} instruction. Value is chosen
%% (consuming seed) among reset and {node, N} for every config node N,
%% excluding the current gospel. The returned state carries the config
%% with the new gospel, as set by clusterer_utils:set_config/2.
change_gospel_instr(
  State = #state { config = Config = #config { nodes  = Nodes,
                                               gospel = Gospel } }) ->
    AllGospels = [reset | [{node, N} || N <- orddict:fetch_keys(Nodes)]],
    Alternatives = lists:filter(fun (G) -> G =/= Gospel end, AllGospels),
    {Value, State1} = choose_one(Alternatives, State),
    Changed = Config #config { gospel = Value },
    {{config_gospel_to, Value}, clusterer_utils:set_config(Changed, State1)}.
157 | 
%% Produces a {config_add_node, Name} instruction. Name is chosen
%% (consuming seed) among the known nodes that the config does not
%% contain yet, and is added to the config as a disc node.
add_node_to_config_instr(State = #state { config = Config =
                                              #config { nodes = ConfigNodes },
                                          nodes  = Nodes }) ->
    Missing = lists:filter(fun (Name) ->
                                   not orddict:is_key(Name, ConfigNodes)
                           end, orddict:fetch_keys(Nodes)),
    {Value, State1} = choose_one(Missing, State),
    Extended = orddict:store(Value, disc, ConfigNodes),
    Config1 = Config #config { nodes = Extended },
    {{config_add_node, Value}, clusterer_utils:set_config(Config1, State1)}.
168 | 
%% Produces a {config_remove_node, Name} instruction. Name is chosen
%% (consuming seed) among the config nodes other than the gospel node,
%% and is erased from the config.
remove_node_from_config_instr(
  State = #state { config = Config = #config { nodes  = Nodes,
                                               gospel = Gospel } }) ->
    Removable = lists:filter(fun (N) -> {node, N} =/= Gospel end,
                             orddict:fetch_keys(Nodes)),
    {Value, State1} = choose_one(Removable, State),
    Shrunk = orddict:erase(Value, Nodes),
    Config1 = Config #config { nodes = Shrunk },
    {{config_remove_node, Value}, clusterer_utils:set_config(Config1, State1)}.
177 | 
178 | %%----------------------------------------------------------------------------
179 | 
%% Produces a delayed create-node instruction: {{delayed, Fun}, State}.
%% The state is returned unchanged. When Fun is later applied to a
%% state it allocates a fresh name and port via generate_name_port/1,
%% stores a node record (state reset, no pid) under that name and
%% returns {{create_node, Name, Port}, NewState}.
create_node_fun_instr(State) ->
    Delayed =
        fun (Later) ->
                {Name, Port, Later1 = #state { nodes = Nodes }} =
                    generate_name_port(Later),
                Fresh = #node { name  = Name,
                                port  = Port,
                                pid   = undefined,
                                state = reset },
                Nodes1 = orddict:store(Name, Fresh, Nodes),
                {{create_node, Name, Port}, Later1 #state { nodes = Nodes1 }}
        end,
    {{delayed, Delayed}, State}.
192 | 
%% Produces a {delete_node, Name} instruction. Name is chosen
%% (consuming seed) among the nodes that are reset or off, and the node
%% is removed from the returned state.
delete_node_instr(State = #state { nodes = Nodes }) ->
    IsStopped = fun (_Name, #node { state = NS }) ->
                        NS =:= reset orelse NS =:= off
                end,
    Deletable = orddict:fetch_keys(orddict:filter(IsStopped, Nodes)),
    {Name, State1} = choose_one(Deletable, State),
    Remaining = orddict:erase(Name, Nodes),
    {{delete_node, Name}, State1 #state { nodes = Remaining }}.
200 | 
201 | %%----------------------------------------------------------------------------
202 | 
%% Produces a {reset_node, Name} instruction for a node that is off;
%% the node is stored back into the state with state reset.
reset_node_instr(Node = #node { name = Name, state = off }, State) ->
    ResetNode = Node #node { state = reset },
    {{reset_node, Name}, clusterer_utils:store_node(ResetNode, State)}.
206 | 
%% Produces a {start_node, Name} instruction for a node that is off or
%% reset. The node is marked ready, passed through
%% clusterer_utils:set_node_state/2 with the active config, and stored
%% back into the state.
start_node_instr(Node = #node { name = Name, state = NS },
                 State = #state { active_config = AConfig })
  when NS =:= off; NS =:= reset ->
    Started = clusterer_utils:set_node_state(
                Node #node { state = ready }, AConfig),
    {{start_node, Name}, clusterer_utils:store_node(Started, State)}.
214 | 
%% Produces a {start_node_with_config, Name, VConfig} instruction for a
%% node that is off or reset, where VConfig is the state's valid
%% config. The node is stored as ready and the resulting state is
%% passed through clusterer_utils:make_config_active/1.
start_node_with_config_instr(Node = #node { name = Name, state = NS },
                             State = #state { valid_config = VConfig })
  when NS =:= off; NS =:= reset ->
    Stored = clusterer_utils:store_node(Node #node { state = ready }, State),
    {{start_node_with_config, Name, VConfig},
     clusterer_utils:make_config_active(Stored)}.
221 | 
%% Produces an {apply_config_to_node, Name, VConfig} instruction, where
%% VConfig is the state's valid config, paired with the state as
%% returned by clusterer_utils:make_config_active/1.
apply_config_instr(#node { name = Name },
                   State = #state { valid_config = VConfig }) ->
    Activated = clusterer_utils:make_config_active(State),
    {{apply_config_to_node, Name, VConfig}, Activated}.
226 | 
%% Produces a {stop_node, Name} instruction; the node is stored back
%% into the state with state off.
stop_node_instr(Node = #node { name = Name }, State) ->
    Stopped = Node #node { state = off },
    {{stop_node, Name}, clusterer_utils:store_node(Stopped, State)}.
230 | 
231 | %%----------------------------------------------------------------------------
232 | 
%% Tells whether the active config is in use: false when there is no
%% active config, otherwise true iff at least one node named in the
%% active config is known to the state and is ready.
is_config_active(#state { active_config = undefined }) ->
    false;
is_config_active(#state { nodes = Nodes,
                          active_config = #config { nodes = ConfigNodes } }) ->
    IsReady = fun (Name, _Disc) ->
                      case orddict:find(Name, Nodes) of
                          {ok, Node} -> ready =:= Node #node.state;
                          error      -> false
                      end
              end,
    orddict:filter(IsReady, ConfigNodes) =/= [].
242 | 
%% Allocates the next node identity from the state's node_count N:
%% returns the atom 'nodeN@anyhost', the port ?BASE_PORT + N, and the
%% state with node_count incremented.
generate_name_port(State = #state { node_count = N }) ->
    NameString = lists:flatten(io_lib:format("node~p@anyhost", [N])),
    Name = list_to_atom(NameString),
    Port = ?BASE_PORT + N,
    {Name, Port, State #state { node_count = N+1 }}.
247 | 
%% Instruction generators that do nothing: both return the noop
%% instruction paired with the unchanged state. noop/2 ignores its
%% first argument.
noop(Unchanged) ->
    {noop, Unchanged}.

noop(_Ignored, Unchanged) ->
    {noop, Unchanged}.
250 | 
%% Like choose_one/2, but with a do-nothing generator added in front of
%% the candidates: noop/1 for generators of arity 1, noop/2 for
%% generators of arity 2.
choose_one_noop1(Candidates, State) ->
    WithNoop = [fun noop/1 | Candidates],
    choose_one(WithNoop, State).

choose_one_noop2(Candidates, State) ->
    WithNoop = [fun noop/2 | Candidates],
    choose_one(WithNoop, State).
253 | 
%% Picks one element of List using the state's seed: the element at
%% position 1 + (Seed rem Len) is returned together with the state
%% whose seed has been replaced by Seed div Len, Len being the length
%% of List.
choose_one(List, State = #state { seed = Seed }) ->
    Len = length(List),
    Position = 1 + (Seed rem Len),
    Chosen = lists:nth(Position, List),
    {Chosen, State #state { seed = Seed div Len }}.
257 | 
%% Applies Generator to the step unless the seed in the step's final
%% state is 0, in which case the step is returned untouched.
step_if_seed(_Generator, Exhausted = #step { final_state = #state { seed = 0 } }) ->
    Exhausted;
step_if_seed(Generator, Pending = #step {}) ->
    Generator(Pending).
262 | 
%% Resolves a delayed existential instruction, if the step holds one:
%% the delayed fun is applied to the step's final state and its result
%% replaces both the instruction and the final state. Any other step is
%% returned as is.
%%
%% Only create_node produces delayed instructions, so this is
%% deliberately not generalised. Resolving it needs no further seed,
%% which is why it is not wrapped in step_if_seed.
eval_delayed_existential_instruction(
  Step = #step { existential_node_instr = {delayed, Delayed},
                 final_state            = Before }) ->
    {Resolved, After} = Delayed(Before),
    Step #step { existential_node_instr = Resolved,
                 final_state            = After };
eval_delayed_existential_instruction(Step) ->
    Step.
275 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_config.erl:
--------------------------------------------------------------------------------
  1 | %% The contents of this file are subject to the Mozilla Public License 
  2 | %% Version 1.1 (the "License"); you may not use this file except in 
  3 | %% compliance with the License. You may obtain a copy of the License at 
  4 | %% https://www.mozilla.org/MPL/1.1/ 
  5 | %%
  6 | %% Software distributed under the License is distributed on an "AS IS" 
  7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
  8 | %% License for the specific language governing rights and limitations 
  9 | %% under the License. 
 10 | %%
 11 | %% The Original Code is RabbitMQ. 
 12 | %%
 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
 15 | %% Pivotal Software, Inc. All Rights Reserved.
 16 | 
 17 | -module(rabbit_clusterer_config).
 18 | 
 19 | -export([load/2, load/1, store_internal/2, to_proplist/2,
 20 |          transfer_node_ids/2, update_node_id/4, add_node_ids/3, add_node_id/4,
 21 |          compare/2, is_compatible/2,
 22 |          contains_node/2, is_singleton/2, version/1, nodenames/1,
 23 |          disc_nodenames/1, node_type/2, node_id/2, gospel/1]).
 24 | 
 25 | -record(config, { version,
 26 |                   nodes,
 27 |                   gospel,
 28 |                   node_ids
 29 |                 }).
 30 | %%----------------------------------------------------------------------------
 31 | 
 32 | %% We can't put the file within mnesia dir because that upsets the
 33 | %% virgin detection in rabbit_mnesia!
 34 | internal_path() -> rabbit_mnesia:dir() ++ "-cluster.config".
 35 | 
 36 | external_path() -> application:get_env(rabbitmq_clusterer, config).
 37 | 
 38 | load(undefined)      -> load_external();
 39 | load(#config {} = C) -> case validate(C) of
 40 |                             ok  -> {ok, C};
 41 |                             Err -> Err
 42 |                         end;
 43 | load(PathOrPropList) -> load_external(PathOrPropList).
 44 | 
 45 | load(NodeID, Config) ->
 46 |     choose_external_or_internal(
 47 |       case load_external() of
 48 |           {ok, ExternalConfig} ->
 49 |               ExternalConfig;
 50 |           {error, no_external_config_provided} ->
 51 |               undefined;
 52 |           {error, Error} ->
 53 |               error_logger:info_msg(
 54 |                 "Ignoring external configuration due to error: ~p~n", [Error]),
 55 |               undefined
 56 |       end,
 57 |       case Config of
 58 |           undefined -> load_internal();
 59 |           _         -> {NodeID, Config}
 60 |       end).
 61 | 
 62 | load_external() ->
 63 |     case external_path() of
 64 |         {ok, PathOrProplist} -> load_external(PathOrProplist);
 65 |         undefined            -> {error, no_external_config_provided}
 66 |     end.
 67 | 
 68 | load_external(PathOrProplist) when is_list(PathOrProplist) ->
 69 |     ProplistOrErr = case PathOrProplist of
 70 |                         [{_,_}|_] -> {ok, [PathOrProplist]};
 71 |                         [_|_]     -> rabbit_file:read_term_file(PathOrProplist)
 72 |                     end,
 73 |     case ProplistOrErr of
 74 |         {ok, [Proplist]}   -> case from_proplist(Proplist) of
 75 |                                   {ok, _NodeID, Config} -> {ok, Config};
 76 |                                   {error, _} = Error    -> Error
 77 |                               end;
 78 |         {ok, Terms}        -> {error, rabbit_misc:format(
 79 |                                         "Config is not a single term: ~p",
 80 |                                         [Terms])};
 81 |         {error, _} = Error -> Error
 82 |     end;
 83 | load_external(Other) ->
 84 |     {error, rabbit_misc:format("External config not a path or proplist: ~p",
 85 |                                [Other])}.
 86 | 
 87 | load_internal() ->
 88 |     Proplist = case rabbit_file:read_term_file(internal_path()) of
 89 |                    {error, enoent}               -> undefined;
 90 |                    {ok, [Proplist1 = [{_,_}|_]]} -> Proplist1
 91 |                end,
 92 |     case Proplist of
 93 |         undefined -> undefined;
 94 |         _         -> {ok, NodeID, Config} = from_proplist(Proplist),
 95 |                      true = is_binary(NodeID), %% ASSERTION
 96 |                      {NodeID, Config}
 97 |     end.
 98 | 
 99 | store_internal(NodeID, Config) ->
100 |     ok = filelib:ensure_dir(filename:dirname(internal_path())),
101 |     ok = rabbit_file:write_term_file(internal_path(),
102 |                                      [to_proplist(NodeID, Config)]).
103 | 
%% Decides between the external config (first argument, or undefined)
%% and the internal {NodeID, Config} pair (second argument, or
%% undefined). Returns {NodeID, ConfigToUse, OldConfig}, OldConfig
%% being undefined when there was no internal config.
%%
%%   neither       - a default config with a fresh node id
%%   external only - the external config, with a freshly generated node
%%                   id and its node ids passed through tidy_node_ids/2
%%   internal only - the internal config
%%   both          - the external config only when compare/2 reports it
%%                   as younger; otherwise the internal config is kept
%%                   (an invalid external config is also logged)
choose_external_or_internal(undefined, undefined) ->
    {ok, NodeID, Default} = default_config(),
    {NodeID, Default, undefined};
choose_external_or_internal(External, undefined) ->
    %% No internal config means no stored node id, so make a new one.
    NodeID = create_node_id(),
    Tidied = tidy_node_ids(NodeID, External),
    {NodeID, Tidied, undefined};
choose_external_or_internal(undefined, {NodeID, Internal}) ->
    {NodeID, Internal, Internal};
choose_external_or_internal(External, {NodeID, Internal}) ->
    case compare(External, Internal) of
        younger ->
            %% A newer cluster config has been provided: use it.
            {NodeID, External, Internal};
        invalid ->
            error_logger:info_msg(
              "Ignoring invalid user-provided configuration", []),
            {NodeID, Internal, Internal};
        _Otherwise ->
            %% In every other case the user-provided config is ignored.
            {NodeID, Internal, Internal}
    end.
124 | 
%% Builds the default config: version 0, this node as the only (disc)
%% node and as gospel, with a freshly created node id. Returns whatever
%% from_proplist/1 makes of it.
%%
%% The node id is deliberately put in the proplist, as it would be on
%% disk, even though it is not a field of the #config record.
default_config() ->
    NodeID = create_node_id(),
    MyNode = node(),
    Defaults = [{version,          0},
                {nodes,            [{MyNode, disc}]},
                {gospel,           {node, MyNode}},
                {node_id,          NodeID},
                {node_ids,         orddict:from_list([{MyNode, NodeID}])}
               ],
    from_proplist(Defaults).
137 | 
%% Creates a node id: the MD5 digest of the node name combined with the
%% current system time.
%%
%% rabbit_guid cannot be used because it may not be running yet. A
%% fresh id is only needed for a virgin node, and it must differ from
%% any id this node had in an earlier life. Including
%% erlang:system_time() gives that, unless the clock is set backwards.
create_node_id() ->
    Stamp = {node(), erlang:system_time()},
    erlang:md5(term_to_binary(Stamp)).
146 | 
147 | %%----------------------------------------------------------------------------
148 | 
%% Keys that every cluster config proplist must contain.
required_keys() ->
    [version, nodes, gospel].
150 | 
%% Optional config keys, each paired with the default used when the key
%% is absent from a proplist.
optional_keys() ->
    [{node_ids, orddict:new()}].
152 | 
%% Folds Fun over the fields of the #config record, in declaration
%% order. Fun is called as Fun(FieldName, Pos, Acc), where Pos is the
%% field's position in the record tuple; positions start at 2 because
%% element 1 of a record tuple is the record name.
field_fold(Fun, Init) ->
    Fields = record_info(fields, config),
    Positions = lists:seq(2, length(Fields) + 1),
    lists:foldl(fun ({FieldName, Pos}, Acc) ->
                        Fun(FieldName, Pos, Acc)
                end, Init, lists:zip(Fields, Positions)).
158 | 
%% Converts a #config{} into a proplist with one {Field, Value} pair
%% per record field, headed by {node_id, NodeID}. The field pairs are
%% accumulated by prepending, so they appear in reverse declaration
%% order.
to_proplist(NodeID, Config = #config {}) ->
    FieldPairs = field_fold(fun (FieldName, Pos, Pairs) ->
                                    [{FieldName, element(Pos, Config)} | Pairs]
                            end, []),
    [{node_id, NodeID} | FieldPairs].
164 | 
%% Converts a proplist into a config. Returns {ok, NodeID, Config} or
%% {error, Reason}; NodeID is the proplist's node_id value (undefined
%% when absent).
%%
%% The required keys are checked first. The conversion then fills in
%% defaults for optional keys, validates the record and, only once it
%% is valid, normalises the node list.
from_proplist(Proplist) when is_list(Proplist) ->
    case check_required_keys(Proplist) of
        {error, _} = Err -> Err;
        ok               -> config_from_checked_proplist(Proplist)
    end;
from_proplist(Other) ->
    {error, rabbit_misc:format("Config is not a proplist: ~p", [Other])}.

%% Builds, validates and normalises a #config{} from a proplist that is
%% already known to contain all required keys.
config_from_checked_proplist(Proplist) ->
    Complete = add_optional_keys(Proplist),
    FillField = fun (FieldName, Pos, ConfigN) ->
                        Value = proplists:get_value(FieldName, Complete),
                        setelement(Pos, ConfigN, Value)
                end,
    Config = #config { nodes = Nodes } = field_fold(FillField, #config {}),
    case validate(Config) of
        ok ->
            {ok, proplists:get_value(node_id, Complete),
             Config #config { nodes = normalise_nodes(Nodes) }};
        {error, _} = Err ->
            Err
    end.
187 | 
%% Returns ok when the proplist defines every required key, otherwise
%% an error naming the missing keys.
check_required_keys(Proplist) ->
    Present = proplists:get_keys(Proplist),
    case [Key || Key <- required_keys(), not lists:member(Key, Present)] of
        [] ->
            ok;
        Missing ->
            {error, rabbit_misc:format(
                      "Required keys missing from cluster config: ~p",
                      [Missing])}
    end.
195 | 
%% Prepends {Key, Default} to the proplist for every optional key that
%% the proplist does not already define.
add_optional_keys(Proplist) ->
    AddIfAbsent = fun ({Key, _Default} = Entry, Acc) ->
                          case proplists:is_defined(Key, Acc) of
                              true  -> Acc;
                              false -> [Entry | Acc]
                          end
                  end,
    lists:foldl(AddIfAbsent, Proplist, lists:reverse(optional_keys())).
203 | 
%% Validates every field of the config with validate_key/3, in field
%% order. Returns ok, or the first error encountered; later fields are
%% not checked once an error has been found.
validate(Config) ->
    CheckField = fun (_FieldName, _Pos, {error, _E} = Failed) ->
                         Failed;
                     (FieldName, Pos, ok) ->
                         Value = element(Pos, Config),
                         validate_key(FieldName, Value, Config)
                 end,
    field_fold(CheckField, ok).
210 | 
%% Validates one config field. The arguments are the field name, the
%% field's value and the whole (not yet normalised) config. The result
%% is ok or {error, Message}.

%% version: must be a non-negative integer.
validate_key(version, Version, _Config)
  when is_integer(Version), Version >= 0 ->
    ok;
validate_key(version, Version, _Config) ->
    {error, rabbit_misc:format("Require version to be non-negative integer: ~p",
                               [Version])};

%% nodes: a list whose entries are Node, {Node, disc}, {Node, disk} or
%% {Node, ram} with Node an atom. A bare Node counts as a disc node. An
%% empty list is accepted; otherwise at least one disc node is required
%% and no node name may occur twice.
validate_key(nodes, Nodes, _Config) when not is_list(Nodes) ->
    {error,
     rabbit_misc:format("Require nodes to be a list of nodes: ~p", [Nodes])};
validate_key(nodes, Nodes, _Config) ->
    %% Accumulator: {ok, SeenDiscNode, NamesSoFar} | {error, Message, []}.
    Collect =
        fun (_Entry, {error, _Reason, _Names} = Failed) ->
                Failed;
            ({Name, ram}, {ok, HasDisc, Names}) when is_atom(Name) ->
                {ok, HasDisc, [Name | Names]};
            ({Name, Type}, {ok, _HasDisc, Names})
              when is_atom(Name) andalso
                   (Type =:= disc orelse Type =:= disk) ->
                {ok, true, [Name | Names]};
            (Name, {ok, _HasDisc, Names}) when is_atom(Name) ->
                {ok, true, [Name | Names]};
            (Invalid, {ok, _HasDisc, _Names}) ->
                {error, rabbit_misc:format("Invalid node: ~p", [Invalid]), []}
        end,
    %% The fold runs right-to-left, so the collected names keep the
    %% order of Nodes and the reported invalid entry is the last one.
    case lists:foldr(Collect, {ok, false, []}, Nodes) of
        {error, Reason, _Names} ->
            {error, Reason};
        {ok, false, []} ->
            ok;
        {ok, false, _Names} ->
            {error, rabbit_misc:format(
                      "Require at least one disc node: ~p", [Nodes])};
        {ok, true, Names} ->
            case length(lists:usort(Names)) =:= length(Names) of
                true ->
                    ok;
                false ->
                    {error, rabbit_misc:format(
                              "Some nodes specified more than once: ~p",
                              [Names])}
            end
    end;

%% gospel: either reset, or {node, Node} where Node appears in the
%% config's node list in any of the accepted spellings.
validate_key(gospel, reset, _Config) ->
    ok;
validate_key(gospel, {node, Node}, #config { nodes = Nodes }) ->
    Spellings = [Node, {Node, disc}, {Node, disk}, {Node, ram}],
    case [Entry || Entry <- Nodes, lists:member(Entry, Spellings)] of
        [_ | _] ->
            ok;
        [] ->
            {error, rabbit_misc:format(
                      "Node in gospel (~p) is not in nodes (~p)",
                      [Node, Nodes])}
    end;
validate_key(gospel, Gospel, _Config) ->
    {error, rabbit_misc:format("Invalid gospel setting: ~p", [Gospel])};

%% node_ids: only checked to be a list.
validate_key(node_ids, NodeIDs, _Config) when not is_list(NodeIDs) ->
    {error,
     rabbit_misc:format("Requires node_ids to be an orddict: ~p", [NodeIDs])};
validate_key(node_ids, _NodeIDs, _Config) ->
    ok.
269 | 
%% Converts a validated node list into an orddict of Node -> disc | ram.
%% A bare Node and {Node, disk} both become {Node, disc}. Any other
%% entry shape crashes with function_clause.
normalise_nodes(Nodes) when is_list(Nodes) ->
    Canonical = fun ({Name, ram} = Entry)  when is_atom(Name) -> Entry;
                    ({Name, disc} = Entry) when is_atom(Name) -> Entry;
                    ({Name, disk})         when is_atom(Name) -> {Name, disc};
                    (Name)                 when is_atom(Name) -> {Name, disc}
                end,
    Pairs = lists:usort([Canonical(Entry) || Entry <- Nodes]),
    orddict:from_list(Pairs).
278 | 
279 | %%----------------------------------------------------------------------------
280 | 
%% Copies the node_ids of the first config into Dest. When there is no
%% source config (undefined), Dest is returned unchanged.
transfer_node_ids(#config { node_ids = SourceIDs }, #config { } = Dest) ->
    Dest #config { node_ids = SourceIDs };
transfer_node_ids(undefined, Dest) ->
    Dest.
285 | 
%% Takes the ID that the remote config records for Node (if it records
%% one) into the local config's node_ids, then tidies the result with
%% tidy_node_ids/2 using our own NodeID.
update_node_id(Node, #config { node_ids = RemoteIDs },
               NodeID, #config { node_ids = LocalIDs } = Config) ->
    Merged = case orddict:find(Node, RemoteIDs) of
                 {ok, RemoteID} -> orddict:store(Node, RemoteID, LocalIDs);
                 error          -> LocalIDs
             end,
    tidy_node_ids(NodeID, Config #config { node_ids = Merged }).
293 | 
%% Merges the {Node, ID} pairs in ExtraNodeIDs into the config's
%% node_ids, then tidies the result with tidy_node_ids/2. When both
%% sides know a node, the ID from ExtraNodeIDs wins.
add_node_ids(ExtraNodeIDs, NodeID, #config { node_ids = Known } = Config) ->
    Extra      = orddict:from_list(ExtraNodeIDs),
    PreferNew  = fun (_Node, _KnownID, ExtraID) -> ExtraID end,
    Merged     = orddict:merge(PreferNew, Known, Extra),
    tidy_node_ids(NodeID, Config #config { node_ids = Merged }).
298 | 
%% Records NewNodeID as the ID of NewNode and tidies the result with
%% tidy_node_ids/2. Returns {Changed, Config1}, where Changed is true
%% only if NewNode already had an ID and it differs from NewNodeID.
add_node_id(NewNode, NewNodeID, NodeID,
            #config { node_ids = Known } = Config) ->
    %% A node with no previous ID never counts as changed. If NewNode
    %% is not one of the config's nodes, tidy_node_ids/2 drops the
    %% entry stored below.
    Changed = case orddict:find(NewNode, Known) of
                  {ok, PreviousID} -> PreviousID =/= NewNodeID;
                  error            -> false
              end,
    Updated = Config #config {
                node_ids = orddict:store(NewNode, NewNodeID, Known) },
    {Changed, tidy_node_ids(NodeID, Updated)}.
310 | 
%% Restricts node_ids to nodes that are members of the config and, when
%% the local node (node()) is itself a member, stores NodeID as its ID.
tidy_node_ids(NodeID, #config { nodes = Nodes, node_ids = NodeIDs } = Config) ->
    IsMember = fun (Node, _ID) -> orddict:is_key(Node, Nodes) end,
    Members  = orddict:filter(IsMember, NodeIDs),
    Self     = node(),
    %% Our own ID may have changed or be missing, so always overwrite it.
    Tidied = case orddict:is_key(Self, Nodes) of
                 false -> Members;
                 true  -> orddict:store(Self, NodeID, Members)
             end,
    Config #config { node_ids = Tidied }.
321 | 
322 | %%----------------------------------------------------------------------------
323 | 
%% Compares two configs. Returns:
%%   coeval  - identical once node_ids are ignored;
%%   younger - they differ and ConfigA has the higher version;
%%   older   - they differ and ConfigA has the lower version;
%%   invalid - they differ but carry the same version.
compare(#config { version = VersionA } = ConfigA,
        #config { version = VersionB } = ConfigB) ->
    %% node_ids are semantically irrelevant for comparison
    StrippedA = ConfigA #config { node_ids = undefined },
    StrippedB = ConfigB #config { node_ids = undefined },
    if
        StrippedA =:= StrippedB -> coeval;
        VersionA > VersionB     -> younger;
        VersionA < VersionB     -> older;
        true                    -> invalid
    end.
334 | 
%% is_compatible(ConfigNew, ConfigOld) decides whether a changed config
%% is a soft evolution of the old one (true) or whether a full join is
%% needed, which may well include wiping out mnesia (false).
%%
%% The configs are compatible when they are identical. Otherwise they
%% are incompatible if there is no old config or the new gospel is
%% reset. With a gospel node, both this node and the gospel node must
%% already be in the old config, and the gospel node must not have been
%% reset in the meantime: if both configs record an ID for it and the
%% IDs differ, we need a fresh sync to it.
is_compatible(Config, Config) ->
    true;
is_compatible(#config {}, undefined) ->
    false;
is_compatible(#config { gospel = reset }, _ConfigOld) ->
    false;
is_compatible(#config { gospel = {node, GospelNode}, node_ids = NewIDs },
              #config { node_ids = OldIDs } = ConfigOld) ->
    BothKnown = contains_node(node(), ConfigOld) andalso
                contains_node(GospelNode, ConfigOld),
    BothKnown andalso
        case {orddict:find(GospelNode, NewIDs),
              orddict:find(GospelNode, OldIDs)} of
            {{ok, SameID}, {ok, SameID}} -> true;
            {{ok, _NewID}, {ok, _OldID}} -> false;
            {_, _}                       -> true
        end.
357 | 
358 | %%----------------------------------------------------------------------------
359 | 
%% True when Node is a key of the config's nodes orddict.
contains_node(Node, #config { nodes = Members }) ->
    orddict:is_key(Node, Members).
361 | 
%% True only when the config's nodes are exactly [{Node, disc}], i.e.
%% Node is the sole member and is a disc node.
is_singleton(Node, #config { nodes = Members }) ->
    Members =:= [{Node, disc}];
is_singleton(_Node, _NotAConfig) ->
    false.
364 | 
%% Returns the config's version field.
version(#config { version = Value }) ->
    Value.
366 | 
%% Returns the names of all nodes in the config (the keys of the nodes
%% orddict).
nodenames(#config { nodes = Members }) ->
    orddict:fetch_keys(Members).
368 | 
%% Returns the names of the nodes whose type is disc.
disc_nodenames(#config { nodes = Members }) ->
    IsDisc    = fun (_Node, Type) -> Type =:= disc end,
    DiscNodes = orddict:filter(IsDisc, Members),
    orddict:fetch_keys(DiscNodes).
371 | 
%% Returns the type stored for Node in the config's nodes orddict.
%% Uses orddict:fetch/2, so Node must be a member of the config.
node_type(Node, #config { nodes = Members }) ->
    orddict:fetch(Node, Members).
373 | 
%% Returns the ID recorded for Node in the config's node_ids orddict.
%% Uses orddict:fetch/2, so an ID for Node must have been recorded.
node_id(Node, #config { node_ids = KnownIDs }) ->
    orddict:fetch(Node, KnownIDs).
375 | 
%% Returns the config's gospel field.
gospel(#config { gospel = Value }) ->
    Value.
377 | 


--------------------------------------------------------------------------------
/rabbitmq-components.mk:
--------------------------------------------------------------------------------
  1 | ifeq ($(.DEFAULT_GOAL),)
  2 | # Define default goal to `all` because this file defines some targets
  3 | # before the inclusion of erlang.mk leading to the wrong target becoming
  4 | # the default.
  5 | .DEFAULT_GOAL = all
  6 | endif
  7 | 
  8 | # PROJECT_VERSION defaults to:
  9 | #   1. the version exported by rabbitmq-server-release;
 10 | #   2. the version stored in `git-revisions.txt`, if it exists;
 11 | #   3. a version based on git-describe(1), if it is a Git clone;
 12 | #   4. 0.0.0
 13 | 
 14 | PROJECT_VERSION := $(RABBITMQ_VERSION)
 15 | 
# The block below only runs when RABBITMQ_VERSION gave no version.
#   - git-revisions.txt present: awk prints one field of the file's first
#     line; the field index is the number of words in
#     $(PROJECT_DESCRIPTION) plus one.
#   - otherwise: the output of git-describe (or "rabbitmq_v0_0_0" when
#     git-describe fails) is rewritten by sed: a leading "rabbitmq_v" or
#     "v" is stripped, "_" becomes ".", the first "-" becomes "+" and any
#     remaining "-" becomes ".".
 16 | ifeq ($(PROJECT_VERSION),)
 17 | PROJECT_VERSION := $(shell \
 18 | if test -f git-revisions.txt; then \
 19 | 	head -n1 git-revisions.txt | \
 20 | 	awk '{print $$$(words $(PROJECT_DESCRIPTION) version);}'; \
 21 | else \
 22 | 	(git describe --dirty --abbrev=7 --tags --always --first-parent \
 23 | 	 2>/dev/null || echo rabbitmq_v0_0_0) | \
 24 | 	sed -e 's/^rabbitmq_v//' -e 's/^v//' -e 's/_/./g' -e 's/-/+/' \
 25 | 	 -e 's/-/./g'; \
 26 | fi)
 27 | endif
 28 | 
 29 | # --------------------------------------------------------------------
 30 | # RabbitMQ components.
 31 | # --------------------------------------------------------------------
 32 | 
 33 | # For RabbitMQ repositories, we want to checkout branches which match
 34 | # the parent project. For instance, if the parent project is on a
 35 | # release tag, dependencies must be on the same release tag. If the
 36 | # parent project is on a topic branch, dependencies must be on the same
 37 | # topic branch or fallback to `stable` or `master` whichever was the
 38 | # base of the topic branch.
 39 | 
 40 | dep_amqp_client                       = git_rmq rabbitmq-erlang-client $(current_rmq_ref) $(base_rmq_ref) master
 41 | dep_amqp10_client                     = git_rmq rabbitmq-amqp1.0-client $(current_rmq_ref) $(base_rmq_ref) master
 42 | dep_amqp10_common                     = git_rmq rabbitmq-amqp1.0-common $(current_rmq_ref) $(base_rmq_ref) master
 43 | dep_rabbit                            = git_rmq rabbitmq-server $(current_rmq_ref) $(base_rmq_ref) master
 44 | dep_rabbit_common                     = git_rmq rabbitmq-common $(current_rmq_ref) $(base_rmq_ref) master
 45 | dep_rabbitmq_amqp1_0                  = git_rmq rabbitmq-amqp1.0 $(current_rmq_ref) $(base_rmq_ref) master
 46 | dep_rabbitmq_auth_backend_amqp        = git_rmq rabbitmq-auth-backend-amqp $(current_rmq_ref) $(base_rmq_ref) master
 47 | dep_rabbitmq_auth_backend_cache       = git_rmq rabbitmq-auth-backend-cache $(current_rmq_ref) $(base_rmq_ref) master
 48 | dep_rabbitmq_auth_backend_http        = git_rmq rabbitmq-auth-backend-http $(current_rmq_ref) $(base_rmq_ref) master
 49 | dep_rabbitmq_auth_backend_ldap        = git_rmq rabbitmq-auth-backend-ldap $(current_rmq_ref) $(base_rmq_ref) master
 50 | dep_rabbitmq_auth_mechanism_ssl       = git_rmq rabbitmq-auth-mechanism-ssl $(current_rmq_ref) $(base_rmq_ref) master
 51 | dep_rabbitmq_aws                      = git_rmq rabbitmq-aws $(current_rmq_ref) $(base_rmq_ref) master
 52 | dep_rabbitmq_boot_steps_visualiser    = git_rmq rabbitmq-boot-steps-visualiser $(current_rmq_ref) $(base_rmq_ref) master
 53 | dep_rabbitmq_clusterer                = git_rmq rabbitmq-clusterer $(current_rmq_ref) $(base_rmq_ref) master
 54 | dep_rabbitmq_cli                      = git_rmq rabbitmq-cli $(current_rmq_ref) $(base_rmq_ref) master
 55 | dep_rabbitmq_codegen                  = git_rmq rabbitmq-codegen $(current_rmq_ref) $(base_rmq_ref) master
 56 | dep_rabbitmq_consistent_hash_exchange = git_rmq rabbitmq-consistent-hash-exchange $(current_rmq_ref) $(base_rmq_ref) master
 57 | dep_rabbitmq_ct_client_helpers        = git_rmq rabbitmq-ct-client-helpers $(current_rmq_ref) $(base_rmq_ref) master
 58 | dep_rabbitmq_ct_helpers               = git_rmq rabbitmq-ct-helpers $(current_rmq_ref) $(base_rmq_ref) master
 59 | dep_rabbitmq_delayed_message_exchange = git_rmq rabbitmq-delayed-message-exchange $(current_rmq_ref) $(base_rmq_ref) master
 60 | dep_rabbitmq_dotnet_client            = git_rmq rabbitmq-dotnet-client $(current_rmq_ref) $(base_rmq_ref) master
 61 | dep_rabbitmq_event_exchange           = git_rmq rabbitmq-event-exchange $(current_rmq_ref) $(base_rmq_ref) master
 62 | dep_rabbitmq_federation               = git_rmq rabbitmq-federation $(current_rmq_ref) $(base_rmq_ref) master
 63 | dep_rabbitmq_federation_management    = git_rmq rabbitmq-federation-management $(current_rmq_ref) $(base_rmq_ref) master
 64 | dep_rabbitmq_java_client              = git_rmq rabbitmq-java-client $(current_rmq_ref) $(base_rmq_ref) master
 65 | dep_rabbitmq_jms_client               = git_rmq rabbitmq-jms-client $(current_rmq_ref) $(base_rmq_ref) master
 66 | dep_rabbitmq_jms_cts                  = git_rmq rabbitmq-jms-cts $(current_rmq_ref) $(base_rmq_ref) master
 67 | dep_rabbitmq_jms_topic_exchange       = git_rmq rabbitmq-jms-topic-exchange $(current_rmq_ref) $(base_rmq_ref) master
 68 | dep_rabbitmq_lvc_exchange             = git_rmq rabbitmq-lvc-exchange $(current_rmq_ref) $(base_rmq_ref) master
 69 | dep_rabbitmq_management               = git_rmq rabbitmq-management $(current_rmq_ref) $(base_rmq_ref) master
 70 | dep_rabbitmq_management_agent         = git_rmq rabbitmq-management-agent $(current_rmq_ref) $(base_rmq_ref) master
 71 | dep_rabbitmq_management_exchange      = git_rmq rabbitmq-management-exchange $(current_rmq_ref) $(base_rmq_ref) master
 72 | dep_rabbitmq_management_themes        = git_rmq rabbitmq-management-themes $(current_rmq_ref) $(base_rmq_ref) master
 73 | dep_rabbitmq_management_visualiser    = git_rmq rabbitmq-management-visualiser $(current_rmq_ref) $(base_rmq_ref) master
 74 | dep_rabbitmq_message_timestamp        = git_rmq rabbitmq-message-timestamp $(current_rmq_ref) $(base_rmq_ref) master
 75 | dep_rabbitmq_metronome                = git_rmq rabbitmq-metronome $(current_rmq_ref) $(base_rmq_ref) master
 76 | dep_rabbitmq_mqtt                     = git_rmq rabbitmq-mqtt $(current_rmq_ref) $(base_rmq_ref) master
 77 | dep_rabbitmq_objc_client              = git_rmq rabbitmq-objc-client $(current_rmq_ref) $(base_rmq_ref) master
 78 | dep_rabbitmq_peer_discovery_aws       = git_rmq rabbitmq-peer-discovery-aws $(current_rmq_ref) $(base_rmq_ref) master
 79 | dep_rabbitmq_peer_discovery_common    = git_rmq rabbitmq-peer-discovery-common $(current_rmq_ref) $(base_rmq_ref) master
 80 | dep_rabbitmq_peer_discovery_consul    = git_rmq rabbitmq-peer-discovery-consul $(current_rmq_ref) $(base_rmq_ref) master
 81 | dep_rabbitmq_peer_discovery_etcd      = git_rmq rabbitmq-peer-discovery-etcd $(current_rmq_ref) $(base_rmq_ref) master
 82 | dep_rabbitmq_peer_discovery_k8s       = git_rmq rabbitmq-peer-discovery-k8s $(current_rmq_ref) $(base_rmq_ref) master
 83 | dep_rabbitmq_random_exchange          = git_rmq rabbitmq-random-exchange $(current_rmq_ref) $(base_rmq_ref) master
 84 | dep_rabbitmq_recent_history_exchange  = git_rmq rabbitmq-recent-history-exchange $(current_rmq_ref) $(base_rmq_ref) master
 85 | dep_rabbitmq_routing_node_stamp       = git_rmq rabbitmq-routing-node-stamp $(current_rmq_ref) $(base_rmq_ref) master
 86 | dep_rabbitmq_rtopic_exchange          = git_rmq rabbitmq-rtopic-exchange $(current_rmq_ref) $(base_rmq_ref) master
 87 | dep_rabbitmq_server_release           = git_rmq rabbitmq-server-release $(current_rmq_ref) $(base_rmq_ref) master
 88 | dep_rabbitmq_sharding                 = git_rmq rabbitmq-sharding $(current_rmq_ref) $(base_rmq_ref) master
 89 | dep_rabbitmq_shovel                   = git_rmq rabbitmq-shovel $(current_rmq_ref) $(base_rmq_ref) master
 90 | dep_rabbitmq_shovel_management        = git_rmq rabbitmq-shovel-management $(current_rmq_ref) $(base_rmq_ref) master
 91 | dep_rabbitmq_stomp                    = git_rmq rabbitmq-stomp $(current_rmq_ref) $(base_rmq_ref) master
 92 | dep_rabbitmq_toke                     = git_rmq rabbitmq-toke $(current_rmq_ref) $(base_rmq_ref) master
 93 | dep_rabbitmq_top                      = git_rmq rabbitmq-top $(current_rmq_ref) $(base_rmq_ref) master
 94 | dep_rabbitmq_tracing                  = git_rmq rabbitmq-tracing $(current_rmq_ref) $(base_rmq_ref) master
 95 | dep_rabbitmq_trust_store              = git_rmq rabbitmq-trust-store $(current_rmq_ref) $(base_rmq_ref) master
 96 | dep_rabbitmq_test                     = git_rmq rabbitmq-test $(current_rmq_ref) $(base_rmq_ref) master
 97 | dep_rabbitmq_web_dispatch             = git_rmq rabbitmq-web-dispatch $(current_rmq_ref) $(base_rmq_ref) master
 98 | dep_rabbitmq_web_stomp                = git_rmq rabbitmq-web-stomp $(current_rmq_ref) $(base_rmq_ref) master
 99 | dep_rabbitmq_web_stomp_examples       = git_rmq rabbitmq-web-stomp-examples $(current_rmq_ref) $(base_rmq_ref) master
100 | dep_rabbitmq_web_mqtt                 = git_rmq rabbitmq-web-mqtt $(current_rmq_ref) $(base_rmq_ref) master
101 | dep_rabbitmq_web_mqtt_examples        = git_rmq rabbitmq-web-mqtt-examples $(current_rmq_ref) $(base_rmq_ref) master
102 | dep_rabbitmq_website                  = git_rmq rabbitmq-website $(current_rmq_ref) $(base_rmq_ref) live master
103 | dep_toke                              = git_rmq toke $(current_rmq_ref) $(base_rmq_ref) master
104 | 
105 | dep_rabbitmq_public_umbrella          = git_rmq rabbitmq-public-umbrella $(current_rmq_ref) $(base_rmq_ref) master
106 | 
107 | # Third-party dependencies version pinning.
108 | #
109 | # We do that in this file, which is copied in all projects, to ensure
110 | # all projects use the same versions. It avoids conflicts and makes it
111 | # possible to work with rabbitmq-public-umbrella.
112 | 
113 | dep_cowboy = hex 2.6.1
114 | dep_cowlib = hex 2.7.0
115 | dep_jsx = hex 2.9.0
116 | dep_lager = hex 3.6.5
117 | dep_ra = git https://github.com/rabbitmq/ra.git master
118 | dep_ranch = hex 1.7.1
119 | dep_recon = hex 2.3.6
120 | 
121 | dep_sockjs = git https://github.com/rabbitmq/sockjs-erlang.git 405990ea62353d98d36dbf5e1e64942d9b0a1daf
122 | 
123 | RABBITMQ_COMPONENTS = amqp_client \
124 | 		      amqp10_common \
125 | 		      amqp10_client \
126 | 		      rabbit \
127 | 		      rabbit_common \
128 | 		      rabbitmq_amqp1_0 \
129 | 		      rabbitmq_auth_backend_amqp \
130 | 		      rabbitmq_auth_backend_cache \
131 | 		      rabbitmq_auth_backend_http \
132 | 		      rabbitmq_auth_backend_ldap \
133 | 		      rabbitmq_auth_mechanism_ssl \
134 | 		      rabbitmq_aws \
135 | 		      rabbitmq_boot_steps_visualiser \
136 | 		      rabbitmq_clusterer \
137 | 		      rabbitmq_cli \
138 | 		      rabbitmq_codegen \
139 | 		      rabbitmq_consistent_hash_exchange \
140 | 		      rabbitmq_ct_client_helpers \
141 | 		      rabbitmq_ct_helpers \
142 | 		      rabbitmq_delayed_message_exchange \
143 | 		      rabbitmq_dotnet_client \
144 | 		      rabbitmq_event_exchange \
145 | 		      rabbitmq_federation \
146 | 		      rabbitmq_federation_management \
147 | 		      rabbitmq_java_client \
148 | 		      rabbitmq_jms_client \
149 | 		      rabbitmq_jms_cts \
150 | 		      rabbitmq_jms_topic_exchange \
151 | 		      rabbitmq_lvc_exchange \
152 | 		      rabbitmq_management \
153 | 		      rabbitmq_management_agent \
154 | 		      rabbitmq_management_exchange \
155 | 		      rabbitmq_management_themes \
156 | 		      rabbitmq_management_visualiser \
157 | 		      rabbitmq_message_timestamp \
158 | 		      rabbitmq_metronome \
159 | 		      rabbitmq_mqtt \
160 | 		      rabbitmq_objc_client \
161 | 		      rabbitmq_peer_discovery_aws \
162 | 		      rabbitmq_peer_discovery_common \
163 | 		      rabbitmq_peer_discovery_consul \
164 | 		      rabbitmq_peer_discovery_etcd \
165 | 		      rabbitmq_peer_discovery_k8s \
166 | 		      rabbitmq_random_exchange \
167 | 		      rabbitmq_recent_history_exchange \
168 | 		      rabbitmq_routing_node_stamp \
169 | 		      rabbitmq_rtopic_exchange \
170 | 		      rabbitmq_server_release \
171 | 		      rabbitmq_sharding \
172 | 		      rabbitmq_shovel \
173 | 		      rabbitmq_shovel_management \
174 | 		      rabbitmq_stomp \
175 | 		      rabbitmq_toke \
176 | 		      rabbitmq_top \
177 | 		      rabbitmq_tracing \
178 | 		      rabbitmq_trust_store \
179 | 		      rabbitmq_web_dispatch \
180 | 		      rabbitmq_web_mqtt \
181 | 		      rabbitmq_web_mqtt_examples \
182 | 		      rabbitmq_web_stomp \
183 | 		      rabbitmq_web_stomp_examples \
184 | 		      rabbitmq_website
185 | 
186 | # Several components have a custom erlang.mk/build.config, mainly
187 | # to disable eunit. Therefore, we can't use the top-level project's
188 | # erlang.mk copy.
189 | NO_AUTOPATCH += $(RABBITMQ_COMPONENTS)
190 | 
# current_rmq_ref: the Git ref of this checkout. It is the first ref
# listed in every dep_* definition above, and it is exported.
#
# It is only computed when the variable is not defined at all.
#   - In a Git clone: the current branch as printed by `git branch
#     --list`; for a detached HEAD, the name that follows "detached
#     at"/"detached from". The value is left empty when that name
#     equals the short hash of HEAD.
#   - Outside a Git clone: master.
191 | ifeq ($(origin current_rmq_ref),undefined)
192 | ifneq ($(wildcard .git),)
193 | current_rmq_ref := $(shell (\
194 | 	ref=$$(LANG=C git branch --list | awk '/^\* \(.*detached / {ref=$$0; sub(/.*detached [^ ]+ /, "", ref); sub(/\)$$/, "", ref); print ref; exit;} /^\* / {ref=$$0; sub(/^\* /, "", ref); print ref; exit}');\
195 | 	if test "$$(git rev-parse --short HEAD)" != "$$ref"; then echo "$$ref"; fi))
196 | else
197 | current_rmq_ref := master
198 | endif
199 | endif
200 | export current_rmq_ref
201 | 
# base_rmq_ref: the ref listed after current_rmq_ref in every dep_*
# definition above. It is exported, and only computed when the variable
# is not defined at all.
#
# In a Git clone the candidate is $(possible_base_rmq_ref). It is used
# directly when it equals current_rmq_ref. Otherwise it is used only if
# both master and the candidate resolve and the merge-base of master and
# HEAD is an ancestor of the candidate; failing that, master is used.
# With the candidate hard-coded to master, every path currently yields
# master.
202 | ifeq ($(origin base_rmq_ref),undefined)
203 | ifneq ($(wildcard .git),)
204 | possible_base_rmq_ref := master
205 | ifeq ($(possible_base_rmq_ref),$(current_rmq_ref))
206 | base_rmq_ref := $(current_rmq_ref)
207 | else
208 | base_rmq_ref := $(shell \
209 | 	(git rev-parse --verify -q master >/dev/null && \
210 | 	 git rev-parse --verify -q $(possible_base_rmq_ref) >/dev/null && \
211 | 	 git merge-base --is-ancestor $$(git merge-base master HEAD) $(possible_base_rmq_ref) && \
212 | 	 echo $(possible_base_rmq_ref)) || \
213 | 	echo master)
214 | endif
215 | else
216 | base_rmq_ref := master
217 | endif
218 | endif
219 | export base_rmq_ref
220 | 
221 | # Repository URL selection.
222 | #
223 | # First, we infer other components' location from the current project
224 | # repository URL, if it's a Git repository:
225 | #   - We take the "origin" remote URL as the base
226 | # - The current project name and repository name is replaced by the
227 | #   target's properties:
228 | #       eg. rabbitmq-common is replaced by rabbitmq-codegen
229 | #       eg. rabbit_common is replaced by rabbitmq_codegen
230 | #
231 | # If cloning from this computed location fails, we fallback to RabbitMQ
232 | # upstream which is GitHub.
233 | 
234 | # Macro to transform eg. "rabbit_common" to "rabbitmq-common".
#   $(1): component name, e.g. rabbit_common.
# The repository name is the second word of the component's dep_<name>
# definition, so the result is empty for a name without such a
# definition.
235 | rmq_cmp_repo_name = $(word 2,$(dep_$(1)))
236 | 
237 | # Upstream URL for the current project.
# The repository name is looked up from $(PROJECT) via rmq_cmp_repo_name.
# Both URLs are assigned with ?=, so a value set beforehand is kept.
238 | RABBITMQ_COMPONENT_REPO_NAME := $(call rmq_cmp_repo_name,$(PROJECT))
239 | RABBITMQ_UPSTREAM_FETCH_URL ?= https://github.com/rabbitmq/$(RABBITMQ_COMPONENT_REPO_NAME).git
240 | RABBITMQ_UPSTREAM_PUSH_URL ?= git@github.com:rabbitmq/$(RABBITMQ_COMPONENT_REPO_NAME).git
241 | 
242 | # Current URL for the current project. If this is not a Git clone,
243 | # default to the upstream Git repository.
# In a Git clone the fetch URL is remote.origin.url; the push URL is
# remote.origin.pushurl when configured, otherwise remote.origin.url.
# Both RABBITMQ_CURRENT_* variables are assigned with ?=, so a value set
# beforehand is kept.
244 | ifneq ($(wildcard .git),)
245 | git_origin_fetch_url := $(shell git config remote.origin.url)
246 | git_origin_push_url := $(shell git config remote.origin.pushurl || git config remote.origin.url)
247 | RABBITMQ_CURRENT_FETCH_URL ?= $(git_origin_fetch_url)
248 | RABBITMQ_CURRENT_PUSH_URL ?= $(git_origin_push_url)
249 | else
250 | RABBITMQ_CURRENT_FETCH_URL ?= $(RABBITMQ_UPSTREAM_FETCH_URL)
251 | RABBITMQ_CURRENT_PUSH_URL ?= $(RABBITMQ_UPSTREAM_PUSH_URL)
252 | endif
253 | 
254 | # Macro to replace the following pattern:
255 | #   1. /foo.git -> /bar.git
256 | #   2. /foo     -> /bar
257 | #   3. /foo/    -> /bar/
#   $(1): name to replace ("foo"), $(2): replacement ("bar"),
#   $(3): the URL to rewrite.
# The three patsubst calls are nested; the innermost handles case 1 and
# the outermost case 3.
# NOTE(review): GNU make treats only the first "%" of a patsubst pattern
# as a wildcard, so the outermost pattern "%/$(1)/%" appears to match
# only text ending in a literal "/$(1)/%" -- confirm whether case 3 ever
# applies.
258 | subst_repo_name = $(patsubst %/$(1)/%,%/$(2)/%,$(patsubst %/$(1),%/$(2),$(patsubst %/$(1).git,%/$(2).git,$(3))))
259 | 
260 | # Macro to replace both the project's name (eg. "rabbit_common") and
261 | # repository name (eg. "rabbitmq-common") by the target's equivalent.
262 | #
263 | # This macro is kept on one line because we don't want whitespaces in
264 | # the returned value, as it's used in $(dep_fetch_git_rmq) in a shell
265 | # single-quoted string.
#   $(1): URL of the current project's repository,
#   $(2): name of the dependency.
# The substitution is only applied when dep_$(2) is defined; otherwise
# the macro expands to $(pkg_$(1)_repo).
# NOTE(review): that fallback is keyed by $(1), the URL argument, rather
# than by the dependency name $(2) -- confirm this is intended.
266 | dep_rmq_repo = $(if $(dep_$(2)),$(call subst_repo_name,$(PROJECT),$(2),$(call subst_repo_name,$(RABBITMQ_COMPONENT_REPO_NAME),$(call rmq_cmp_repo_name,$(2)),$(1))),$(pkg_$(1)_repo))
267 | 
# Macro returning the list of refs to try for a dependency.
#   $(1): name of the dependency.
# When dep_$(1) is defined, the refs are its third and following words
# (the first two being the fetch method and the repository name);
# otherwise the macro expands to $(pkg_$(1)_commit).
268 | dep_rmq_commits = $(if $(dep_$(1)),					\
269 | 		  $(wordlist 3,$(words $(dep_$(1))),$(dep_$(1))),	\
270 | 		  $(pkg_$(1)_commit))
271 | 
# Shell fragment that clones a dependency and checks out a suitable ref.
# Presumably invoked by erlang.mk for dependencies whose fetch method is
# "git_rmq" (the first word of the dep_* definitions) -- TODO confirm.
#   $(1): name of the dependency.
#
# Steps:
#   1. Derive two candidate fetch URLs with dep_rmq_repo: one from the
#      current project's URL, one from the upstream URL.
#   2. Clone (without checkout) from the first candidate, unless it is
#      identical to the current project's own URL; if that is skipped or
#      fails, clone from the upstream candidate. The matching push URL
#      is remembered.
#   3. In the clone, check out the first ref of dep_rmq_commits that
#      succeeds; if none does, print an error and fail.
#   4. Set the push URL of "origin" when it differs from the fetch URL.
#
# Shell variables are written with "$$$$"; this presumably survives two
# rounds of make expansion as a single "$" -- confirm against erlang.mk.
272 | define dep_fetch_git_rmq
273 | 	fetch_url1='$(call dep_rmq_repo,$(RABBITMQ_CURRENT_FETCH_URL),$(1))'; \
274 | 	fetch_url2='$(call dep_rmq_repo,$(RABBITMQ_UPSTREAM_FETCH_URL),$(1))'; \
275 | 	if test "$$$$fetch_url1" != '$(RABBITMQ_CURRENT_FETCH_URL)' && \
276 | 	 git clone -q -n -- "$$$$fetch_url1" $(DEPS_DIR)/$(call dep_name,$(1)); then \
277 | 	    fetch_url="$$$$fetch_url1"; \
278 | 	    push_url='$(call dep_rmq_repo,$(RABBITMQ_CURRENT_PUSH_URL),$(1))'; \
279 | 	elif git clone -q -n -- "$$$$fetch_url2" $(DEPS_DIR)/$(call dep_name,$(1)); then \
280 | 	    fetch_url="$$$$fetch_url2"; \
281 | 	    push_url='$(call dep_rmq_repo,$(RABBITMQ_UPSTREAM_PUSH_URL),$(1))'; \
282 | 	fi; \
283 | 	cd $(DEPS_DIR)/$(call dep_name,$(1)) && ( \
284 | 	$(foreach ref,$(call dep_rmq_commits,$(1)), \
285 | 	  git checkout -q $(ref) >/dev/null 2>&1 || \
286 | 	  ) \
287 | 	(echo "error: no valid pathspec among: $(call dep_rmq_commits,$(1))" \
288 | 	  1>&2 && false) ) && \
289 | 	(test "$$$$fetch_url" = "$$$$push_url" || \
290 | 	 git remote set-url --push origin "$$$$push_url")
291 | endef
292 | 
293 | # --------------------------------------------------------------------
294 | # Component distribution.
295 | # --------------------------------------------------------------------
296 | 
# Both targets are double-colon rules whose recipe is the shell no-op
# ":" (silenced with "@"), so they exist and succeed even when nothing
# else contributes to them. Double-colon rules allow further independent
# rules for the same target to be declared elsewhere.
297 | list-dist-deps::
298 | 	@:
299 | 
300 | prepare-dist::
301 | 	@:
302 | 
303 | # --------------------------------------------------------------------
304 | # Umbrella-specific settings.
305 | # --------------------------------------------------------------------
306 | 
307 | # If this project is under the Umbrella project, we override $(DEPS_DIR)
308 | # to point to the Umbrella's one. We also disable `make distclean` so
309 | # $(DEPS_DIR) is not accidentally removed.
310 | 
# Umbrella detection: UNDER_UMBRELLA is set to 1 when UMBRELLA.md exists
# two directories up or in the current directory.
#
# Under the Umbrella:
#   - DEPS_DIR defaults to the parent directory, unless this project is
#     rabbitmq_public_umbrella itself;
#   - SKIP_DEPS is set to 1 when the requested goals include distclean
#     or distclean-deps.
311 | ifneq ($(wildcard ../../UMBRELLA.md),)
312 | UNDER_UMBRELLA = 1
313 | else ifneq ($(wildcard UMBRELLA.md),)
314 | UNDER_UMBRELLA = 1
315 | endif
316 | 
317 | ifeq ($(UNDER_UMBRELLA),1)
318 | ifneq ($(PROJECT),rabbitmq_public_umbrella)
319 | DEPS_DIR ?= $(abspath ..)
320 | endif
321 | 
322 | ifneq ($(filter distclean distclean-deps,$(MAKECMDGOALS)),)
323 | SKIP_DEPS = 1
324 | endif
325 | endif
326 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # RabbitMQ Clusterer
  2 | 
  3 | This plugin is **no longer maintained**, and **completely unnecessary** with [supported releases of RabbitMQ](https://www.rabbitmq.com/versions.html).
  4 | 
  5 | This plugin provided an alternative means for creating and maintaining
  6 | RabbitMQ clusters. It is highly opinionated and was created with specific
  7 | opinionated infrastructure provisioning tooling in mind. Team RabbitMQ
  8 | **considers it to be a failed experiment** and **highly recommends against it**.
  9 | 
 10 | Please [upgrade to a supported RabbitMQ version](https://www.rabbitmq.com/upgrade.html) instead.
 11 | Use the [peer discovery subsystem](https://www.rabbitmq.com/cluster-formation.html) introduced in RabbitMQ 3.7.0 or
 12 | its predecessor, the [rabbitmq-autocluster](https://github.com/rabbitmq/rabbitmq-autocluster) plugin.
 13 | That plugin is not a strict alternative to this one but targets a wider range of provisioning scenarios.
 14 | 
 15 | ## Project status
 16 | 
 17 | The plugin was created to handle nodes restarting in an arbitrary order.
 18 | **Since RabbitMQ version 3.6.7 this problem is [addressed](https://www.rabbitmq.com/clustering.html#restarting)** in the core.
 19 | 
 20 | This plugin is considered deprecated, and it's recommended to switch
 21 | to RabbitMQ's built-in [cluster formation feature](https://www.rabbitmq.com/configure.html) in order to avoid
 22 | known issues that this plugin's opinionated behavior entails (such as
 23 | [#7](https://github.com/rabbitmq/rabbitmq-clusterer/issues/7)).
 24 | 
 25 | ## Overview
 26 | 
 27 | Traditional RabbitMQ clustering is not very friendly to infrastructure
 28 | automation tools such as Chef, Puppet or BOSH. The
 29 | existing tooling (`rabbitmqctl join_cluster` and friends) is
 30 | imperative, requires more oversight and does not handle potentially
 31 | random node boot order very well. The Clusterer has been specifically
 32 | designed with automated deployment tools in mind.
 33 | 
 34 | Unlike the existing tooling, the Clusterer is declarative and goal
 35 | directed: you tell it the overall shape of the cluster you want to
 36 | construct and the clusterer tries to achieve that. With the Clusterer,
 37 | cluster configuration can be provided in a single location (a configuration
 38 | file).
 39 | 
 40 | With `rabbitmq-clusterer`, nodes in a cluster can be restarted in any order,
 41 | which can be the case with automation tools performing upgrades/reconfiguration,
 42 | or due to node failure timing.
 43 | 
 44 | 
 45 | ## Project Maturity
 46 | 
 47 | This plugin is **ancient abandonware**. Do not use. Yes, really, it no longer has any reasons to exist.
 48 | 
 49 | 
 50 | ## Compatibility With Traditional RabbitMQ Clustering
 51 | 
 52 | The Clusterer is not generally compatible with the existing clustering
 53 | tool-set. Do not use any of the `rabbitmqctl` commands relating to
 54 | changing clusters: `join_cluster`, `change_cluster_node_type`, and `update_cluster_nodes` must not be used.
 55 | If you do use these, this plugin likely won't be able to perform its jobs.
 56 | 
 57 | `rabbitmqctl cluster_status` may be used to inspect a cluster
 58 | state, but the Clusterer sends to the standard Rabbit log files
 59 | details about any clusters it joins or leaves. See the *Inspecting the
 60 | Clusterer Status* section further down.
 61 | 
 62 | `rabbitmqctl stop_app`, `rabbitmqctl forget_cluster_node`, and `rabbitmqctl start_app`
 63 | can be used to force a node out of a cluster before cluster config can be changed. While
 64 | this is not generally recommended, there can be valid reasons for doing so, e.g. node
 65 | running out of disk space and/or needing replacement for other reasons.
 66 | 
 67 | `cluster_nodes` in the RabbitMQ config file is incompatible with this plugin
 68 | and must not be used.
 69 | 
 70 | 
 71 | ## Installation
 72 | 
 73 | Binary builds of this plugin are available from
 74 | 
 75 |  * [Bintray](https://bintray.com/rabbitmq/community-plugins/rabbitmq_clusterer) (like with other [RabbitMQ Community Plugins](https://rabbitmq.com/community-plugins.html))
 76 |  * [GitHub releases page](https://github.com/rabbitmq/rabbitmq-clusterer/releases)
 77 | 
 78 | As with all other plugins, you must put the plugin archive (`.ez`) in
 79 | the [RabbitMQ plugins directory](https://www.rabbitmq.com/relocate.html)
 80 | and enable it with `rabbitmq-plugins enable rabbitmq_clusterer --offline`.
 81 | 
 82 | ## For Recent RabbitMQ Versions (3.5.4 and Later)
 83 | 
 84 | The compiled plugin file needs to be placed into the [RabbitMQ plugins directory](https://www.rabbitmq.com/relocate.html).
 85 | 
 86 | To use the plugin, it is necessary to override `RABBITMQ_BOOT_MODULE` to `rabbit_clusterer`. This
 87 | is done similarly to [other RabbitMQ environment variables](https://rabbitmq.com/configure.html).
 88 | 
 89 | Because this plugin coordinates RabbitMQ node start, it needs to be manually added to the Erlang VM
 90 | code path:
 91 | 
 92 | ```
 93 | export RABBITMQ_BOOT_MODULE=rabbit_clusterer
 94 | export RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /path/to/rabbitmq/plugins/rabbitmq_clusterer.ez/rabbitmq_clusterer-{clusterer-version}/ebin"
 95 | ```
 96 | 
 97 | where `{clusterer-version}` is the build of the plugin (see [GitHub releases page](https://github.com/rabbitmq/rabbitmq-clusterer/releases) and [Bintray](https://bintray.com/rabbitmq/community-plugins/rabbitmq_clusterer)):
 98 | 
 99 | ```
100 | export RABBITMQ_BOOT_MODULE=rabbit_clusterer
101 | export RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /path/to/rabbitmq/plugins/rabbitmq_clusterer-1.0.3.ez/rabbitmq_clusterer-1.0.3/ebin"
102 | ```
103 | 
104 | Since `.ez` files are `.zip` archives, they can be easily inspected when you are not sure about
105 | the exact name of the directory inside the file you've downloaded.
106 | 
107 | ## For RabbitMQ 3.4.x
108 | 
109 | ### rabbitmq-server Patch
110 | 
111 | With RabbitMQ versions earlier than `3.5.4`, it is necessary to apply a `rabbitmq-server`
112 | patch and re-compile the broker.
113 | 
114 | The patch is provided in
115 | [rabbitmq-clusterer/rabbitmq-server.patch](https://github.com/rabbitmq/rabbitmq-clusterer/blob/master/rabbitmq-server.patch).
116 | Change into the server scripts
117 | directory and apply it with:
118 | 
119 |     patch -p1 < rabbitmq-clusterer/rabbitmq-server.patch
120 | 
121 | The patch assumes the plugin archive is at `${RABBITMQ_PLUGINS_DIR}/rabbitmq_clusterer.ez`.
122 | 
123 | 
124 | ## Usage in Environments with Dynamic Hostnames (e.g. Kubernetes)
125 | 
126 | Since this plugin assumes that all cluster members are known ahead of time
127 | and listed in the config, environments with dynamically generated hostnames
128 | must be configured to use known (or completely predictable) hostnames.
129 | 
130 | For Kubernetes specifically, there's an [example repository](https://github.com/MattFriedman/kubernetes-rabbitmq-clusterer) contributed by Matt Friedman.
131 | 
132 | There's also [another example that uses Kubernetes 1.5.x](https://github.com/nanit/kubernetes-rabbitmq-cluster).
133 | 
134 | 
135 | ## Cluster Config Specification
136 | 
137 | The Clusterer will communicate a new valid config to both all the
138 | nodes of its current config, and in addition to all the nodes in the
139 | new config. Even if the cluster is able to be formed in the absence of
140 | some nodes indicated in the config, the nodes of the cluster will
141 | continue to attempt to make contact with any missing nodes and will
142 | pass the config to them if and when they eventually appear.
143 | 
144 | All of which means that you generally only need to supply new configs
145 | to a single node of any cluster. There is no harm in doing more than
146 | this. The Clusterer stores on disk the currently applied config (it
147 | stores this next to the mnesia directory Rabbit uses for all its
148 | persistent data) and so if a node goes down, it will have a record of
149 | the config in operation when it was last up. When it comes back up, it
150 | will attempt to rejoin that cluster, regardless of whether this node
151 | was ever explicitly given this config.
152 | 
153 | There are a couple of ways to specify a cluster config: via an external
154 | file or inline.
155 | 
156 | ### Using External Config File
157 | 
158 | In a `rabbitmq_clusterer` section in `rabbitmq.config` file you
159 | can add a `config` entry that is a path to a configuration file.
160 | 
161 | Below are some examples.
162 | 
163 | When using `rabbitmq.conf` (currently only available in RabbitMQ master):
164 | 
165 |     clusterer.config = /path/to/my/cluster.config
166 | 
167 | When using the classic configuration format (`rabbitmq.config`, prior to 3.7.0) or `advanced.config`:
168 | 
169 |       [{rabbitmq_clusterer,
170 |           [{config, "/path/to/my/cluster.config"}]
171 |        }].
172 | 
173 | Like with `rabbitmq.config` or any other Erlang terms file,
174 | the dot at the end is mandatory.
175 | 
176 | ### Using Inline Configuration in rabbitmq.config
177 | 
178 | It is possible to provide cluster configuration in `rabbitmq.config`.
179 | 
180 | In `rabbitmq.conf`:
181 | 
182 |     clusterer.version = 43
183 |     clusterer.nodes.disc.1 = rabbit@hostA
184 |     clusterer.nodes.disc.2 = rabbit@hostD
185 |     clusterer.nodes.ram.1  = rabbit@hostB
186 |     clusterer.gospel.node = rabbit@hostD
187 | 
188 | Or, using the classic config format (`rabbitmq.config`, prior to 3.7.0) or `advanced.config`:
189 | 
190 |       [{rabbitmq_clusterer,
191 |           [{config,
192 |               [{version, 43},
193 |                {nodes, [{rabbit@hostA, disc}, {rabbit@hostB, ram}, {rabbit@hostD, disc}]},
194 |                {gospel, {node, rabbit@hostD}}]
195 |            }]
196 |        }].
197 | 
198 | This approach makes configuration management with tools such as Chef somewhat
199 | less convenient, so external configuration file is the recommended option.
200 | 
201 | 
202 | ### Using rabbitmqctl eval
203 | 
204 | `rabbitmqctl eval 'rabbit_clusterer:apply_config().'`
205 | 
206 | **This will only have any effect if there is an entry in the
207 | `rabbitmq.config` file for the Clusterer as above, and a path is
208 | specified as the value rather than a config directly.**
209 | 
210 | If that is the case, then this will cause the node to reload the
211 | file containing cluster config and apply it. Note that you cannot
212 | change the path itself in the `rabbitmq.config` file dynamically:
213 | neither Rabbit nor the Clusterer will pick up any changes to that
214 | file without restarting the whole Erlang node.
215 | 
216 | `rabbitmqctl eval 'rabbit_clusterer:apply_config("/path/to/my/other/cluster.config").'`
217 | 
218 | This will cause the Clusterer to attempt to load the indicated file
219 | as a cluster config and apply it. Using this method rather than the
220 | above allows the path to change dynamically and does not depend on
221 | any entries in the `rabbitmq.config` file. The path provided here is
222 | not retained in any way: providing the path here does not influence
223 | future calls to `rabbit_clusterer:apply_config().` - using
224 | `rabbit_clusterer:apply_config().` *always* attempts to inspect the
225 | path as found in `rabbitmq.config` when the node was started.
226 | 
227 | Note that if you really want to, rather than supplying a path to a file,
228 | you can supply the cluster config as a proplist directly, just as
229 | you can in the `rabbitmq.config` file itself.
230 | 
231 | 
232 | 
233 | ## Cluster Configuration
234 | 
235 | A cluster config is an Erlang proplist consisting of just three
236 | tuples. The config can be supplied to Rabbit in a variety of ways and
237 | it is in general only necessary to supply a config to a single node of
238 | a cluster: the Clusterer will take care of distributing the config to
239 | all the other nodes as necessary.
240 | 
241 |     [{version, 43},
242 |      {nodes, [{rabbit@hostA, disc}, {rabbit@hostB, ram}, {rabbit@hostD, disc}]},
243 |      {gospel, {node, rabbit@hostD}}].
244 | 
245 | The above gives an example cluster config. This specifies that the
246 | cluster is formed out of the nodes `rabbit@hostA`, `rabbit@hostB` and
247 | `rabbit@hostD` and that `rabbit@hostA` and `rabbit@hostD` are *disc*
248 | nodes and `rabbit@hostB` is a *ram* node. The `nodes` tuple is really
249 | the only tuple that describes the shape of the cluster. The other
250 | tuples describe how to achieve the cluster, and are thus mainly
251 | irrelevant once the cluster has been achieved.
252 | 
253 | In general, the Clusterer will wait indefinitely for the conditions to
254 | be correct to form any given cluster. This is in contrast to the
255 | existing tools which will either timeout or in some cases take
256 | (arguably) unsafe actions. For example, the existing tools will allow
257 | a fresh node to fully start when it is supplied with a cluster
258 | configuration which involves other nodes which are not currently
259 | contactable. This is unsafe because those other nodes might not be
260 | fresh nodes: the intention would be for the fresh node to sync with
261 | those other nodes and preserve the data those nodes hold. When those
262 | other nodes eventually return, manual intervention is then required to
263 | throw away some data and preserve others. The Clusterer, by contrast,
264 | would wait until it could either verify that all the nodes to be part
265 | of the cluster are fresh (so there is no data to preserve at all), or
266 | failing that would wait until one of the non-fresh nodes was fully up
267 | and running, at which point it could sync with that node.
268 | 
269 | * version: non negative integer
270 | 
271 |     All configs are versioned and this is used to decide which of any
272 |     two configs is the youngest. A config which has a smaller version
273 |     number is older. Configs will be ignored unless they are younger
274 |     than the current config. Note that in lieu of any config being
275 |     provided by the user, the default config is used which has a
276 |     version of 0. Thus user supplied configs should use a version of 1
277 |     or greater.
278 | 
279 | * nodes: list
280 | 
281 |     List the names of the nodes that are to be in the cluster. If you
282 |     list node names directly then they are considered to be disc
283 |     nodes. If you specify nodes by using a tuple, you can specify a
284 |     disc node using either `disc` or `disk`. If you want to specify
285 |     ram nodes, you must use a tuple, with `ram` as the second
286 |     element. Order of nodes does not matter. The following are all
287 |     equivalent.
288 | 
289 |         {nodes, [rabbit@hostA, rabbit@hostD, {rabbit@hostB, ram}]}
290 |         {nodes, [rabbit@hostD, rabbit@hostA, {rabbit@hostB, ram}]}
291 |         {nodes, [{rabbit@hostB, ram}, rabbit@hostD, rabbit@hostA]}
292 |         {nodes, [rabbit@hostA, {rabbit@hostD, disk}, {rabbit@hostB, ram}]}
293 |         {nodes, [{rabbit@hostA, disc}, {rabbit@hostD, disk}, {rabbit@hostB, ram}]}
294 | 
295 | * gospel: `reset` or `{node, `*nodename*`}`
296 | 
297 |     When multiple nodes are to become a cluster (or indeed multiple
298 |     clusters are to merge: you can think of an unclustered node as a
299 |     cluster of a single node) some data must be lost and some data can
300 |     be preserved: given two unclustered nodes *A* and *B* that are to
301 |     become a cluster, either *A*'s data can survive or *B*'s data can
302 |     survive, or neither, but not both. The `gospel` tuple allows you
303 |     to specify which data should survive:
304 | 
305 |     * `reset` will reset all nodes in the cluster. This will apply
306 |       *every time the cluster config is changed and applied* (i.e. if
307 |       you change some other setting in the config, bump the version
308 |       number, leave the gospel as `reset` and apply the config to any
309 |       node in your cluster, you will find the entire cluster
310 |       resets). This is deliberate: it allows you to very easily and
311 |       quickly reset an entire cluster, but in general you'll only
312 |       occasionally want to set `gospel` to `reset`.
313 | 
314 |     * `{node, nodename}` The nodename must appear in the `nodes`
315 |       tuple. The data held by the existing cluster of which *nodename*
316 |       is a member will survive. Nodes that are listed in the `nodes`
317 |       tuple but which are not currently members of the same cluster as
318 |       *nodename* will be reset. The phrasing here is very deliberate:
319 |       it is not necessary for *nodename* to actually be up and running
320 |       for this to work. If you have an existing cluster of nodes *A*
321 |       and *B* and you want to add in node *C* you can set the `gospel`
322 |       to be `{node, A}`, add *C* to the `nodes` tuple, bump the
323 |       version and apply the config to *C* and provided *at least one*
324 |       of *A* or *B* is up and running, *C* will successfully
325 |       cluster. I.e. if only *B* is up, *B* still knows that it is
326 |       clustered with *A*, it just happens to be the case that *A* is
327 |       currently unavailable. Thus *C* can cluster with *B* and both
328 |       will happily work, awaiting the return of *A*.
329 | 
330 |       In this particular case, the subsequent behaviour when *A*
331 |       returns is important. If *A* has been reset and is now running
332 |       an older config then it is *A* that is reset again to join back
333 |       in with *B* and *C*. I.e. the `gospel` setting is really
334 |       identifying that the data that *A* holds at a particular moment
335 |       in time is the data to be preserved. When *A* comes back, having
336 |       been reset, *A* realises that the `gospel` is indicating an
337 |       older version of *A*, which is preserved by the surviving
338 |       cluster nodes of *B* and *C*, not the newer reset data held by
339 |       *A*. The upshot of this is that in your cluster, if a node
340 |       fails, goes down and has to be reset, then to join it back into
341 |       the cluster you don't need to alter anything in the cluster
342 |       config (and indeed shouldn't): even if the failed node was named
343 |       as the `gospel`, you shouldn't make any changes to the config.
344 | 
345 |       By contrast, if *A* comes back and has been reset but is now
346 |       running a younger config than *B* and *C*, then that younger
347 |       config will propagate to *B* and *C*. If *A* is named as the
348 |       gospel in the new younger config, then that refers to the data
349 |       held by the new younger *A*, and so *B* and *C* will reset as
350 |       necessary.
351 | 
352 | 
353 | 
354 | ### Mistakes in config files
355 | 
356 | Config files can contain mistakes. If you apply a config file using
357 | `rabbitmqctl eval` then you'll get feedback directly. If you specify
358 | the config file via `rabbitmq.config` then any mistakes will be logged
359 | to Rabbit's log files.
360 | 
361 | In general, the Clusterer tries reasonably hard to give informative
362 | messages about what it doesn't like, but that can only occur if the
363 | config is syntactically valid in the first place. If you forget to
364 | bump the version number it will complain, and generally whenever the
365 | Clusterer comes across configs with equal version numbers but
366 | semantically different contents it takes highly evasive action: in
367 | some situations, it may decide to shut down the whole Erlang node
368 | immediately. It is your responsibility to manage the version numbers:
369 | the Clusterer expects to be able to order configs by version numbers,
370 | and thus determine the youngest config. You need to ensure it can do
371 | this. If you're building cluster configs automatically, one sensible
372 | approach would be to set the version to the number of seconds since
373 | epoch, for example.
374 | 
375 | 
376 | ## Inspecting the Clusterer Status
377 | 
378 | `rabbitmqctl cluster_status` presents basic information about
379 | clusters, but does not interact with the Clusterer. `rabbitmqctl eval
380 | 'rabbit_clusterer:status().'`, on the other hand, does, and shows
381 | which config is in operation by the node and what the Clusterer is
382 | trying to do. If the cluster has been established then the command
383 | will also display which nodes are known to be currently up and
384 | running.
385 | 
386 | 
387 | ## Building From Source
388 | 
389 | The Clusterer reuses parts of the RabbitMQ [umbrella repository](https://github.com/rabbitmq/rabbitmq-public-umbrella). Before
390 | building the plugin, make sure it is cloned as `rabbitmq_clusterer` under it,
391 | much [like other plugins](https://www.rabbitmq.com/plugin-development.html).
392 | 
393 | To build the plugin run `make`. The `VERSION` environment variable is used to specify plugin version, e.g.:
394 | 
395 |     VERSION=3.6.6 make
396 | 
397 | To package the plugin run `make dist`. In some cases, `make clean dist` is the
398 | safest option.
399 | 
400 | ### Linking in Development Environment
401 | 
402 | If you're running a development environment and want to link through
403 | from the `rabbit/plugins` directory, link to
404 | `rabbitmq_clusterer/plugins/rabbitmq_clusterer-$VERSION.ez`. Do not just
405 | link to the `rabbitmq_clusterer` directory.
406 | 
407 | 
408 | ## License and Copyright
409 | 
410 | (c) 2013-2017 Pivotal Software Inc.
411 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                           MOZILLA PUBLIC LICENSE
  2 |                                 Version 1.1
  3 | 
  4 |                               ---------------
  5 | 
  6 | 1. Definitions.
  7 | 
  8 |      1.0.1. "Commercial Use" means distribution or otherwise making the
  9 |      Covered Code available to a third party.
 10 | 
 11 |      1.1. "Contributor" means each entity that creates or contributes to
 12 |      the creation of Modifications.
 13 | 
 14 |      1.2. "Contributor Version" means the combination of the Original
 15 |      Code, prior Modifications used by a Contributor, and the Modifications
 16 |      made by that particular Contributor.
 17 | 
 18 |      1.3. "Covered Code" means the Original Code or Modifications or the
 19 |      combination of the Original Code and Modifications, in each case
 20 |      including portions thereof.
 21 | 
 22 |      1.4. "Electronic Distribution Mechanism" means a mechanism generally
 23 |      accepted in the software development community for the electronic
 24 |      transfer of data.
 25 | 
 26 |      1.5. "Executable" means Covered Code in any form other than Source
 27 |      Code.
 28 | 
 29 |      1.6. "Initial Developer" means the individual or entity identified
 30 |      as the Initial Developer in the Source Code notice required by Exhibit
 31 |      A.
 32 | 
 33 |      1.7. "Larger Work" means a work which combines Covered Code or
 34 |      portions thereof with code not governed by the terms of this License.
 35 | 
 36 |      1.8. "License" means this document.
 37 | 
 38 |      1.8.1. "Licensable" means having the right to grant, to the maximum
 39 |      extent possible, whether at the time of the initial grant or
 40 |      subsequently acquired, any and all of the rights conveyed herein.
 41 | 
 42 |      1.9. "Modifications" means any addition to or deletion from the
 43 |      substance or structure of either the Original Code or any previous
 44 |      Modifications. When Covered Code is released as a series of files, a
 45 |      Modification is:
 46 |           A. Any addition to or deletion from the contents of a file
 47 |           containing Original Code or previous Modifications.
 48 | 
 49 |           B. Any new file that contains any part of the Original Code or
 50 |           previous Modifications.
 51 | 
 52 |      1.10. "Original Code" means Source Code of computer software code
 53 |      which is described in the Source Code notice required by Exhibit A as
 54 |      Original Code, and which, at the time of its release under this
 55 |      License is not already Covered Code governed by this License.
 56 | 
 57 |      1.10.1. "Patent Claims" means any patent claim(s), now owned or
 58 |      hereafter acquired, including without limitation,  method, process,
 59 |      and apparatus claims, in any patent Licensable by grantor.
 60 | 
 61 |      1.11. "Source Code" means the preferred form of the Covered Code for
 62 |      making modifications to it, including all modules it contains, plus
 63 |      any associated interface definition files, scripts used to control
 64 |      compilation and installation of an Executable, or source code
 65 |      differential comparisons against either the Original Code or another
 66 |      well known, available Covered Code of the Contributor's choice. The
 67 |      Source Code can be in a compressed or archival form, provided the
 68 |      appropriate decompression or de-archiving software is widely available
 69 |      for no charge.
 70 | 
 71 |      1.12. "You" (or "Your")  means an individual or a legal entity
 72 |      exercising rights under, and complying with all of the terms of, this
 73 |      License or a future version of this License issued under Section 6.1.
 74 |      For legal entities, "You" includes any entity which controls, is
 75 |      controlled by, or is under common control with You. For purposes of
 76 |      this definition, "control" means (a) the power, direct or indirect,
 77 |      to cause the direction or management of such entity, whether by
 78 |      contract or otherwise, or (b) ownership of more than fifty percent
 79 |      (50%) of the outstanding shares or beneficial ownership of such
 80 |      entity.
 81 | 
 82 | 2. Source Code License.
 83 | 
 84 |      2.1. The Initial Developer Grant.
 85 |      The Initial Developer hereby grants You a world-wide, royalty-free,
 86 |      non-exclusive license, subject to third party intellectual property
 87 |      claims:
 88 |           (a)  under intellectual property rights (other than patent or
 89 |           trademark) Licensable by Initial Developer to use, reproduce,
 90 |           modify, display, perform, sublicense and distribute the Original
 91 |           Code (or portions thereof) with or without Modifications, and/or
 92 |           as part of a Larger Work; and
 93 | 
 94 |           (b) under Patents Claims infringed by the making, using or
 95 |           selling of Original Code, to make, have made, use, practice,
 96 |           sell, and offer for sale, and/or otherwise dispose of the
 97 |           Original Code (or portions thereof).
 98 | 
 99 |           (c) the licenses granted in this Section 2.1(a) and (b) are
100 |           effective on the date Initial Developer first distributes
101 |           Original Code under the terms of this License.
102 | 
103 |           (d) Notwithstanding Section 2.1(b) above, no patent license is
104 |           granted: 1) for code that You delete from the Original Code; 2)
105 |           separate from the Original Code;  or 3) for infringements caused
106 |           by: i) the modification of the Original Code or ii) the
107 |           combination of the Original Code with other software or devices.
108 | 
109 |      2.2. Contributor Grant.
110 |      Subject to third party intellectual property claims, each Contributor
111 |      hereby grants You a world-wide, royalty-free, non-exclusive license
112 | 
113 |           (a)  under intellectual property rights (other than patent or
114 |           trademark) Licensable by Contributor, to use, reproduce, modify,
115 |           display, perform, sublicense and distribute the Modifications
116 |           created by such Contributor (or portions thereof) either on an
117 |           unmodified basis, with other Modifications, as Covered Code
118 |           and/or as part of a Larger Work; and
119 | 
120 |           (b) under Patent Claims infringed by the making, using, or
121 |           selling of  Modifications made by that Contributor either alone
122 |           and/or in combination with its Contributor Version (or portions
123 |           of such combination), to make, use, sell, offer for sale, have
124 |           made, and/or otherwise dispose of: 1) Modifications made by that
125 |           Contributor (or portions thereof); and 2) the combination of
126 |           Modifications made by that Contributor with its Contributor
127 |           Version (or portions of such combination).
128 | 
129 |           (c) the licenses granted in Sections 2.2(a) and 2.2(b) are
130 |           effective on the date Contributor first makes Commercial Use of
131 |           the Covered Code.
132 | 
133 |           (d)    Notwithstanding Section 2.2(b) above, no patent license is
134 |           granted: 1) for any code that Contributor has deleted from the
135 |           Contributor Version; 2)  separate from the Contributor Version;
136 |           3)  for infringements caused by: i) third party modifications of
137 |           Contributor Version or ii)  the combination of Modifications made
138 |           by that Contributor with other software  (except as part of the
139 |           Contributor Version) or other devices; or 4) under Patent Claims
140 |           infringed by Covered Code in the absence of Modifications made by
141 |           that Contributor.
142 | 
143 | 3. Distribution Obligations.
144 | 
145 |      3.1. Application of License.
146 |      The Modifications which You create or to which You contribute are
147 |      governed by the terms of this License, including without limitation
148 |      Section 2.2. The Source Code version of Covered Code may be
149 |      distributed only under the terms of this License or a future version
150 |      of this License released under Section 6.1, and You must include a
151 |      copy of this License with every copy of the Source Code You
152 |      distribute. You may not offer or impose any terms on any Source Code
153 |      version that alters or restricts the applicable version of this
154 |      License or the recipients' rights hereunder. However, You may include
155 |      an additional document offering the additional rights described in
156 |      Section 3.5.
157 | 
158 |      3.2. Availability of Source Code.
159 |      Any Modification which You create or to which You contribute must be
160 |      made available in Source Code form under the terms of this License
161 |      either on the same media as an Executable version or via an accepted
162 |      Electronic Distribution Mechanism to anyone to whom you made an
163 |      Executable version available; and if made available via Electronic
164 |      Distribution Mechanism, must remain available for at least twelve (12)
165 |      months after the date it initially became available, or at least six
166 |      (6) months after a subsequent version of that particular Modification
167 |      has been made available to such recipients. You are responsible for
168 |      ensuring that the Source Code version remains available even if the
169 |      Electronic Distribution Mechanism is maintained by a third party.
170 | 
171 |      3.3. Description of Modifications.
172 |      You must cause all Covered Code to which You contribute to contain a
173 |      file documenting the changes You made to create that Covered Code and
174 |      the date of any change. You must include a prominent statement that
175 |      the Modification is derived, directly or indirectly, from Original
176 |      Code provided by the Initial Developer and including the name of the
177 |      Initial Developer in (a) the Source Code, and (b) in any notice in an
178 |      Executable version or related documentation in which You describe the
179 |      origin or ownership of the Covered Code.
180 | 
181 |      3.4. Intellectual Property Matters
182 |           (a) Third Party Claims.
183 |           If Contributor has knowledge that a license under a third party's
184 |           intellectual property rights is required to exercise the rights
185 |           granted by such Contributor under Sections 2.1 or 2.2,
186 |           Contributor must include a text file with the Source Code
187 |           distribution titled "LEGAL" which describes the claim and the
188 |           party making the claim in sufficient detail that a recipient will
189 |           know whom to contact. If Contributor obtains such knowledge after
190 |           the Modification is made available as described in Section 3.2,
191 |           Contributor shall promptly modify the LEGAL file in all copies
192 |           Contributor makes available thereafter and shall take other steps
193 |           (such as notifying appropriate mailing lists or newsgroups)
194 |           reasonably calculated to inform those who received the Covered
195 |           Code that new knowledge has been obtained.
196 | 
197 |           (b) Contributor APIs.
198 |           If Contributor's Modifications include an application programming
199 |           interface and Contributor has knowledge of patent licenses which
200 |           are reasonably necessary to implement that API, Contributor must
201 |           also include this information in the LEGAL file.
202 | 
203 |                (c)    Representations.
204 |           Contributor represents that, except as disclosed pursuant to
205 |           Section 3.4(a) above, Contributor believes that Contributor's
206 |           Modifications are Contributor's original creation(s) and/or
207 |           Contributor has sufficient rights to grant the rights conveyed by
208 |           this License.
209 | 
210 |      3.5. Required Notices.
211 |      You must duplicate the notice in Exhibit A in each file of the Source
212 |      Code.  If it is not possible to put such notice in a particular Source
213 |      Code file due to its structure, then You must include such notice in a
214 |      location (such as a relevant directory) where a user would be likely
215 |      to look for such a notice.  If You created one or more Modification(s)
216 |      You may add your name as a Contributor to the notice described in
217 |      Exhibit A.  You must also duplicate this License in any documentation
218 |      for the Source Code where You describe recipients' rights or ownership
219 |      rights relating to Covered Code.  You may choose to offer, and to
220 |      charge a fee for, warranty, support, indemnity or liability
221 |      obligations to one or more recipients of Covered Code. However, You
222 |      may do so only on Your own behalf, and not on behalf of the Initial
223 |      Developer or any Contributor. You must make it absolutely clear than
224 |      any such warranty, support, indemnity or liability obligation is
225 |      offered by You alone, and You hereby agree to indemnify the Initial
226 |      Developer and every Contributor for any liability incurred by the
227 |      Initial Developer or such Contributor as a result of warranty,
228 |      support, indemnity or liability terms You offer.
229 | 
230 |      3.6. Distribution of Executable Versions.
231 |      You may distribute Covered Code in Executable form only if the
232 |      requirements of Section 3.1-3.5 have been met for that Covered Code,
233 |      and if You include a notice stating that the Source Code version of
234 |      the Covered Code is available under the terms of this License,
235 |      including a description of how and where You have fulfilled the
236 |      obligations of Section 3.2. The notice must be conspicuously included
237 |      in any notice in an Executable version, related documentation or
238 |      collateral in which You describe recipients' rights relating to the
239 |      Covered Code. You may distribute the Executable version of Covered
240 |      Code or ownership rights under a license of Your choice, which may
241 |      contain terms different from this License, provided that You are in
242 |      compliance with the terms of this License and that the license for the
243 |      Executable version does not attempt to limit or alter the recipient's
244 |      rights in the Source Code version from the rights set forth in this
245 |      License. If You distribute the Executable version under a different
246 |      license You must make it absolutely clear that any terms which differ
247 |      from this License are offered by You alone, not by the Initial
248 |      Developer or any Contributor. You hereby agree to indemnify the
249 |      Initial Developer and every Contributor for any liability incurred by
250 |      the Initial Developer or such Contributor as a result of any such
251 |      terms You offer.
252 | 
253 |      3.7. Larger Works.
254 |      You may create a Larger Work by combining Covered Code with other code
255 |      not governed by the terms of this License and distribute the Larger
256 |      Work as a single product. In such a case, You must make sure the
257 |      requirements of this License are fulfilled for the Covered Code.
258 | 
259 | 4. Inability to Comply Due to Statute or Regulation.
260 | 
261 |      If it is impossible for You to comply with any of the terms of this
262 |      License with respect to some or all of the Covered Code due to
263 |      statute, judicial order, or regulation then You must: (a) comply with
264 |      the terms of this License to the maximum extent possible; and (b)
265 |      describe the limitations and the code they affect. Such description
266 |      must be included in the LEGAL file described in Section 3.4 and must
267 |      be included with all distributions of the Source Code. Except to the
268 |      extent prohibited by statute or regulation, such description must be
269 |      sufficiently detailed for a recipient of ordinary skill to be able to
270 |      understand it.
271 | 
272 | 5. Application of this License.
273 | 
274 |      This License applies to code to which the Initial Developer has
275 |      attached the notice in Exhibit A and to related Covered Code.
276 | 
277 | 6. Versions of the License.
278 | 
279 |      6.1. New Versions.
280 |      Netscape Communications Corporation ("Netscape") may publish revised
281 |      and/or new versions of the License from time to time. Each version
282 |      will be given a distinguishing version number.
283 | 
284 |      6.2. Effect of New Versions.
285 |      Once Covered Code has been published under a particular version of the
286 |      License, You may always continue to use it under the terms of that
287 |      version. You may also choose to use such Covered Code under the terms
288 |      of any subsequent version of the License published by Netscape. No one
289 |      other than Netscape has the right to modify the terms applicable to
290 |      Covered Code created under this License.
291 | 
292 |      6.3. Derivative Works.
293 |      If You create or use a modified version of this License (which you may
294 |      only do in order to apply it to code which is not already Covered Code
295 |      governed by this License), You must (a) rename Your license so that
296 |      the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape",
297 |      "MPL", "NPL" or any confusingly similar phrase do not appear in your
298 |      license (except to note that your license differs from this License)
299 |      and (b) otherwise make it clear that Your version of the license
300 |      contains terms which differ from the Mozilla Public License and
301 |      Netscape Public License. (Filling in the name of the Initial
302 |      Developer, Original Code or Contributor in the notice described in
303 |      Exhibit A shall not of themselves be deemed to be modifications of
304 |      this License.)
305 | 
306 | 7. DISCLAIMER OF WARRANTY.
307 | 
308 |      COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
309 |      WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
310 |      WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF
311 |      DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING.
312 |      THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE
313 |      IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT,
314 |      YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE
315 |      COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER
316 |      OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
317 |      ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.
318 | 
319 | 8. TERMINATION.
320 | 
321 |      8.1.  This License and the rights granted hereunder will terminate
322 |      automatically if You fail to comply with terms herein and fail to cure
323 |      such breach within 30 days of becoming aware of the breach. All
324 |      sublicenses to the Covered Code which are properly granted shall
325 |      survive any termination of this License. Provisions which, by their
326 |      nature, must remain in effect beyond the termination of this License
327 |      shall survive.
328 | 
329 |      8.2.  If You initiate litigation by asserting a patent infringement
330 |      claim (excluding declatory judgment actions) against Initial Developer
331 |      or a Contributor (the Initial Developer or Contributor against whom
332 |      You file such action is referred to as "Participant")  alleging that:
333 | 
334 |      (a)  such Participant's Contributor Version directly or indirectly
335 |      infringes any patent, then any and all rights granted by such
336 |      Participant to You under Sections 2.1 and/or 2.2 of this License
337 |      shall, upon 60 days notice from Participant terminate prospectively,
338 |      unless if within 60 days after receipt of notice You either: (i)
339 |      agree in writing to pay Participant a mutually agreeable reasonable
340 |      royalty for Your past and future use of Modifications made by such
341 |      Participant, or (ii) withdraw Your litigation claim with respect to
342 |      the Contributor Version against such Participant.  If within 60 days
343 |      of notice, a reasonable royalty and payment arrangement are not
344 |      mutually agreed upon in writing by the parties or the litigation claim
345 |      is not withdrawn, the rights granted by Participant to You under
346 |      Sections 2.1 and/or 2.2 automatically terminate at the expiration of
347 |      the 60 day notice period specified above.
348 | 
349 |      (b)  any software, hardware, or device, other than such Participant's
350 |      Contributor Version, directly or indirectly infringes any patent, then
351 |      any rights granted to You by such Participant under Sections 2.1(b)
352 |      and 2.2(b) are revoked effective as of the date You first made, used,
353 |      sold, distributed, or had made, Modifications made by that
354 |      Participant.
355 | 
356 |      8.3.  If You assert a patent infringement claim against Participant
357 |      alleging that such Participant's Contributor Version directly or
358 |      indirectly infringes any patent where such claim is resolved (such as
359 |      by license or settlement) prior to the initiation of patent
360 |      infringement litigation, then the reasonable value of the licenses
361 |      granted by such Participant under Sections 2.1 or 2.2 shall be taken
362 |      into account in determining the amount or value of any payment or
363 |      license.
364 | 
365 |      8.4.  In the event of termination under Sections 8.1 or 8.2 above,
366 |      all end user license agreements (excluding distributors and resellers)
367 |      which have been validly granted by You or any distributor hereunder
368 |      prior to termination shall survive termination.
369 | 
370 | 9. LIMITATION OF LIABILITY.
371 | 
372 |      UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
373 |      (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL
374 |      DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE,
375 |      OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR
376 |      ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY
377 |      CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL,
378 |      WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
379 |      COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
380 |      INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
381 |      LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
382 |      RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
383 |      PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE
384 |      EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO
385 |      THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.
386 | 
387 | 10. U.S. GOVERNMENT END USERS.
388 | 
389 |      The Covered Code is a "commercial item," as that term is defined in
390 |      48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer
391 |      software" and "commercial computer software documentation," as such
392 |      terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48
393 |      C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995),
394 |      all U.S. Government End Users acquire Covered Code with only those
395 |      rights set forth herein.
396 | 
397 | 11. MISCELLANEOUS.
398 | 
399 |      This License represents the complete agreement concerning subject
400 |      matter hereof. If any provision of this License is held to be
401 |      unenforceable, such provision shall be reformed only to the extent
402 |      necessary to make it enforceable. This License shall be governed by
403 |      California law provisions (except to the extent applicable law, if
404 |      any, provides otherwise), excluding its conflict-of-law provisions.
405 |      With respect to disputes in which at least one party is a citizen of,
406 |      or an entity chartered or registered to do business in the United
407 |      States of America, any litigation relating to this License shall be
408 |      subject to the jurisdiction of the Federal Courts of the Northern
409 |      District of California, with venue lying in Santa Clara County,
410 |      California, with the losing party responsible for costs, including
411 |      without limitation, court costs and reasonable attorneys' fees and
412 |      expenses. The application of the United Nations Convention on
413 |      Contracts for the International Sale of Goods is expressly excluded.
414 |      Any law or regulation which provides that the language of a contract
415 |      shall be construed against the drafter shall not apply to this
416 |      License.
417 | 
418 | 12. RESPONSIBILITY FOR CLAIMS.
419 | 
420 |      As between Initial Developer and the Contributors, each party is
421 |      responsible for claims and damages arising, directly or indirectly,
422 |      out of its utilization of rights under this License and You agree to
423 |      work with Initial Developer and Contributors to distribute such
424 |      responsibility on an equitable basis. Nothing herein is intended or
425 |      shall be deemed to constitute any admission of liability.
426 | 
427 | 13. MULTIPLE-LICENSED CODE.
428 | 
429 |      Initial Developer may designate portions of the Covered Code as
430 |      "Multiple-Licensed".  "Multiple-Licensed" means that the Initial
431 |      Developer permits you to utilize portions of the Covered Code under
432 |      Your choice of the MPL or the alternative licenses, if any, specified
433 |      by the Initial Developer in the file described in Exhibit A.
434 | 
435 | EXHIBIT A -Mozilla Public License.
436 | 
437 |      ``The contents of this file are subject to the Mozilla Public License
438 |      Version 1.1 (the "License"); you may not use this file except in
439 |      compliance with the License. You may obtain a copy of the License at
440 |      https://www.mozilla.org/MPL/
441 | 
442 |      Software distributed under the License is distributed on an "AS IS"
443 |      basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
444 |      License for the specific language governing rights and limitations
445 |      under the License.
446 | 
447 |      The Original Code is RabbitMQ.
448 | 
449 |      The Initial Developer of the Original Code is Pivotal Software, Inc.
450 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_coordinator.erl:
--------------------------------------------------------------------------------
  1 | %% The contents of this file are subject to the Mozilla Public License 
  2 | %% Version 1.1 (the "License"); you may not use this file except in 
  3 | %% compliance with the License. You may obtain a copy of the License at 
  4 | %% https://www.mozilla.org/MPL/1.1/ 
  5 | %%
  6 | %% Software distributed under the License is distributed on an "AS IS" 
  7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
  8 | %% License for the specific language governing rights and limitations 
  9 | %% under the License. 
 10 | %%
 11 | %% The Original Code is RabbitMQ. 
 12 | %%
 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
 15 | %% Pivotal Software, Inc. All Rights Reserved.
 16 | 
 17 | -module(rabbit_clusterer_coordinator).
 18 | 
 19 | -behaviour(gen_server).
 20 | 
 21 | -export([begin_coordination/0,
 22 |          rabbit_booted/0,
 23 |          rabbit_boot_failed/0,
 24 |          send_new_config/2,
 25 |          template_new_config/1,
 26 |          apply_config/1,
 27 |          request_status/1]).
 28 | 
 29 | -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2,
 30 |          terminate/2, code_change/3]).
 31 | 
 32 | -define(SERVER, ?MODULE).
 33 | 
 34 | -define(IS_TRANSITIONER(X), (X =:= {transitioner, join} orelse
 35 |                              X =:= {transitioner, rejoin})).
 36 | 
 37 | -record(state, { status,
 38 |                  node_id,
 39 |                  config,
 40 |                  transitioner_state,
 41 |                  comms,
 42 |                  nodes,
 43 |                  alive_mrefs,
 44 |                  dead,
 45 |                  poke_timer_ref,
 46 |                  booted,
 47 |                  last_boot_failed
 48 |                }).
 49 | 
 50 | %%----------------------------------------------------------------------------
 51 | 
 52 | start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
 53 | 
 54 | begin_coordination() -> ok = gen_server:cast(?SERVER, begin_coordination).
 55 | 
 56 | rabbit_booted() -> ok = gen_server:cast(?SERVER, rabbit_booted).
 57 | 
 58 | rabbit_boot_failed() -> ok = gen_server:cast(?SERVER, rabbit_boot_failed).
 59 | 
 60 | send_new_config(Config, Node) when is_atom(Node) ->
 61 |     %% Node may be undefined. gen_server:cast doesn't error. This is
 62 |     %% what we want.
 63 |     ok = gen_server:cast({?SERVER, Node}, template_new_config(Config));
 64 | send_new_config(_Config, []) ->
 65 |     ok;
 66 | send_new_config(Config, Nodes) when is_list(Nodes) ->
 67 |     abcast = gen_server:abcast(
 68 |                lists:usort(Nodes), ?SERVER, template_new_config(Config)),
 69 |     ok.
 70 | 
 71 | template_new_config(Config) -> {new_config, Config, node()}.
 72 | 
 73 | apply_config(Config) ->
 74 |     gen_server:call(?SERVER, {apply_config, Config}, infinity).
 75 | 
 76 | request_status(Node) ->
 77 |     gen_server:call(
 78 |       {?SERVER, Node}, {request_status, undefined, <<>>}, infinity).
 79 | 
 80 | %%----------------------------------------------------------------------------
 81 | 
 82 | init([]) -> {ok, #state { status             = preboot,
 83 |                           node_id            = undefined,
 84 |                           config             = undefined,
 85 |                           transitioner_state = undefined,
 86 |                           comms              = undefined,
 87 |                           nodes              = [],
 88 |                           alive_mrefs        = [],
 89 |                           dead               = [],
 90 |                           poke_timer_ref     = undefined,
 91 |                           booted             = false,
 92 |                           last_boot_failed   = false
 93 |                         }}.
 94 | 
 95 | %%----------------
 96 | %% Call
 97 | %%----------------
 98 | 
 99 | %% request_status requires a response and is only used by the
100 | %% transitioners to perform coordination when joining or rejoining a
101 | %% cluster.
102 | handle_call({request_status, _Node, _NodeID}, _From,
103 |             State = #state { status = preboot }) ->
104 |     %% If status = preboot then we have the situation that a remote
105 |     %% node is contacting us (it's in {transitioner,_}) before we've
106 |     %% even started reading in our cluster configs. We need to "ignore"
107 |     %% them. They'll either wait for us, or they'll start up and bring
108 |     %% us in later on anyway.
109 |     reply(preboot, State);
110 | handle_call({request_status, NewNode, NewNodeID}, From,
111 |             State = #state { status = Status = {transitioner, _} }) ->
112 |     Fun = fun (Config) -> gen_server:reply(From, {Config, Status}), ok end,
113 |     noreply(transitioner_event(
114 |               {request_config, NewNode, NewNodeID, Fun}, State));
115 | handle_call({request_status, NewNode, NewNodeID}, _From,
116 |             State = #state { status  = Status,
117 |                              node_id = NodeID,
118 |                              config  = Config }) ->
119 |     %% Status \in {booting, ready}
120 |     %%
121 |     %% Consider we're running (ready) and we're already clustered with
122 |     %% NewNode, though it's currently down and is just coming back up,
123 |     %% after being reset. At this point, we will learn of its new
124 |     %% NodeID, but we must ignore that: if we merged it into our
125 |     %% config here then should NewNode be starting up with a newer
126 |     %% config that eventually involves us, we would lose the ability
127 |     %% in is_compatible to detect the node has been reset. Hence
128 |     %% ignoring NewNodeID here.
129 |     %%
130 |     %% Equally however, consider we're running in a cluster which has
131 |     %% some missing nodes. Those nodes then come online and request
132 |     %% our status. We should here record their ID. So we want to add
133 |     %% their ID in only if we don't have a record of it already.
134 |     %%
135 |     %% This is consistent with the behaviour of the transitioners
136 |     %% (above head) who will restart the transition if the NewNode has
137 |     %% changed its ID.
138 |     Config1 = case rabbit_clusterer_config:add_node_id(NewNode, NewNodeID,
139 |                                                        NodeID, Config) of
140 |                   {true,  _Config} -> Config;
141 |                   {false, Config2} -> Config2
142 |               end,
143 |     reply({Config1, Status}, State #state { config = Config1 });
144 | 
145 | %% This is where a call from the transitioner on one node to the
146 | %% transitioner on another node lands.
147 | handle_call({{transitioner, _TKind} = Status, Msg}, From,
148 |             State = #state { status = Status }) ->
149 |     Fun = fun (Result) -> gen_server:reply(From, Result), ok end,
150 |     noreply(transitioner_event({Msg, Fun}, State));
151 | handle_call({{transitioner, _TKind}, _Msg}, _From, State) ->
152 |     reply(invalid, State);
153 | 
154 | handle_call({apply_config, NewConfig}, From,
155 |             State = #state { status = Status,
156 |                              config = Config })
157 |   when Status =:= ready orelse ?IS_TRANSITIONER(Status) ->
158 |     case {rabbit_clusterer_config:load(NewConfig), Status} of
159 |         {{ok, NewConfig1}, {transitioner, _}} ->
160 |             %% We have to defer to the transitioner here which means
161 |             %% we can't give back as good feedback, but never
162 |             %% mind. The transitioner will do the comparison for us
163 |             %% with whatever it's currently trying to transition to.
164 |             gen_server:reply(From, transition_in_progress_ok),
165 |             noreply(transitioner_event(
166 |                       {new_config, NewConfig1, undefined}, State));
167 |         {{ok, NewConfig1}, ready} ->
168 |             ReadyNotRunning = Status =:= ready andalso not rabbit:is_running(),
169 |             case rabbit_clusterer_config:compare(NewConfig1, Config) of
170 |                 younger when ReadyNotRunning ->
171 |                            reply({rabbit_not_running, NewConfig1}, State);
172 |                 younger -> gen_server:reply(
173 |                              From, {beginning_transition_to_provided_config,
174 |                                     NewConfig1}),
175 |                            noreply(begin_transition(NewConfig1, State));
176 |                 older   -> reply({provided_config_is_older_than_current,
177 |                                   NewConfig1, Config}, State);
178 |                 coeval  -> reply({provided_config_already_applied,
179 |                                   NewConfig1}, State);
180 |                 invalid ->
181 |                     reply(
182 |                       {provided_config_has_same_version_but_differs_from_current,
183 |                        NewConfig1, Config}, State)
184 |             end;
185 |         {{error, Reason}, _} ->
186 |             reply({invalid_config_specification, NewConfig, Reason}, State)
187 |     end;
188 | handle_call({apply_config, _Config}, _From,
189 |             State = #state { status = Status }) ->
190 |     reply({cannot_apply_config_currently, Status}, State);
191 | 
192 | %% anything else kills us
193 | handle_call(Msg, From, State) ->
194 |     {stop, {unhandled_call, Msg, From}, State}.
195 | 
196 | %%----------------
197 | %% Cast
198 | %%----------------
199 | 
%% Asynchronous messages. Clause order is significant: each message is
%% matched against the coordinator status, and a cast that no clause
%% recognises stops the server.

%% begin_coordination only has an effect in preboot: load our node id
%% and configs, keep the returned node id and prior config, and start
%% transitioning to the config to be applied.
handle_cast(begin_coordination,
            #state { status  = preboot,
                     node_id = NodeID,
                     config  = Config } = State) ->
    {NodeID1, TargetConfig, PriorConfig} =
        rabbit_clusterer_config:load(NodeID, Config),
    State1 = State #state { node_id = NodeID1,
                            config  = PriorConfig },
    noreply(begin_transition(TargetConfig, State1));
handle_cast(begin_coordination, State) ->
    noreply(State);

%% Results coming back from the comms process are for the transitioner,
%% but only if they come from the comms process we currently use.
handle_cast({comms, Comms, Result},
            #state { comms = Comms, status = {transitioner, _} } = State) ->
    noreply(transitioner_event({comms, Result}, State));
handle_cast({comms, _Comms, _Result}, State) ->
    %% Either we are not transitioning, or this is from an old comms
    %% pid: drop it.
    noreply(State);

%% new_config reaches us from a node that believes we may be running
%% an old config and should move to a newer one. It is also sent
%% periodically to nodes missing from a cluster, so that when they
%% appear they learn which cluster config they are expected to be
%% part of.
handle_cast({new_config, _ConfigRemote, Node},
            #state { status = preboot,
                     nodes  = Nodes } = State) ->
    %% We do not know yet what our own config will turn out to be, so
    %% the remote config is ignored; we only remember the sender in
    %% order to send it our eventual config once we have sorted
    %% ourselves out. Duplicates are harmless: they are filtered out
    %% when the list is finally processed.
    noreply(State #state { nodes = [Node | Nodes] });
handle_cast({new_config, ConfigRemote, Node},
            #state { status  = booting,
                     nodes   = Nodes,
                     node_id = NodeID,
                     config  = Config } = State) ->
    %% While Rabbit boots it is not safe to reconfigure it, and with
    %% mnesia in flux nobody else should interfere either, so we
    %% wait. The one thing we do is refresh our node id map when the
    %% remote config is coeval with ours.
    case rabbit_clusterer_config:compare(ConfigRemote, Config) of
        coeval ->
            Config1 = rabbit_clusterer_config:update_node_id(
                        Node, ConfigRemote, NodeID, Config),
            ok = rabbit_clusterer_config:store_internal(NodeID, Config1),
            noreply(State #state { config = Config1 });
        _ ->
            noreply(State #state { nodes = [Node | Nodes] })
    end;
handle_cast({new_config, ConfigRemote, Node},
            #state { status = {transitioner, _} } = State) ->
    %% We may be blocked in the transitioner waiting for another node
    %% to come up while a younger config, which we should really be
    %% transitioning to, has become available. Passing it on is what
    %% prevents a potential deadlock.
    noreply(transitioner_event({new_config, ConfigRemote, Node}, State));
handle_cast({new_config, ConfigRemote, Node},
            #state { status  = ready,
                     node_id = NodeID,
                     config  = Config } = State) ->
    %% When ready we know what our config really is, and it is safe
    %% to begin transitions to other configurations.
    Running = rabbit:is_running(),
    case rabbit_clusterer_config:compare(ConfigRemote, Config) of
        younger when not Running ->
            %% Something (maybe the partition handler) has stopped
            %% Rabbit, so we refuse to do anything for the time being.
            noreply(State);
        younger ->
            %% Switch to the younger remote config. The configs are
            %% deliberately not merged at this stage, as that would
            %% break is_compatible. begin_transition reboots if
            %% necessary.
            noreply(begin_transition(ConfigRemote, State));
        older ->
            %% The sender is behind: send it our config.
            ok = send_new_config(Config, Node),
            noreply(State);
        coeval ->
            Config1 = rabbit_clusterer_config:update_node_id(
                        Node, ConfigRemote, NodeID, Config),
            ok = rabbit_clusterer_config:store_internal(NodeID, Config1),
            noreply(State #state { config = Config1 });
        invalid ->
            %% The remote config is invalid, but we are ready and do
            %% not want to disturb that.
            noreply(State)
    end;

handle_cast(rabbit_booted, #state { status = booting } = State) ->
    %% No transition is allowed to start whilst booting, so it should
    %% be safe to assert that rabbit_booted only arrives in booting.
    State1 = State #state { booted           = true,
                            last_boot_failed = false },
    noreply(set_status(ready, State1));
handle_cast(rabbit_booted, #state { status = preboot } = State) ->
    %% Most likely the rabbit-server script was not edited as
    %% required. Complain very loudly and take the VM down.
    Msg = "RabbitMQ Clusterer is enabled as a plugin but has "
        "not been started correctly. Terminating RabbitMQ.~n",
    error_logger:error_msg(Msg, []),
    io:format(Msg, []),
    init:stop(),
    {stop, startup_error, State};
handle_cast(rabbit_booted, #state { status = ready } = State) ->
    %% Happens when the partition handler stopped and then restarted
    %% rabbit.
    noreply(State);

handle_cast(rabbit_boot_failed,
            #state { status = booting,
                     config = Config } = State) ->
    %% Rabbit itself may have started fine with only some plugin or
    %% separate app failing, in which case it could still be running:
    %% stop it to be on the safe side. Stopping mnesia is crucial,
    %% because rabbit expects mnesia to be stopped on boot.
    ok = rabbit_clusterer_utils:stop_rabbit(),
    ok = rabbit_clusterer_utils:stop_mnesia(),
    State1 = State #state { last_boot_failed = true },
    noreply(begin_transition(Config, State1));

%% Lock and unlock requests are delegated to the comms process. With
%% no comms process a lock is rejected and an unlock is ignored.
handle_cast({lock, Locker}, #state { comms = undefined } = State) ->
    gen_server:cast(Locker, {lock_rejected, node()}),
    noreply(State);
handle_cast({lock, Locker}, #state { comms = Comms } = State) ->
    ok = rabbit_clusterer_comms:lock(Locker, Comms),
    noreply(State);
handle_cast({unlock, _Locker}, #state { comms = undefined } = State) ->
    noreply(State);
handle_cast({unlock, Locker}, #state { comms = Comms } = State) ->
    ok = rabbit_clusterer_comms:unlock(Locker, Comms),
    noreply(State);

%% Any other cast is a protocol violation and stops the server.
handle_cast(Msg, State) ->
    {stop, {unhandled_cast, Msg}, State}.
342 | 
343 | %%----------------
344 | %% Info
345 | %%----------------
346 | 
%% Plain messages: transitioner timers, monitor notifications and the
%% poke timer. Any message that no clause recognises stops the server.

%% Timer callbacks requested by a transitioner. They are forwarded
%% only while a transitioner is active; it is the transitioner's own
%% job to filter out invalid or outdated delayed events.
handle_info({transitioner_delay, Event},
            #state { status = {transitioner, _} } = State) ->
    noreply(transitioner_event(Event, State));
handle_info({transitioner_delay, _Event}, State) ->
    noreply(State);

%% A monitored remote coordinator went down. If the reference is one
%% we track, move the node to the dead list and make sure the poke
%% timer is running; unknown references are ignored.
handle_info({'DOWN', MRef, process, {?SERVER, Node}, _Info},
            #state { alive_mrefs = Alive, dead = Dead } = State) ->
    Remaining = lists:delete(MRef, Alive),
    case Remaining =:= Alive of
        true ->
            noreply(State);
        false ->
            State1 = State #state { alive_mrefs = Remaining,
                                    dead        = [Node | Dead] },
            noreply(ensure_poke_timer(State1))
    end;

%% Poke timer fired. Only when ready do we monitor the dead nodes
%% again and send them our config. While transitioning or booting
%% there is no need: the transitioner updates the nodes we want to
%% cluster with, and the surrounding code updates both the nodes we
%% are currently clustered with and any node that contacted us in the
%% meantime.
handle_info(poke_the_dead,
            #state { status      = ready,
                     config      = Config,
                     dead        = Dead,
                     alive_mrefs = Alive } = State) ->
    NewMRefs = [erlang:monitor(process, {?SERVER, DeadNode})
                || DeadNode <- Dead],
    ok = send_new_config(Config, Dead),
    noreply(State #state { dead           = [],
                           alive_mrefs    = NewMRefs ++ Alive,
                           poke_timer_ref = undefined });
handle_info(poke_the_dead, State) ->
    noreply(State #state { poke_timer_ref = undefined });

%% Any other message is a protocol violation and stops the server.
handle_info(Msg, State) ->
    {stop, {unhandled_info, Msg}, State}.
387 | 
388 | %%----------------
389 | %% Rest
390 | %%----------------
391 | 
392 | terminate(_Why, _FinalState) -> ok. %% no cleanup is performed here
393 | 
394 | code_change(_From, St, _Extra) -> {ok, St}. %% state is carried over as-is
395 | 
396 | %%----------------------------------------------------------------------------
397 | %% Status changes state machine
398 | %%----------------------------------------------------------------------------
399 | 
400 | %% Here we enforce the state machine of valid changes to status.
401 | 
402 | %% preboot           -> a transitioner ({transitioner, TKind})
403 | %% preboot           -> shutdown
404 | %% {transitioner, _} -> booting
405 | %% {transitioner, _} -> a transitioner
406 | %% {transitioner, _} -> shutdown
407 | %% booting           -> ready
408 | %% booting           -> booting
409 | %% booting           -> a transitioner
410 | %% ready             -> a transitioner
411 | %% ready             -> shutdown
412 | 
413 | %% Transitioner statuses may be entered from any status.
414 | set_status(Target, St) when ?IS_TRANSITIONER(Target) ->
415 |     St #state { status = Target };
416 | %% booting: log the config we boot into, then kick Rabbit off (via
417 | %% start_rabbit_async if the booted flag is set, else boot_rabbit_async).
418 | set_status(booting, St = #state { status = Current, booted = Booted,
419 |                                   node_id = NodeID, config = Cfg })
420 |   when ?IS_TRANSITIONER(Current) orelse Current =:= booting ->
421 |     Proplist = rabbit_clusterer_config:to_proplist(NodeID, Cfg),
422 |     error_logger:info_msg(
423 |       "Clusterer booting Rabbit into cluster configuration:~n~p~n",
424 |       [Proplist]),
425 |     ok = case Booted of
426 |              true  -> rabbit_clusterer_utils:start_rabbit_async();
427 |              false -> rabbit_clusterer_utils:boot_rabbit_async()
428 |          end,
429 |     St #state { status = booting };
430 | %% ready: only reachable from booting; (re)establish peer monitoring.
431 | set_status(ready, St = #state { status = booting }) ->
432 |     error_logger:info_msg("Cluster achieved and Rabbit running.~n"),
433 |     update_monitoring(St #state { status = ready });
434 | %% shutdown: not allowed while booting. If we think we're ready there
435 | %% might still be rabbit boot actions going on, so stop Rabbit first.
436 | set_status(shutdown, St = #state { status = Current })
437 |   when Current =/= booting ->
438 |     ok = case Current of ready -> stop_rabbit(); _ -> ok end,
439 |     error_logger:info_msg("Clusterer stopping node now.~n"),
440 |     init:stop(),
441 |     St #state { status = shutdown }.
442 | 
443 | %% Wrap a state as the result of a cast/info callback; once the status
444 | %% is shutdown the server is stopped with reason normal instead.
445 | noreply(#state { status = shutdown } = St) -> {stop, normal, St};
446 | noreply(St)                                -> {noreply, St}.
447 | 
448 | %% Wrap a reply and state as the result of a call callback; Reply is
449 | %% returned either way, and a shutdown status stops us with normal.
450 | reply(Reply, #state { status = shutdown } = St) -> {stop, normal, Reply, St};
451 | reply(Reply, St)                                -> {reply, Reply, St}.
452 | 
453 | %%----------------------------------------------------------------------------
454 | %% Changing cluster config
455 | %%----------------------------------------------------------------------------
456 | 
457 | %% A config that no longer names this node is handled as a shutdown.
458 | begin_transition(NewConfig, State = #state { config = OldCfg }) ->
459 |     case rabbit_clusterer_config:contains_node(node(), NewConfig) of
460 |         true  -> Compat = rabbit_clusterer_config:is_compatible(NewConfig, OldCfg),
461 |                  Merged = rabbit_clusterer_config:transfer_node_ids(OldCfg, NewConfig),
462 |                  begin_transition(Compat, Merged, State);
463 |         false -> process_transitioner_response({shutdown, NewConfig}, State)
464 |     end.
465 | 
466 | %% Compatible config while ready: persist it and carry on running.
467 | begin_transition(true, Cfg, St = #state { status = ready, node_id = NodeID }) ->
468 |     ok = rabbit_clusterer_config:store_internal(NodeID, Cfg),
469 |     error_logger:info_msg(
470 |       "Clusterer seemlessly transitioned to new configuration:~n~p~n",
471 |       [rabbit_clusterer_config:to_proplist(NodeID, Cfg)]),
472 |     update_monitoring(St #state { config = Cfg });
473 | %% Incompatible config while ready: Rabbit is stopped before joining.
474 | begin_transition(false, Cfg, St = #state { status = ready }) ->
475 |     ok = stop_rabbit(),
476 |     join_or_rejoin(join, Cfg, St);
477 | %% Not ready: compatible configs are rejoined, incompatible ones joined.
478 | begin_transition(Compatible, Cfg, St) when is_boolean(Compatible) ->
479 |     join_or_rejoin(case Compatible of true -> rejoin; false -> join end, Cfg, St).
480 | 
481 | %% Start a join/rejoin transitioner for NewConfig on fresh comms, after
482 | %% forwarding the config to the nodes we currently know about.
483 | join_or_rejoin(TKind, NewConfig, State = #state { node_id = NodeID,
484 |                                                   nodes   = Peers,
485 |                                                   last_boot_failed = PreSleep }) ->
486 |     ok = send_new_config(NewConfig, Peers),
487 |     %% Forget monitors and dead nodes: later DOWNs are to be ignored.
488 |     Wiped = State #state { alive_mrefs = [], dead = [], nodes = [] },
489 |     {Comms, State1} = fresh_comms(Wiped),
490 |     Resp = rabbit_clusterer_transitioner:init(
491 |              TKind, NodeID, NewConfig, PreSleep, Comms),
492 |     process_transitioner_response(Resp, set_status({transitioner, TKind}, State1)).
493 | 
494 | %% Feed Event to the running transitioner and act on its response.
495 | transitioner_event(Event, St = #state { status             = {transitioner, _},
496 |                                         transitioner_state = TState }) ->
497 |     process_transitioner_response(rabbit_clusterer_transitioner:event(Event, TState), St).
498 | 
499 | %% Act on what the transitioner handed back; returns the new #state{}.
500 | process_transitioner_response({continue, TState}, St) ->
501 |     St #state { transitioner_state = TState };
502 | process_transitioner_response({Outcome, Cfg}, St = #state { node_id = NodeID })
503 |   when Outcome =:= success; Outcome =:= shutdown ->
504 |     %% success and shutdown are both exits from the transitioner
505 |     %% states, hence the shared handling. A config that tells us to
506 |     %% shut down must be recorded too, otherwise a later restart
507 |     %% could try to start up with an outdated config.
508 |     ok = rabbit_clusterer_config:store_internal(NodeID, Cfg),
509 |     St1 = stop_comms(St #state { transitioner_state = undefined,
510 |                                  config             = Cfg }),
511 |     case Outcome of
512 |         %% Monitors are only updated on the later ready transition.
513 |         success  -> set_status(booting, St1);
514 |         shutdown -> set_status(shutdown, stop_monitoring(St1))
515 |     end;
516 | process_transitioner_response({config_changed, Cfg}, St) ->
517 |     %% begin_transition relies on unmerged configs, so nothing is
518 |     %% merged on the way through here.
519 |     begin_transition(Cfg, St);
520 | process_transitioner_response({sleep, Delay, Event, TState}, St) ->
521 |     %% The wake-up arrives as a transitioner_delay info message.
522 |     erlang:send_after(Delay, self(), {transitioner_delay, Event}),
523 |     St #state { transitioner_state = TState };
524 | process_transitioner_response({invalid_config, Cfg},
525 |                               St = #state { node_id = NodeID }) ->
526 |     %% An invalid config was detected somewhere. We shut ourselves
527 |     %% down without writing the config out, and without touching
528 |     %% the monitoring either.
529 |     St1 = stop_comms(St #state { transitioner_state = undefined }),
530 |     error_logger:info_msg("Multiple different configurations with equal "
531 |                           "version numbers detected. Shutting down.~n~p~n",
532 |                           [rabbit_clusterer_config:to_proplist(
533 |                              NodeID, Cfg)]),
534 |     set_status(shutdown, St1).
535 | 
536 | fresh_comms(State) -> %% returns {Token, State} with a newly started comms
537 |     Stopped = stop_comms(State),
538 |     {ok, Token} = rabbit_clusterer_comms_sup:start_comms(),
539 |     {Token, Stopped #state { comms = Token }}.
540 | 
541 | %% Stop the current comms, if any, and forget its token.
542 | stop_comms(#state { comms = undefined } = St) -> St;
543 | stop_comms(#state { comms = Token } = St) ->
544 |     ok = rabbit_clusterer_comms:stop(Token),
545 |     St #state { comms = undefined }.
546 | 
547 | %%----------------------------------------------------------------------------
548 | %% Helpers
549 | %%----------------------------------------------------------------------------
550 | 
551 | %% Calls rabbit:await_startup() first, then stops Rabbit and mnesia.
552 | %% Not idempotent: Rabbit is always assumed to be running (or at
553 | %% least booting) whenever this is called.
554 | stop_rabbit() ->
555 |     error_logger:info_msg("Clusterer stopping Rabbit.~n"),
556 |     ok = rabbit:await_startup(),
557 |     ok = rabbit_clusterer_utils:stop_rabbit(),
558 |     ok = rabbit_clusterer_utils:stop_mnesia().
559 | 
560 | %% Restart monitoring against the peers named by the current config;
561 | %% peers we were not already tracking are sent the config first.
562 | update_monitoring(State = #state { config = Cfg, nodes = Before }) ->
563 |     Reset = stop_monitoring(State),
564 |     Peers = rabbit_clusterer_config:nodenames(Cfg) -- [node()],
565 |     ok = send_new_config(Cfg, Peers -- Before),
566 |     MRefs = lists:map(fun (P) -> erlang:monitor(process, {?SERVER, P}) end, Peers),
567 |     Reset #state { nodes = Peers, alive_mrefs = MRefs }.
568 | 
569 | %% Send Config to the peers we were tracking, drop their monitors and
570 | %% cancel any pending poke timer: merely forgetting the timer ref would
571 | %% let a stale poke_the_dead fire alongside a later, fresh timer.
572 | stop_monitoring(State = #state { config = Config, nodes = Peers,
573 |                                  alive_mrefs = MRefs, poke_timer_ref = TRef }) ->
574 |     ok = send_new_config(Config, Peers),
575 |     [erlang:demonitor(MRef) || MRef <- MRefs],
576 |     case TRef of undefined -> ok; _ -> erlang:cancel_timer(TRef) end,
577 |     State #state { nodes = [], alive_mrefs = [], dead = [], poke_timer_ref = undefined }.
578 | 
579 | %% Schedule a poke_the_dead message unless a timer ref is already held.
580 | ensure_poke_timer(St = #state { poke_timer_ref = undefined }) ->
581 |     %% TODO: justify 2000
582 |     TRef = erlang:send_after(2000, self(), poke_the_dead),
583 |     St #state { poke_timer_ref = TRef };
584 | ensure_poke_timer(St) -> St.
585 | 


--------------------------------------------------------------------------------
/src/rabbit_clusterer_transitioner.erl:
--------------------------------------------------------------------------------
  1 | %% The contents of this file are subject to the Mozilla Public License 
  2 | %% Version 1.1 (the "License"); you may not use this file except in 
  3 | %% compliance with the License. You may obtain a copy of the License at 
  4 | %% https://www.mozilla.org/MPL/1.1/ 
  5 | %%
  6 | %% Software distributed under the License is distributed on an "AS IS" 
  7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
  8 | %% License for the specific language governing rights and limitations 
  9 | %% under the License. 
 10 | %%
 11 | %% The Original Code is RabbitMQ. 
 12 | %%
 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016
 15 | %% Pivotal Software, Inc. All Rights Reserved.
 16 | 
 17 | -module(rabbit_clusterer_transitioner).
 18 | 
 19 | -export([init/5, event/2]).
 20 | 
 21 | -record(state, { kind, status, node_id, config, comms, awaiting, eliminable }).
 22 | 
 23 | %% Concerns for join:
 24 | %%
 25 | %% We need to figure out what our peers are doing. If any of them are
 26 | %% up and running we can just join in with them. The only other case
 27 | %% we care about is when everyone in the cluster config is alive
 28 | %% (i.e. no BadNodes) and everyone is joining, just like us. In this
 29 | %% case we know that there's no one with knowledge that must be
 30 | %% preserved, so we elect a leader (based on gospel, though in this
 31 | %% case it's not actually necessary to pay attention to gospel, it's
 32 | %% just an easy and unambiguous decider). The leader then comes up on
 33 | %% its own and everyone else waits for them to become ready, and then
 34 | %% syncs.
 35 | %%
 36 | %% In all other cases (i.e. there are nodes rejoining etc) then we
 37 | %% just wait and try again as we should be guaranteed to end up in
 38 | %% some state with some nodes up and running and we can then sync to
 39 | %% them.
 40 | %%
 41 | %%
 42 | %% Concerns for rejoin:
 43 | %%
 44 | %% - Cluster could have grown or shrunk since we last saw it.
 45 | %%
 46 | %% - We want to avoid the timeout on mnesia:wait_for_tables, so we
 47 | %% need to manage the "dependencies" ourselves.
 48 | %%
 49 | %% - The disk-nodes-running-when-we-last-shutdown result can be
 50 | %% satisfied by any one of those nodes managing to start up. It's
 51 | %% certainly not that we depend on *all* of the nodes in there, merely
 52 | %% *any*.
 53 | %%
 54 | %% - We try to shrink that set as much as possible. If we can get it
 55 | %% down to the empty set then we can consider ourselves the "winner"
 56 | %% and can start up without waiting for anyone else.
 57 | %%
 58 | %% - We can remove a node N from that set if:
 59 | %%   a) We can show that N (transitively) depends on us (i.e. we have
 60 | %%     a cycle) and its other dependencies we can also disregard.
 61 | %%   b) We can show that N is currently joining and not
 62 | %%     rejoining. Thus it has been reset and we're witnessing hostname
 63 | %%     reuse. In this case we must ignore N: if we don't then there's
 64 | %%     a risk all the rejoining nodes decide to depend on N, and N (as
 65 | %%     it's joining, not rejoining) waits for everyone else. Thus
 66 | %%     deadlock.
 67 | %%
 68 | %% It's tempting to consider a generalisation of (b) where if we see
 69 | %% that we depend on a node that is currently rejoining but has a
 70 | %% different node id than what we were expecting then it must have
 71 | %% been reset since we last saw it and so we shouldn't depend on
 72 | %% it. However, this is wrong: the fact that it's rejoining shows that
 73 | %% it managed to join (and then be stopped) the cluster after we last
 74 | %% saw it. Thus it still has more up-to-date information than us, so
 75 | %% we should still depend on it. In this case it should also follow
 76 | %% that (a) won't hold for such an N either.
 77 | %%
 78 | %% Both (a) and (b) require that we can communicate with N. Thus if we
 79 | %% have a dependency on a node we can't contact then we can't
 80 | %% eliminate it as a dependency, so we just have to wait for either
 81 | %% said node to come up, or for someone else to determine they can
 82 | %% start.
 83 | %%
 84 | %% The problem with the cluster shrinking is that we have the
 85 | %% possibility of multiple leaders. If A and B both depend on C and
 86 | %% the cluster shrinks, losing C, then A and B could both come up,
 87 | %% learn of the loss of C and thus declare themselves the leader. It
 88 | %% would be possible for both A and B to have *only* C in their
 89 | %% initial set of disk-nodes-running-when-we-last-shutdown (in
 90 | %% general, the act of adding a node to a cluster and all the nodes
 91 | %% rewriting their nodes-running file is non-atomic so we need to be
 92 | %% as accommodating as possible here) so neither would feel it necessary
 93 | %% to wait for each other. Consequently, we have to have some locking
 94 | %% to make sure that we don't have multiple leaders (which could cause
 95 | %% an mnesia fail-to-merge issue). The rule about locking is that you
 96 | %% have to take locks in the same order, and then you can't
 97 | %% deadlock. So, we sort all the nodes from the cluster config, and
 98 | %% grab locks in order. If a node is down, that's treated as being ok
 99 | %% (i.e. you don't abort). You also have to lock yourself. Only when
100 | %% you have all the locks can you actually boot. Why do we lock
101 | %% everyone? Because we can't agree on who to lock. If you tried to
102 | %% pick someone (eg minimum node) then you'd find that could change as
103 | %% other nodes come up or go down, so it's not stable. So lock
104 | %% everyone.
105 | %%
106 | %% Unsurprisingly, this gets a bit more complex. The lock is the Comms
107 | %% Pid, and the lock is taken by Comms Pids. The lock monitors the
108 | %% taker. This is elegant in that if A locks A and B, and then a new
109 | %% cluster config is applied to A then A's comms will be restarted, so
110 | %% the old comms Pid will die, so the locks are released. Similarly,
111 | %% on success, the comms will be stopped, so the lock releases. This
112 | %% is simple and nice. Where it gets slightly more complex is what
113 | %% happens if A locks A and B and then a new config is applied to
114 | %% B. If that were to happen then that would clearly invalidate the
115 | %% config that A is also using. B will forward new config to A too. B
116 | %% and A will both restart their comms, in any order. If B goes first,
117 | %% we don't want B to be held up, so as B will get a new comms, it
118 | %% also gets a new lock as the lock is the comms Pid itself. So when B
119 | %% restarts its comms, it's unlocking itself too.
120 | 
121 | -define(MINI_SLEEP, 500).
122 | -define(BIG_SLEEP, 5000).
123 | 
124 | %%----------------------------------------------------------------------------
125 | %% API
126 | %%----------------------------------------------------------------------------
127 | 
128 | %% Entry point for a transitioner of the given Kind (join | rejoin).
129 | %% A singleton config (per rabbit_clusterer_config:is_singleton/2)
130 | %% succeeds at once; otherwise we go on to request_status.
131 | %%
132 | %% PreSleep is true when the last boot failed, in which case we wait
133 | %% for a while before trying another boot. This is (a) generally
134 | %% polite and avoids spinning too rapidly; (b) gives a period of time
135 | %% during which we're lockable by other nodes and generally receptive
136 | %% to other nodes trying to do things. This is important in the case
137 | %% of upgrades: we might be a node trying to join in with an existing
138 | %% cluster but we're on a new version, so the rabbit boot fails.
139 | %% Consequently, as the other nodes get updated we need to cope with
140 | %% them potentially having different configs and needing to
141 | %% communicate with us, thus we need to be responsive during this
142 | %% sleep period.
143 | init(Kind, NodeID, Config, PreSleep, Comms) ->
144 |     case rabbit_clusterer_config:is_singleton(node(), Config) of
145 |         true ->
146 |             Wipe = Kind =:= join andalso
147 |                 rabbit_clusterer_config:gospel(Config) =:= reset,
148 |             ok = rabbit_clusterer_utils:make_mnesia_singleton(Wipe),
149 |             {success, Config};
150 |         false ->
151 |             State = #state { kind = Kind, node_id = NodeID, config = Config,
152 |                              comms = Comms, awaiting = undefined,
153 |                              eliminable = [] },
154 |             case PreSleep of
155 |                 true  -> delayed_request_status(?BIG_SLEEP, State);
156 |                 false -> request_status(State)
157 |             end
158 |     end.
159 | 
160 | event({comms, {Replies, BadNodes}}, State = #state { kind    = Kind,
161 |                                                      status  = awaiting_status,
162 |                                                      node_id = NodeID,
163 |                                                      config  = Config }) ->
164 |     case analyse_node_statuses(Replies, NodeID, Config) of
165 |         invalid -> %% coordinator logs this as conflicting equal-version configs
166 |             {invalid_config, Config};
167 |         {Youngest, OlderThanUs, StatusDict} ->
168 |             case rabbit_clusterer_config:compare(Youngest, Config) of
169 |                 coeval when OlderThanUs =:= [] ->
170 |                     %% We have the most up to date config. But we must
171 |                     %% use Youngest from here on as it has the updated
172 |                     %% node_ids map.
173 |                     (case Kind of
174 |                          join   -> fun maybe_join/3;
175 |                          rejoin -> fun maybe_rejoin/3
176 |                      end)(BadNodes, StatusDict,
177 |                           State #state { config = Youngest });
178 |                 coeval ->
179 |                     %% Update nodes which are older than us. In
180 |                     %% reality they're likely to receive lots of the
181 |                     %% same update from everyone else, but meh,
182 |                     %% they'll just have to cope.
183 |                     %%
184 |                     %% We deliberately do this cast out of Comms to
185 |                     %% preserve ordering of messages.
186 |                     Msg = rabbit_clusterer_coordinator:template_new_config(
187 |                             Youngest),
188 |                     ok = rabbit_clusterer_comms:multi_cast(
189 |                            OlderThanUs, Msg, State #state.comms),
190 |                     request_status(State #state { config = Youngest });
191 |                 younger -> %% cannot be older or invalid
192 |                     {config_changed, Youngest}
193 |             end
194 |     end;
195 | event({comms, {Replies, BadNodes}}, State = #state { kind     = rejoin,
196 |                                                      status   = awaiting_awaiting,
197 |                                                      awaiting = MyAwaiting }) ->
198 |     InvalidOrUndef = [N || {N, Res} <- Replies,
199 |                            Res =:= invalid orelse Res =:= undefined ],
200 |     case {BadNodes, InvalidOrUndef} of
201 |         {[], []} -> %% no bad nodes and no invalid/undefined reply
202 |             MyNode = node(),
203 |             G = digraph:new(),
204 |             try
205 |                 %% To win, we need to find that we are in a cycle, and
206 |                 %% that cycle, treated as a single unit, has no
207 |                 %% outgoing edges. If we detect this, then we can
208 |                 %% start to grab locks. In all other cases, we just go
209 |                 %% back around.
210 |                 %% Add all vertices. This is slightly harder than
211 |                 %% you'd imagine because we could have that a node
212 |                 %% depends on a node which we've not queried yet
213 |                 %% (because it's a badnode).
214 |                 Replies1 = [{MyNode, MyAwaiting} | Replies],
215 |                 Nodes = lists:usort(
216 |                           lists:append(
217 |                             [[N|Awaiting] || {N, Awaiting} <- Replies1])),
218 |                 [digraph:add_vertex(G, N) || N <- Nodes],
219 |                 [digraph:add_edge(G, N, T) || {N, Awaiting} <- Replies1,
220 |                                               T <- Awaiting],
221 |                 %% We want to use the
222 |                 %% digraph_utils:cyclic_strong_components/1 call as it
223 |                 %% captures the general case nicely: it returns a list
224 |                 %% of groups of nodes where each group is a set of
225 |                 %% nodes which are dependent on each other. However,
226 |                 %% for simple graphs with no loops at all, this call
227 |                 %% can return an empty list. Rather than detect and
228 |                 %% special case for that, we instead make every node
229 |                 %% dependent on itself. For simple graphs, this will
230 |                 %% result in each group returned being a single node.
231 |                 [digraph:add_edge(G, N, N) || N <- Nodes],
232 |                 CSC = digraph_utils:cyclic_strong_components(G),
233 |                 [OurComponent] = [C || C <- CSC, lists:member(MyNode, C)],
234 |                 %% Detect if there are any outbound edges from this
235 |                 %% component
236 |                 case [N || V <- OurComponent,
237 |                            N <- digraph:out_neighbours(G, V),
238 |                            not lists:member(N, OurComponent) ] of
239 |                     [] -> %% We appear to be in the "root"
240 |                           %% component. Begin the fight.
241 |                           lock_nodes(State);
242 |                     _  -> delayed_request_status(State)
243 |                 end
244 |             after
245 |                 true = digraph:delete(G) %% always free the graph, even on a crash
246 |             end;
247 |         _ ->
248 |             %% A node was bad or gave no usable answer: go around again...
249 |             delayed_request_status(State)
250 |     end;
251 | 
252 | event({comms, lock_rejected}, State = #state { kind   = rejoin,
253 |                                                status = awaiting_lock }) ->
254 |     %% Oh, well let's just wait and try again. Something must have
255 |     %% changed.
256 |     delayed_request_status(State);
257 | event({comms, lock_ok}, #state { kind       = rejoin,
258 |                                  status     = awaiting_lock,
259 |                                  config     = Config,
260 |                                  eliminable = Eliminable }) ->
261 |     ok = rabbit_clusterer_utils:eliminate_mnesia_dependencies(Eliminable),
262 |     {success, Config};
263 | 
264 | event({request_awaiting, Fun}, State = #state { kind     = rejoin,
265 |                                                 awaiting = Awaiting }) ->
266 |     ok = Fun(Awaiting), %% pass our own awaiting list to the supplied fun
267 |     {continue, State};
268 | 
269 | event({delayed_request_status, Ref},
270 |       State = #state { status = {delayed_request_status, Ref} }) ->
271 |     request_status(State);
272 | event({delayed_request_status, _Ref}, State) ->
273 |     %% Not the delay we are currently waiting on: ignore it
274 |     {continue, State};
275 | 
276 | event({request_config, NewNode, NewNodeID, Fun},
277 |       State = #state { node_id = NodeID, config = Config }) ->
278 |     %% Right here we could have a node that we're dependent on being
279 |     %% reset.
280 |     {NodeIDChanged, Config1} =
281 |         rabbit_clusterer_config:add_node_id(NewNode, NewNodeID, NodeID, Config),
282 |     ok = Fun(Config1),
283 |     case NodeIDChanged of
284 |         true  -> {config_changed, Config1};
285 |         false -> {continue, State #state { config = Config1 }}
286 |     end;
287 | 
288 | event({new_config, ConfigRemote, Node},
289 |       State = #state { node_id = NodeID, config = Config }) ->
290 |     case rabbit_clusterer_config:compare(ConfigRemote, Config) of
291 |         younger -> %% Here we also need to make sure we forward this to
292 |                    %% anyone we're currently trying to cluster with:
293 |                    %% the fact that we're about to change which config
294 |                    %% we're using clearly invalidates our current
295 |                    %% config and it's not just us using this
296 |                    %% config. We send from here and not comms as we're
297 |                    %% about to kill off comms anyway so there's no
298 |                    %% ordering issues to consider.
299 |                    ok = rabbit_clusterer_coordinator:send_new_config(
300 |                           ConfigRemote,
301 |                           rabbit_clusterer_config:nodenames(Config) --
302 |                               [node(), Node]),
303 |                    {config_changed, ConfigRemote};
304 |         older   -> ok = rabbit_clusterer_coordinator:send_new_config(Config, Node),
305 |                    {continue, State};
306 |         coeval  -> Config1 = rabbit_clusterer_config:update_node_id(
307 |                                Node, ConfigRemote, NodeID, Config),
308 |                    {continue, State #state { config = Config1 }};
309 |         invalid -> %% ignore
310 |                    {continue, State}
311 |     end.
312 | 
313 | %%----------------------------------------------------------------------------
314 | %% 'join' helpers
315 | %%----------------------------------------------------------------------------
316 | 
317 | %% Decide whether a joining node may proceed, given its peers' statuses.
318 | %%
319 | %% Everyone here has the same config, thus the statuses can be
320 | %% trusted as the statuses of all nodes trying to achieve *this*
321 | %% config and not some other config.
322 | %%
323 | %% Expected keys in StatusDict are:
324 | %% - preboot: Clusterer has started, but the boot step not yet hit
325 | %% - {transitioner, join}:
326 | %%    it's joining some cluster - blocked in Clusterer
327 | %% - {transitioner, rejoin}:
328 | %%    it's rejoining some cluster - blocked in Clusterer
329 | %% - booting:
330 | %%    Clusterer is happy and the rest of rabbit is currently booting
331 | %% - ready: Clusterer is happy and enough of rabbit has booted
332 | %%
333 | %% Ready nodes are nodes that are in this cluster (well, they could
334 | %% be in any cluster, but seeing as we've checked everyone has the
335 | %% same cluster config as us, we're sure it really is *this*
336 | %% cluster) and are fully up and running.
337 | %%
338 | %% If a ready node exists, we should just reset and join into that,
339 | %% and ignore anything about gospel: it's possible that gospel is
340 | %% {node, node()} but that, in combination with a ready node,
341 | %% suggests that the cluster previously existed with an older
342 | %% version of 'us': we must have been cleaned out and restarted
343 | %% (aka host-name reuse). Here we don't care about BadNodes.
344 | %%
345 | %% If no ready node exists we can only safely proceed if there are
346 | %% no BadNodes, and everyone is joining (rather than rejoining)
347 | %% i.e. transitioner kind for all is 'join'. In all other cases, we
348 | %% must wait:
349 | %%
350 | %% - If BadNodes =/= [] then there may be a node that was cleanly
351 | %% shutdown last with what we think is the current config and so
352 | %% if it was started up, it would rejoin (itself, sort of) and
353 | %% then become ready: we could then sync to it.
354 | %%
355 | %% - If the transitioner kind is not all 'join' then some other
356 | %% nodes must be rejoining. We should wait for them to succeed (or
357 | %% at least change state) because if they do succeed we should
358 | %% sync off them.
359 | maybe_join(BadNodes, StatusDict, State = #state { config = Config }) ->
360 |     Seen     = dict:fetch_keys(StatusDict),
361 |     AnyReady = lists:member(ready, Seen),
362 |     MayLead  = Seen =:= [{transitioner, join}] andalso BadNodes =:= [],
363 |     case {AnyReady, MayLead} of
364 |         {true, _} ->
365 |             ok = cluster_with_nodes(Config),
366 |             {success, Config};
367 |         {false, true} ->
368 |             case maybe_form_new_cluster(Config) of
369 |                 true  -> {success, Config};
370 |                 false -> delayed_request_status(State)
371 |             end;
372 |         {false, false} ->
373 |             delayed_request_status(State)
374 |     end.
375 | 
376 | cluster_with_nodes(Config) -> %% reset ourselves, then join the config's nodes
377 |     ok = rabbit_clusterer_utils:make_mnesia_singleton(true),
378 |     Names = rabbit_clusterer_config:nodenames(Config),
379 |     Type  = rabbit_clusterer_config:node_type(node(), Config),
380 |     ok = rabbit_clusterer_utils:configure_cluster(Names, Type).
381 | 
%% Elects a leader for a brand new cluster and, if the local node is
%% that leader, sets it up as a single-node cluster.
%%
%% The leader is the gospel node when gospel is {node, Node}, or the
%% smallest disc node name when gospel is 'reset'. Returns 'true' if
%% the local node was the leader and has configured itself, 'false'
%% otherwise (in which case nothing is changed locally).
maybe_form_new_cluster(Config) ->
    %% Is it necessary to limit the election of a leader to disc
    %% nodes? No: we're only here when we have everyone in the cluster
    %% joining, so we know we wouldn't be creating a RAM-node-only
    %% cluster. Given that we enforce that the cluster config must
    %% have at least one disc node in it anyway, it's safe to allow a
    %% RAM node to lead. However, I'm not 100% sure that the rest of
    %% rabbit/mnesia likes that, so we leave in the 'disc'
    %% filter. This might get reviewed in QA.
    Self = node(),
    {Leader, Wipe} =
        case rabbit_clusterer_config:gospel(Config) of
            reset ->
                DiscNodes = rabbit_clusterer_config:disc_nodenames(Config),
                {lists:min(DiscNodes), true};
            {node, GospelNode} ->
                %% Only wipe when the gospel lives on some other node.
                {GospelNode, GospelNode =/= Self}
        end,
    if
        Leader =:= Self ->
            ok = rabbit_clusterer_utils:make_mnesia_singleton(Wipe),
            SelfType = rabbit_clusterer_config:node_type(Self, Config),
            ok = rabbit_clusterer_utils:configure_cluster([Self], SelfType),
            true;
        true ->
            false
    end.
404 | 
405 | %%----------------------------------------------------------------------------
406 | %% 'rejoin' helpers
407 | %%----------------------------------------------------------------------------
408 | 
%% Issues a 'request_awaiting' multi-call, addressed to the rejoin
%% transitioner, to every node in RejoiningNodes via Comms, and then
%% continues with the status set to 'awaiting_awaiting'.
collect_dependency_graph(RejoiningNodes, #state { comms = Comms } = State) ->
    Request = {{transitioner, rejoin}, request_awaiting},
    ok = rabbit_clusterer_comms:multi_call(RejoiningNodes, Request, Comms),
    {continue, State #state { status = awaiting_awaiting }}.
413 | 
%% Decides how a rejoining node proceeds, given BadNodes (presumably
%% the nodes that did not answer the status request - confirm against
%% the caller) and StatusDict, a dict mapping each reported status to
%% the list of nodes reporting it.
%%
%% Outcomes:
%% - {success, Config} if some node reports 'ready';
%% - a delayed status re-request if we are a RAM node, if a node we
%%   depend on is in BadNodes, or if we still await nodes and nobody
%%   else is rejoining;
%% - lock_nodes/1 if there is no node left that we must await;
%% - collect_dependency_graph/2 if we await nodes and others are
%%   rejoining too.
maybe_rejoin(BadNodes, StatusDict, State = #state { config = Config }) ->
    %% Everyone who's here is on the same config as us. If anyone is
    %% running then we can just declare success and trust mnesia to
    %% join into them.
    Self     = node(),
    AnyReady = dict:is_key(ready, StatusDict),
    SelfType = rabbit_clusterer_config:node_type(Self, Config),
    case {AnyReady, SelfType} of
        {true, _} ->
            %% Someone is running, so we should be able to cluster to
            %% them.
            {success, Config};
        {false, ram} ->
            %% We're ram; can't do anything but wait for someone else
            delayed_request_status(State);
        {false, _} ->
            {All, _Disc, Running} = rabbit_node_monitor:read_cluster_status(),
            ConfigDiscSet = ordsets:from_list(
                              rabbit_clusterer_config:disc_nodenames(Config)),
            RunningSet = ordsets:from_list(Running),
            %% Disc nodes of the config that the recorded cluster
            %% status lists as running, excluding ourselves.
            DependencySet =
                ordsets:del_element(
                  Self, ordsets:intersection(ConfigDiscSet, RunningSet)),
            BadSet = ordsets:from_list(BadNodes),
            Joiners = case dict:find({transitioner, join}, StatusDict) of
                          error         -> [];
                          {ok, Joining} -> Joining
                      end,
            JoinerSet = ordsets:from_list(Joiners),
            %% Dependencies that are not merely joining must be awaited.
            AwaitSet = ordsets:subtract(DependencySet, JoinerSet),
            %% Nodes in the recorded status that the config no longer
            %% mentions.
            KnownSet   = ordsets:from_list(All),
            ConfigSet  = ordsets:from_list(
                           rabbit_clusterer_config:nodenames(Config)),
            RemovedSet = ordsets:subtract(KnownSet, ConfigSet),
            EliminableSet = ordsets:union(JoinerSet, RemovedSet),
            State1 = State #state {
                       awaiting   = ordsets:to_list(AwaitSet),
                       eliminable = ordsets:to_list(EliminableSet) },
            DependenciesAlive = ordsets:is_disjoint(DependencySet, BadSet),
            NothingAwaited    = ordsets:size(AwaitSet) =:= 0,
            Rejoiners         = dict:find({transitioner, rejoin}, StatusDict),
            case {DependenciesAlive, NothingAwaited, Rejoiners} of
                {false, _, _} ->
                    %% We might depend on a node in BadNodes. We must
                    %% wait for it to appear.
                    delayed_request_status(State1);
                {true, true, _} ->
                    %% Everyone we depend on is alive in some form and
                    %% there is nobody to wait for: we win!
                    lock_nodes(State1);
                {true, false, error} ->
                    %% No one else is rejoining, nothing we can do but
                    %% wait.
                    delayed_request_status(State1);
                {true, false, {ok, Rejoining}} ->
                    collect_dependency_graph(Rejoining, State1)
            end
    end.
473 | 
%% Requests, via Comms, a lock on every node named in the config and
%% continues with the status set to 'awaiting_lock'.
lock_nodes(State = #state { comms = Comms, config = Config }) ->
    ConfigNodes = rabbit_clusterer_config:nodenames(Config),
    ok = rabbit_clusterer_comms:lock_nodes(ConfigNodes, Comms),
    {continue, State #state { status = awaiting_lock }}.
478 | 
479 | %%----------------------------------------------------------------------------
480 | %% common helpers
481 | %%----------------------------------------------------------------------------
482 | 
%% Sends a {request_status, Node, NodeID} multi-call, carrying the
%% local node name and id, to every node in the config except the
%% local one, then continues with the status set to 'awaiting_status'.
request_status(State = #state { node_id = NodeID,
                                config  = Config,
                                comms   = Comms }) ->
    Self    = node(),
    Peers   = rabbit_clusterer_config:nodenames(Config) -- [Self],
    Request = {request_status, Self, NodeID},
    ok = rabbit_clusterer_comms:multi_call(Peers, Request, Comms),
    {continue, State #state { status = awaiting_status }}.
491 | 
%% Same as delayed_request_status/2, sleeping for ?MINI_SLEEP.
delayed_request_status(State) -> delayed_request_status(?MINI_SLEEP, State).
494 | 
%% Returns a 'sleep' instruction for Sleep, tagged with a fresh
%% reference. The same {delayed_request_status, Ref} term is used both
%% as the third element of the instruction and as the new status
%% (presumably so a stale wake-up can be told apart from the current
%% one - confirm against the event handler).
delayed_request_status(Sleep, State) ->
    %% TODO: work out some sensible timeout value
    Tag = {delayed_request_status, make_ref()},
    {sleep, Sleep, Tag, State #state { status = Tag }}.
500 | 
%% Takes a k/v list of nodes and their replies - either a
%% {Config, Status} tuple, or the atom 'preboot' if the node is in the
%% process of starting up - plus the local node's id and config.
%%
%% Returns 'invalid' if any reported config compares as invalid
%% against the youngest config seen so far or against the local one.
%% Otherwise returns a tuple containing
%% 1) the youngest config of all, with an enriched node_ids map
%% 2) a list of nodes operating with configs older than the local node's
%% 3) a dict mapping status to lists of nodes
analyse_node_statuses(NodeConfigStatusList, NodeID, Config) ->
    Step = fun (Reply, Acc) -> analyse_node_status(Config, Reply, Acc) end,
    Seed = {Config, [], [], dict:new()},
    case lists:foldr(Step, Seed, NodeConfigStatusList) of
        {Youngest, Older, IDs, Statuses} ->
            %% We want to make sure anything that we had in Config
            %% that does not exist in IDs is still maintained.
            WithOrigIDs = rabbit_clusterer_config:transfer_node_ids(
                            Config, Youngest),
            Enriched = rabbit_clusterer_config:add_node_ids(
                         IDs, NodeID, WithOrigIDs),
            {Enriched, Older, Statuses};
        invalid ->
            invalid
    end.
523 | 
%% Fold step for analyse_node_statuses/3. The accumulator is either
%% 'invalid' (which is sticky) or {Youngest, Older, IDs, Statuses}:
%% the youngest config seen so far, the nodes whose config is older
%% than the local Config, a {Node, NodeId} list, and a dict from
%% status to the nodes reporting it.
analyse_node_status(_Config, _Reply, invalid) ->
    invalid;
analyse_node_status(_Config, {Node, preboot},
                    {Youngest, Older, IDs, Statuses}) ->
    %% A booting node contributes no config or id, only its status.
    {Youngest, Older, IDs, dict:append(preboot, Node, Statuses)};
analyse_node_status(Config, {Node, {NodeConfig, NodeStatus}},
                    {Youngest, Older, IDs, Statuses}) ->
    VsYoungest = rabbit_clusterer_config:compare(NodeConfig, Youngest),
    VsLocal    = rabbit_clusterer_config:compare(NodeConfig, Config),
    case VsYoungest =:= invalid orelse VsLocal =:= invalid of
        true ->
            invalid;
        false ->
            Youngest1 = if VsYoungest =:= younger -> NodeConfig;
                           true                   -> Youngest
                        end,
            Older1 = if VsLocal =:= older -> [Node | Older];
                        true              -> Older
                     end,
            NodeId = rabbit_clusterer_config:node_id(Node, NodeConfig),
            {Youngest1,
             Older1,
             [{Node, NodeId} | IDs],
             dict:append(NodeStatus, Node, Statuses)}
    end.
547 | 


--------------------------------------------------------------------------------