├── .gitignore ├── test ├── src │ ├── clusterer_test.hrl │ ├── clusterer_utils.erl │ ├── clusterer_test.erl │ ├── clusterer_node.erl │ ├── clusterer_interpreter.erl │ └── clusterer_program.erl └── README.txt ├── rabbitmq-server.patch ├── Makefile ├── src ├── rabbit_clusterer_sup.erl ├── rabbit_clusterer_comms_sup.erl ├── rabbit_clusterer.erl ├── rabbit_clusterer_utils.erl ├── rabbit_clusterer_comms.erl ├── rabbit_clusterer_config.erl ├── rabbit_clusterer_coordinator.erl └── rabbit_clusterer_transitioner.erl ├── CONTRIBUTING.md ├── priv └── schema │ └── rabbitmq_clusterer.schema ├── CODE_OF_CONDUCT.md ├── rabbitmq-components.mk ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .sw? 2 | .*.sw? 3 | *.beam 4 | .erlang.mk/ 5 | cover/ 6 | deps/ 7 | doc/ 8 | ebin/ 9 | logs/ 10 | plugins/ 11 | 12 | rabbitmq_clusterer.d 13 | -------------------------------------------------------------------------------- /test/src/clusterer_test.hrl: -------------------------------------------------------------------------------- 1 | -record(state, { seed, %% model state of the test generator; seed is the integer that drives program generation 2 | node_count, 3 | nodes, %% orddict: node name -> #node{} 4 | config, %% most recently set #config{} 5 | valid_config, %% #config{} or undefined 6 | active_config %% #config{} or undefined 7 | }). 8 | %% One modelled node; state holds atoms such as ready or off. 9 | -record(node, { name, 10 | port, 11 | state, 12 | pid 13 | }). 14 | %% A cluster config: nodes is an orddict keyed by node name, gospel is reset or {node, Name}. 15 | -record(config, { version, 16 | nodes, 17 | gospel }). 18 | %% One program step: its instructions plus the #state{} expected once the step has completed. 19 | -record(step, { modify_node_instrs, 20 | modify_config_instr, 21 | existential_node_instr, 22 | final_state }). 
23 | -------------------------------------------------------------------------------- /rabbitmq-server.patch: -------------------------------------------------------------------------------- 1 | diff -r e9637021f623 scripts/rabbitmq-server 2 | --- a/scripts/rabbitmq-server Thu Aug 01 15:36:09 2013 +0100 3 | +++ b/scripts/rabbitmq-server Tue Aug 06 11:15:37 2013 +0100 4 | @@ -59,7 +59,7 @@ 5 | 6 | RABBITMQ_START_RABBIT= 7 | [ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT=" -noinput" 8 | -[ "x" = "x$RABBITMQ_NODE_ONLY" ] && RABBITMQ_START_RABBIT="$RABBITMQ_START_RABBIT -s rabbit boot " 9 | +[ "x" = "x$RABBITMQ_NODE_ONLY" ] && RABBITMQ_START_RABBIT="$RABBITMQ_START_RABBIT -s rabbit_clusterer boot -pa ${RABBITMQ_PLUGINS_DIR}/rabbitmq_clusterer.ez/rabbitmq_clusterer-1.0.0/ebin" 10 | 11 | case "$(uname -s)" in 12 | CYGWIN*) # we make no attempt to record the cygwin pid; rabbitmqctl wait 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT = rabbitmq_clusterer 2 | PROJECT_DESCRIPTION = Declarative RabbitMQ clustering 3 | PROJECT_MOD = rabbit_clusterer 4 | PROJECT_APP_EXTRA_KEYS = {broker_version_requirements, ["3.6.0", "3.7.0"]} 5 | 6 | BUILD_DEPS = rabbit_common rabbit 7 | 8 | DEP_EARLY_PLUGINS = rabbit_common/mk/rabbitmq-early-plugin.mk 9 | DEP_PLUGINS = rabbit_common/mk/rabbitmq-plugin.mk 10 | 11 | # FIXME: Use erlang.mk patched for RabbitMQ, while waiting for PRs to be 12 | # reviewed and merged. 13 | 14 | ERLANG_MK_REPO = https://github.com/rabbitmq/erlang.mk.git 15 | ERLANG_MK_COMMIT = rabbitmq-tmp 16 | 17 | include rabbitmq-components.mk 18 | include erlang.mk 19 | 20 | # -------------------------------------------------------------------- 21 | # Testing. 
22 | # -------------------------------------------------------------------- 23 | 24 | # clusterer test suite was never finished 25 | # and currently disabled 26 | -------------------------------------------------------------------------------- /src/rabbit_clusterer_sup.erl: -------------------------------------------------------------------------------- 1 | %% The contents of this file are subject to the Mozilla Public License 2 | %% Version 1.1 (the "License"); you may not use this file except in 3 | %% compliance with the License. You may obtain a copy of the License at 4 | %% https://www.mozilla.org/MPL/1.1/ 5 | %% 6 | %% Software distributed under the License is distributed on an "AS IS" 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 8 | %% License for the specific language governing rights and limitations 9 | %% under the License. 10 | %% 11 | %% The Original Code is RabbitMQ. 12 | %% 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016 15 | %% Pivotal Software, Inc. All Rights Reserved. 16 | 17 | -module(rabbit_clusterer_sup). 18 | 19 | -behaviour(supervisor2). 20 | 21 | -export([start_link/0, init/1]). 22 | %% Starts the top-level clusterer supervisor. 23 | start_link() -> 24 | supervisor2:start_link(?MODULE, []). 25 | %% one_for_all with no restarts allowed; children are the comms supervisor, then the coordinator. 26 | init([]) -> 27 | CommsSup = {rabbit_clusterer_comms_sup, 28 | {rabbit_clusterer_comms_sup, start_link, []}, 29 | intrinsic, infinity, supervisor, [rabbit_clusterer_comms_sup]}, 30 | Coordinator = {rabbit_clusterer_coordinator, 31 | {rabbit_clusterer_coordinator, start_link, []}, 32 | intrinsic, 16#ffffffff, worker, [rabbit_clusterer_coordinator]}, 33 | {ok, {{one_for_all, 0, 1}, [CommsSup, Coordinator]}}. 
34 | -------------------------------------------------------------------------------- /src/rabbit_clusterer_comms_sup.erl: -------------------------------------------------------------------------------- 1 | %% The contents of this file are subject to the Mozilla Public License 2 | %% Version 1.1 (the "License"); you may not use this file except in 3 | %% compliance with the License. You may obtain a copy of the License at 4 | %% https://www.mozilla.org/MPL/1.1/ 5 | %% 6 | %% Software distributed under the License is distributed on an "AS IS" 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 8 | %% License for the specific language governing rights and limitations 9 | %% under the License. 10 | %% 11 | %% The Original Code is RabbitMQ. 12 | %% 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016 15 | %% Pivotal Software, Inc. All Rights Reserved. 16 | 17 | -module(rabbit_clusterer_comms_sup). 18 | 19 | -behaviour(supervisor). 20 | 21 | -export([start_link/0, start_comms/0]). 22 | 23 | -export([init/1]). 24 | 25 | -define(SERVER, ?MODULE). 26 | %% Starts the comms supervisor, registered locally under the module name. 27 | start_link() -> 28 | supervisor:start_link({local, ?SERVER}, ?MODULE, []). 29 | %% Starts one more comms child and returns the extra term its start function handed back. 30 | start_comms() -> 31 | {ok, _CommsPid, CommsToken} = supervisor:start_child(?SERVER, []), 32 | {ok, CommsToken}. 33 | 34 | %%---------------------------------------------------------------------------- 35 | %% simple_one_for_one: temporary rabbit_clusterer_comms workers are added on demand by start_comms/0. 36 | init([]) -> 37 | CommsSpec = {comms, {rabbit_clusterer_comms, start_link, []}, 38 | temporary, 16#ffffffff, worker, [rabbit_clusterer_comms]}, 39 | {ok, {{simple_one_for_one, 10, 10}, [CommsSpec]}}. 40 | -------------------------------------------------------------------------------- /test/README.txt: -------------------------------------------------------------------------------- 1 | Testing Plan 3. 2 | 3 | # Plan 1 was: 4 | program <- dsl 5 | run program 6 | verify result. 
7 | 8 | The problem was that generating a valid program required complex linear types, 9 | making it infeasible. I then went to plan 2: 10 | 11 | # Plan 2 was: 12 | one process per node. Up to one config process per node. 13 | Iterative expansion of program with minimal coordination and state by 14 | essentially allowing node processes to do whatever they want to their 15 | node. 16 | 17 | Problem here is that the node processes can't really validate the 18 | action they attempted to apply to their node as their node is 19 | influenced by everyone else too. E.g. you start a node, but you can't 20 | even expect to find later that it's up because some other node process 21 | may have applied a config to their node which turns our node off. 22 | 23 | General process was to ask everyone to "observe and pick your next 24 | action", then "apply action". Repeat. 25 | 26 | # Plan 3 is: 27 | Sort of a variation on Plan 2, but with much greater coordination. 28 | 29 | Driver asks all nodes for their instruction. Node processes select based 30 | on current known state of their node. Driver uses all this to predict 31 | the next stable state. Driver allows all node processes to proceed, 32 | which they do. We then continuously poll all nodes until they're 33 | stable - i.e. pending_shutdown, off, or ready. At that point, we 34 | compare global state with predicted state. 35 | 36 | Now there are large areas which will not be tested by plan 3 37 | (i.e. application of changes during times of flux), but it's the 38 | sanest approach to testing yet, and maybe implementable. 39 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | RabbitMQ projects use pull requests to discuss, collaborate on and accept code contributions. 4 | Pull requests are the primary place for discussing code changes. 
5 | 6 | ## How to Contribute 7 | 8 | The process is fairly standard: 9 | 10 | * Fork the repository or repositories you plan on contributing to 11 | * Clone the [RabbitMQ umbrella repository](https://github.com/rabbitmq/rabbitmq-public-umbrella) 12 | * `cd umbrella`, `make co` 13 | * Create a branch with a descriptive name in the relevant repositories 14 | * Make your changes, run tests, commit with a [descriptive message](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html), push to your fork 15 | * Submit pull requests with an explanation of what has been changed and **why** 16 | * Submit a filled out and signed [Contributor Agreement](https://github.com/rabbitmq/ca#how-to-submit) if needed (see below) 17 | * Be patient. We will get to your pull request eventually 18 | 19 | If what you are going to work on is a substantial change, please first ask the core team 20 | for their opinion on the [RabbitMQ mailing list](https://groups.google.com/forum/#!forum/rabbitmq-users). 21 | 22 | 23 | ## Code of Conduct 24 | 25 | See [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md). 26 | 27 | 28 | ## Contributor Agreement 29 | 30 | If you want to contribute a non-trivial change, please submit a signed copy of our 31 | [Contributor Agreement](https://github.com/rabbitmq/ca#how-to-submit) around the time 32 | you submit your pull request. This will make it much easier (in some cases, possible) 33 | for the RabbitMQ team at Pivotal to merge your contribution. 34 | 35 | 36 | ## Where to Ask Questions 37 | 38 | If something isn't clear, feel free to ask on our [mailing list](https://groups.google.com/forum/#!forum/rabbitmq-users). 39 | -------------------------------------------------------------------------------- /priv/schema/rabbitmq_clusterer.schema: -------------------------------------------------------------------------------- 1 | {mapping, "clusterer.config", "rabbitmq_clusterer.config", 2 | [{datatype, string}, {validators, ["file_accessible"]}]}. 
3 | %% If clusterer.config is set it must be the only clusterer.* key; the translation then yields that string, otherwise []. 4 | {translation, "rabbitmq_clusterer.config", 5 | fun(Conf) -> 6 | case cuttlefish:conf_get("clusterer.config", Conf, undefined) of 7 | String when is_list(String) -> 8 | case cuttlefish_variable:filter_by_prefix("clusterer", Conf) of 9 | [{["clusterer", "config"], String}] -> String; 10 | _ -> cuttlefish:invalid("Config for clusterer defined in "++ 11 | String ++ " file. " ++ 12 | "All other clusterer configurations should be removed") 13 | end; 14 | _ -> [] 15 | end 16 | end}. 17 | %% Inline cluster config: version number. 18 | {mapping, "clusterer.version", "rabbitmq_clusterer.config.version", 19 | [{datatype, integer}]}. 20 | %% Inline cluster config: node list. Plain, disk and disc keys declare disk nodes; ram keys declare ram nodes. 21 | {mapping, "clusterer.nodes.$node", "rabbitmq_clusterer.config.nodes", 22 | [{datatype, atom}]}. 23 | 24 | {mapping, "clusterer.nodes.ram.$node", "rabbitmq_clusterer.config.nodes", 25 | [{datatype, atom}]}. 26 | 27 | {mapping, "clusterer.nodes.disk.$node", "rabbitmq_clusterer.config.nodes", 28 | [{datatype, atom}]}. 29 | 30 | {mapping, "clusterer.nodes.disc.$node", "rabbitmq_clusterer.config.nodes", 31 | [{datatype, atom}]}. 32 | %% Builds [{Node, disk | ram}]. One prefix query returns every clusterer.nodes.* entry (sub-keys included), so entries are classified by key shape and each node is listed exactly once. 33 | {translation, "rabbitmq_clusterer.config.nodes", 34 | fun(Conf) -> 35 | Nodes = cuttlefish_variable:filter_by_prefix("clusterer.nodes", Conf), 36 | DiskNodes = [Node || {["clusterer", "nodes", _], Node} <- Nodes] ++ 37 | [Node || {["clusterer", "nodes", Type, _], Node} <- Nodes, Type =:= "disk" orelse Type =:= "disc"], 38 | RamNodes = [Node || {["clusterer", "nodes", "ram", _], Node} <- Nodes], 39 | [{Node, disk} || Node <- DiskNodes] ++ [{Node, ram} || Node <- RamNodes] 40 | end}. 41 | %% Gospel: either the literal reset, or a node name given under clusterer.gospel.node. 42 | {mapping, "clusterer.gospel", "rabbitmq_clusterer.config.gospel", 43 | [{datatype, {enum, [reset]}}]}. 44 | 45 | {mapping, "clusterer.gospel.node", "rabbitmq_clusterer.config.gospel", 46 | [{datatype, atom}]}. 
47 | %% Yields reset when clusterer.gospel is reset; otherwise {node, N} with N read from clusterer.gospel.node. 48 | {translation, "rabbitmq_clusterer.config.gospel", 49 | fun(Conf) -> 50 | case cuttlefish:conf_get("clusterer.gospel", Conf, undefined) of 51 | reset -> reset; 52 | _ -> 53 | {node, cuttlefish:conf_get("clusterer.gospel.node", Conf)} 54 | end 55 | end}. 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, and in the interest of fostering an open 4 | and welcoming community, we pledge to respect all people who contribute through reporting 5 | issues, posting feature requests, updating documentation, submitting pull requests or 6 | patches, and other activities. 7 | 8 | We are committed to making participation in this project a harassment-free experience for 9 | everyone, regardless of level of experience, gender, gender identity and expression, 10 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, 11 | religion, or nationality. 12 | 13 | Examples of unacceptable behavior by participants include: 14 | 15 | * The use of sexualized language or imagery 16 | * Personal attacks 17 | * Trolling or insulting/derogatory comments 18 | * Public or private harassment 19 | * Publishing other's private information, such as physical or electronic addresses, 20 | without explicit permission 21 | * Other unethical or unprofessional conduct 22 | 23 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 24 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 25 | Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors 26 | that they deem inappropriate, threatening, offensive, or harmful. 
27 | 28 | By adopting this Code of Conduct, project maintainers commit themselves to fairly and 29 | consistently applying these principles to every aspect of managing this project. Project 30 | maintainers who do not follow or enforce the Code of Conduct may be permanently removed 31 | from the project team. 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an 34 | individual is representing the project or its community. 35 | 36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 37 | contacting a project maintainer at [info@rabbitmq.com](mailto:info@rabbitmq.com). All complaints will 38 | be reviewed and investigated and will result in a response that is deemed necessary and 39 | appropriate to the circumstances. Maintainers are obligated to maintain confidentiality 40 | with regard to the reporter of an incident. 41 | 42 | This Code of Conduct is adapted from the 43 | [Contributor Covenant](https://contributor-covenant.org), version 1.3.0, available at 44 | [contributor-covenant.org/version/1/3/0/](https://contributor-covenant.org/version/1/3/0/) 45 | -------------------------------------------------------------------------------- /src/rabbit_clusterer.erl: -------------------------------------------------------------------------------- 1 | %% The contents of this file are subject to the Mozilla Public License 2 | %% Version 1.1 (the "License"); you may not use this file except in 3 | %% compliance with the License. You may obtain a copy of the License at 4 | %% https://www.mozilla.org/MPL/1.1/ 5 | %% 6 | %% Software distributed under the License is distributed on an "AS IS" 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 8 | %% License for the specific language governing rights and limitations 9 | %% under the License. 10 | %% 11 | %% The Original Code is RabbitMQ. 12 | %% 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016 15 | %% Pivotal Software, Inc. All Rights Reserved. 16 | 17 | -module(rabbit_clusterer). 18 | 19 | -behaviour(application). 20 | 21 | -export([boot/0]). 22 | 23 | -export([apply_config/0, apply_config/1, %% for 'rabbitmqctl eval ...' 24 | status/0, status/1]). 25 | 26 | -export([start/2, stop/1]). 27 | 28 | %%---------------------------------------------------------------------------- 29 | %% Starts the rabbitmq_clusterer application, then tells the coordinator to begin coordination. 30 | boot() -> 31 | ok = application:start(rabbitmq_clusterer), 32 | ok = rabbit_clusterer_coordinator:begin_coordination(), 33 | ok. 34 | 35 | %% Apply_config allows cluster configs to be dynamically applied to a 36 | %% running system. Currently that's best done by rabbitmqctl eval, but 37 | %% may be improved in the future. 38 | apply_config() -> apply_config(undefined). 39 | 40 | apply_config(Config) -> rabbit_clusterer_coordinator:apply_config(Config). 41 | %% Prints the clusterer status of the local node. 42 | status() -> 43 | status(node()). 44 | %% Prints the status the coordinator reports for Node. Every message takes the formatted config as its first argument; that argument is the empty string when no config is known (preboot), hence ~s there. 45 | status(Node) -> 46 | {Message, Config, List} = 47 | case rabbit_clusterer_coordinator:request_status(Node) of 48 | preboot -> 49 | {"Clusterer is pre-booting.~s~n", undefined, []}; 50 | {Config1, booting} -> 51 | {"Clusterer is booting Rabbit into cluster configuration: " 52 | "~n~s~n", Config1, []}; 53 | {Config1, ready} -> 54 | {"Rabbit is running in cluster configuration: ~n~s~n" 55 | "Running nodes: ~p~n", Config1, 56 | [rabbit_mnesia:cluster_nodes(running)]}; 57 | {Config1, {transitioner, join}} -> 58 | {"Clusterer is trying to join into cluster configuration: " 59 | "~n~s~n", Config1, []}; 60 | {Config1, {transitioner, rejoin}} -> 61 | {"Clusterer is trying to rejoin cluster configuration: ~n~s~n", 62 | Config1, []} 63 | end, 64 | Config2 = case Config of 65 | undefined -> ""; 66 | _ -> rabbit_misc:format( 67 | "~p", [tl(rabbit_clusterer_config:to_proplist( 68 | undefined, Config))]) 69 | end, 70 | io:format(Message, [Config2 | List]). 
71 | 72 | %%---------------------------------------------------------------------------- 73 | %% application callbacks 74 | start(normal, []) -> rabbit_clusterer_sup:start_link(). 75 | 76 | stop(_State) -> ok. 77 | -------------------------------------------------------------------------------- /test/src/clusterer_utils.erl: -------------------------------------------------------------------------------- 1 | -module(clusterer_utils). 2 | 3 | -export([set_config/2, 4 | store_node/2, 5 | set_node_state/2, 6 | contains_node/2, 7 | make_config_active/1, 8 | localise_program/2]). 9 | 10 | -include("clusterer_test.hrl"). 11 | 12 | %%---------------------------------------------------------------------------- 13 | %% Always records Config as the current config. It also becomes the valid config when it lists at least one node and either no valid config exists yet or its version is strictly greater. 14 | set_config(Config = #config { nodes = [_|_] }, 15 | State = #state { valid_config = undefined }) -> 16 | State #state { config = Config, valid_config = Config }; 17 | set_config(Config = #config { nodes = [_|_], version = NewVersion }, 18 | State = #state { valid_config = #config { version = ValidVersion } }) 19 | when NewVersion > ValidVersion -> 20 | State #state { config = Config, valid_config = Config }; 21 | set_config(Config, State) -> 22 | State #state { config = Config }. 23 | %% Inserts or replaces the node in the state's orddict, keyed by the node's name. 24 | store_node(Entry = #node { name = Key }, State = #state { nodes = Known }) -> 25 | State #state { nodes = orddict:store(Key, Entry, Known) }. 26 | %% A ready node that the config does not contain is modelled as off; every other node is returned unchanged. 27 | set_node_state(Node = #node { name = Name, state = ready }, Config) -> 28 | case contains_node(Name, Config) of 29 | true -> Node; 30 | false -> Node #node { state = off } 31 | end; 32 | set_node_state(Node = #node { }, _Config) -> Node. 33 | contains_node(_Node, undefined) -> false; 34 | contains_node(Node, #config { nodes = Nodes }) -> orddict:is_key(Node, Nodes). 35 | 36 | %% Because we know that the valid config is only applied to nodes 37 | %% which are involved in the config, modelling the propagation is 38 | %% easy. 
39 | make_config_active(State = #state { nodes = Nodes, 40 | valid_config = VConfig = #config { } }) -> 41 | Demote = fun (_Name, Node) -> set_node_state(Node, VConfig) end, 42 | Updated = orddict:map(Demote, Nodes), 43 | State #state { nodes = Updated, 44 | active_config = VConfig }. 45 | %% Rewrites every node name in a program (initial state plus steps) so that it lives on Host. 46 | localise_program({InitialState, Steps}, Host) -> 47 | LocalState = localise_state(InitialState, Host), 48 | {LocalState, [localise_step(Step, Host) || Step <- Steps]}. 49 | %% Localises each instruction of a step and the step's expected final state. 50 | localise_step(#step { modify_node_instrs = NodeInstrs, 51 | modify_config_instr = ConfigInstr, 52 | existential_node_instr = ExistentialInstr, 53 | final_state = Expected }, Host) -> 54 | #step { modify_node_instrs = [localise_instr(NodeInstr, Host) || 55 | NodeInstr <- NodeInstrs], 56 | modify_config_instr = localise_instr(ConfigInstr, Host), 57 | existential_node_instr = localise_instr(ExistentialInstr, Host), 58 | final_state = localise_state(Expected, Host) }. 59 | %% Instructions carrying a node name (or a config) have it localised; the rest pass through untouched. 60 | localise_instr({Action, Name}, Host) 61 | when Action =:= stop_node; 62 | Action =:= start_node; 63 | Action =:= reset_node; 64 | Action =:= delete_node; 65 | Action =:= config_remove_node; 66 | Action =:= config_add_node -> 67 | {Action, localise_name(Name, Host)}; 68 | localise_instr({Action, Name, Config}, Host) 69 | when Action =:= apply_config_to_node; 70 | Action =:= start_node_with_config -> 71 | {Action, localise_name(Name, Host), localise_config(Config, Host)}; 72 | localise_instr({create_node, Name, Port}, Host) -> 73 | {create_node, localise_name(Name, Host), Port}; 74 | localise_instr({config_gospel_to, reset} = Unchanged, _Host) -> 75 | Unchanged; 76 | localise_instr({config_gospel_to, {node, Name}}, Host) -> 77 | {config_gospel_to, {node, localise_name(Name, Host)}}; 78 | localise_instr({config_version_to, _Ver} = Unchanged, _Host) -> 79 | Unchanged; 80 | localise_instr({config_shutdown_timeout_to, _ST} = Unchanged, _Host) -> 81 | Unchanged; 82 | localise_instr(noop, _Host) -> 83 | noop. 
84 | %% Keeps the name part of NodeName and swaps its host part for Host. 85 | localise_name(NodeName, Host) -> 86 | {Prefix, _OldHost} = rabbit_nodes:parts(NodeName), 87 | rabbit_nodes:make({Prefix, Host}). 88 | %% Localises every node key of a config and the gospel node, if any; undefined stays undefined. 89 | localise_config(undefined, _Host) -> 90 | undefined; 91 | localise_config(Config = #config { nodes = Nodes, gospel = Gospel }, Host) -> 92 | LocalNodes = [{localise_name(Name, Host), Value} || 93 | {Name, Value} <- orddict:to_list(Nodes)], 94 | LocalGospel = case Gospel of 95 | {node, GospelNode} -> {node, localise_name(GospelNode, Host)}; 96 | reset -> reset 97 | end, 98 | Config #config { nodes = orddict:from_list(LocalNodes), 99 | gospel = LocalGospel }. 100 | %% Localises the node table (keys and records) and all three configs held in a state. 101 | localise_state(State = #state { nodes = Nodes, 102 | config = Config, 103 | valid_config = VConfig, 104 | active_config = AConfig }, Host) -> 105 | LocalNodes = [{localise_name(Name, Host), localise_node(Node, Host)} || 106 | {Name, Node} <- orddict:to_list(Nodes)], 107 | State #state { 108 | nodes = orddict:from_list(LocalNodes), 109 | config = localise_config(Config, Host), 110 | valid_config = localise_config(VConfig, Host), 111 | active_config = localise_config(AConfig, Host) 112 | }. 113 | 114 | localise_node(Node = #node { name = OldName }, Host) -> 115 | Node #node { name = localise_name(OldName, Host) }. 116 | -------------------------------------------------------------------------------- /src/rabbit_clusterer_utils.erl: -------------------------------------------------------------------------------- 1 | %% The contents of this file are subject to the Mozilla Public License 2 | %% Version 1.1 (the "License"); you may not use this file except in 3 | %% compliance with the License. You may obtain a copy of the License at 4 | %% https://www.mozilla.org/MPL/1.1/ 5 | %% 6 | %% Software distributed under the License is distributed on an "AS IS" 7 | %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 8 | %% License for the specific language governing rights and limitations 9 | %% under the License. 10 | %% 11 | %% The Original Code is RabbitMQ. 12 | %% 13 | %% The Initial Developer of the Original Code is Pivotal Software, Inc. 
14 | %% Portions created by the Initial Developer are Copyright (C) 2013-2016 15 | %% Pivotal Software, Inc. All Rights Reserved. 16 | 17 | -module(rabbit_clusterer_utils). 18 | 19 | -export([stop_mnesia/0, 20 | stop_rabbit/0, 21 | start_rabbit_async/0, 22 | boot_rabbit_async/0, 23 | make_mnesia_singleton/1, 24 | eliminate_mnesia_dependencies/1, 25 | configure_cluster/2]). 26 | 27 | %%---------------------------------------------------------------------------- 28 | 29 | -define(PRE_SLEEP, 10000). %% 10 seconds 30 | %% Stops mnesia and insists that it reports stopped. 31 | stop_mnesia() -> 32 | stopped = mnesia:stop(), 33 | ok. 34 | 35 | ensure_start_mnesia() -> 36 | ok = mnesia:start(). 37 | %% Stops the rabbit application; "not started" counts as success, any other result is returned to the caller as is. 38 | stop_rabbit() -> 39 | case application:stop(rabbit) of 40 | ok -> ok; 41 | {error, {not_started, rabbit}} -> ok; 42 | Other -> Other 43 | end. 44 | %% Both async variants return ok immediately; the outcome is reported to the coordinator by spawn_starter/1. 45 | start_rabbit_async() -> 46 | ok = spawn_starter(fun rabbit:start/0). 47 | 48 | boot_rabbit_async() -> 49 | ok = spawn_starter(fun rabbit:boot/0). 50 | %% Runs Fun in a new process and reports rabbit_booted or rabbit_boot_failed to the coordinator. A failure is logged with its class and reason first, so the cause of a failed boot is not lost. 51 | spawn_starter(Fun) -> 52 | spawn(fun () -> 53 | try 54 | ok = Fun(), 55 | rabbit_clusterer_coordinator:rabbit_booted() 56 | catch Class:Reason -> 57 | error_logger:error_msg("Clusterer failed to start Rabbit: ~p:~p~n", [Class, Reason]), 58 | rabbit_clusterer_coordinator:rabbit_boot_failed() 59 | end 60 | end), 61 | ok. 62 | %% true: wipe the mnesia directory and reset the cluster status files. false: see the note in that clause. 63 | make_mnesia_singleton(true) -> 64 | %% With mnesia not running, we can't call 65 | %% rabbit_mnesia:force_reset() because that tries to read in the 66 | %% cluster status files from the mnesia directory which might not 67 | %% exist if we're a completely virgin node. So we just do the rest 68 | %% manually. 69 | error_logger:info_msg("Clusterer Resetting Rabbit~n"), 70 | ok = rabbit_mnesia:ensure_mnesia_dir(), 71 | ok = rabbit_file:recursive_delete( 72 | filelib:wildcard(rabbit_mnesia:dir() ++ "/*")), 73 | ok = rabbit_node_monitor:reset_cluster_status(), 74 | ok; 75 | make_mnesia_singleton(false) -> 76 | %% Note that this is wrong: in this case we actually want to 77 | %% eliminate everyone who isn't in our cluster - i.e. 
everyone 78 | %% mnesia thinks we're currently clustered with. However, due to 79 | %% limitations with del_table_copy (i.e. mnesia must not be 80 | %% running on remote node; it must be running on our node), this 81 | %% is difficult to orchestrate: it's easiest done by the 82 | %% eliminated nodes doing an RPC to us. But there are still cases 83 | %% where that may not work out correctly. However, this scenario 84 | %% can only occur when a cluster is being split up into other 85 | %% clusters. For MVP and this project, we don't consider that a 86 | %% use case, so we're going to just ignore this problem for the 87 | %% time being. 88 | eliminate_mnesia_dependencies([]). 89 | %% Starts mnesia, force-loads the rabbit tables, deletes the schema copy of each node in NodesToDelete, drops those nodes from the cluster status files, stops mnesia again and finally writes a force_load marker file into the mnesia dir. 90 | eliminate_mnesia_dependencies(NodesToDelete) -> 91 | ok = rabbit_mnesia:ensure_mnesia_dir(), 92 | ok = ensure_start_mnesia(), 93 | %% rabbit_table:force_load() does not error if 94 | %% mnesia:force_load_table errors(!) Thus we can safely run this 95 | %% even in clean state - i.e. one where neither the schema nor any 96 | %% tables actually exist. 97 | ok = rabbit_table:force_load(), 98 | case rabbit_table:is_present() of 99 | true -> ok = rabbit_table:wait_for_replicated(); 100 | false -> ok 101 | end, 102 | %% del_table_copy has to be done after the force_load but is also 103 | %% usefully idempotent. 104 | [{atomic,ok} = mnesia:del_table_copy(schema, N) || N <- NodesToDelete], 105 | ok = remove_from_cluster_status(NodesToDelete), 106 | ok = stop_mnesia(), 107 | %% We had to force load in case we had to delete any schemas. But 108 | %% once we've stopped mnesia (and we have to because rabbit 109 | %% upgrades expect to find mnesia stopped), mnesia seems to forget 110 | %% that it's been force_loaded and thus should now really behave 111 | %% as if it's the master. Consequently we have to touch the 112 | %% force_load file in the mnesia dir which rabbit_mnesia then 113 | %% finds and does another force load when rabbit actually boots. 
114 | ok = rabbit_file:write_file(filename:join(rabbit_mnesia:dir(), "force_load"), <<"">>), 115 | ok. 116 | %% Makes sure the rabbit application is loaded, then records the cluster nodes and this node's type in its environment. 117 | configure_cluster(Nodes, MyNodeType) -> 118 | ok = case application:load(rabbit) of 119 | {error, {already_loaded, rabbit}} -> ok; 120 | ok -> ok 121 | end, 122 | ok = application:set_env(rabbit, cluster_nodes, {Nodes, MyNodeType}). 123 | %% Drops Nodes from all three lists of the cluster status; if the status files are corrupt or missing they are reset instead. 124 | remove_from_cluster_status(Nodes) -> 125 | try 126 | {All, Disc, Running} = rabbit_node_monitor:read_cluster_status(), 127 | Remaining = {lists:subtract(All, Nodes), lists:subtract(Disc, Nodes), lists:subtract(Running, Nodes)}, 128 | ok = rabbit_node_monitor:write_cluster_status(Remaining) 129 | catch 130 | throw:{error, {corrupt_or_missing_cluster_files, _Stat, _Run}} -> 131 | ok = rabbit_node_monitor:reset_cluster_status() 132 | end. 133 | -------------------------------------------------------------------------------- /test/src/clusterer_test.erl: -------------------------------------------------------------------------------- 1 | -module(clusterer_test). 2 | 3 | -export([test/1, test/2, test_program/1]). 4 | 5 | -include("clusterer_test.hrl"). 6 | 7 | %%---------------------------------------------------------------------------- 8 | %% 9 | %% Testing the Clusterer 10 | %% 11 | %% Testing the Clusterer is challenging given the level of 12 | %% concurrency, eventually-consistent design, and the vast number of 13 | %% scenarios and modifications possible. The approach we take here is 14 | %% to deterministically generate programs which are then filtered and 15 | %% interpreted. These programs describe how to construct up to one 16 | %% cluster and operations upon the cluster. 17 | %% 18 | %% A program is a sequence of steps from a given starting state. Each 19 | %% step contains up to one instruction for each node, up to one 20 | %% instruction for modifying the cluster config, and up to one 21 | %% instruction for creating or deleting a node. All instructions are 22 | %% independent thus all the instructions within a step can be run in 23 | %% parallel. 
For example, a step can not contain both a "switch node X 24 | %% on" and a "delete node X" instruction. During program generation we 25 | %% capture the expected state of all the nodes at the end of each 26 | %% step. When we come to interpret the program, we use this expected 27 | %% state to compare to what we observe of the Real World. If we reach 28 | %% the end of the program and no divergence is observed then the 29 | %% program passed successfully. 30 | %% 31 | %% Program generation is driven by a seed. When the seed is reduced to 32 | %% 0, there is no more entropy available, and so program generation 33 | %% halts. At each step, the set of available instructions to choose 34 | %% from is highly dependent on the predicted state of the nodes at 35 | %% this point. Having constructed a list of viable instructions at a 36 | %% given point in the program, the seed is used to select the 37 | %% instruction. The list will always include the 'noop' instruction 38 | %% (but will never be just the 'noop' instruction), and the seed 39 | %% modulo the length of the list of valid instructions is used to 40 | %% select which instruction is chosen. The new seed is the current 41 | %% seed divided by the length of that list and is passed to the next 42 | %% stage of instruction selection. Thus the entropy of the seed is 43 | %% slowly reduced: it is consumed by selecting instructions from 44 | %% lists. This strategy means every seed will result in a unique 45 | %% program (though isomorphisms of various degrees are possible), and 46 | %% that a given seed will always generate the same program. In other 47 | %% words, the seed is equivalent to the perfect compression of the 48 | %% program (proof left as an exercise to the reader...). 49 | %% 50 | %% Program generation is heavily constrained to avoid generating 51 | %% programs where there are multiple possible valid outcomes, and to 52 | %% ensure we never have more than a single cluster running. 
The latter 53 | %% is essential to keep modelling of the nodes feasible. 54 | %% 55 | %% Having generated a program we then filter it to test whether or not 56 | %% it contains any interesting aspects. Currently we just look for 57 | %% programs which contain more than a single node running at any given 58 | %% point in time, but more elaborate filters are possible. Having 59 | %% selected a program to run, it is then passed to the interpreter. 60 | %% 61 | %% Some aspects of the interpreter are remarkably similar to the 62 | %% program generation itself: in the program generator we have to 63 | %% model changes to nodes and cluster configs and certainly much of 64 | %% the mechanism for making changes to the cluster configs is very 65 | %% similar in the interpreter. For nodes, we have one process per node 66 | %% to allow the possibility of changes to the nodes themselves for 67 | %% each step actually occurring in parallel. In this regard, given the 68 | %% program doesn't really specify scheduling of instructions beyond 69 | %% "concurrently", multiple different runs of the same program may 70 | %% result in different instructions being evaluated at different 71 | %% times. However, the point of the Clusterer (and the constraints of 72 | %% the program generation) is that it should be eventually consistent: 73 | %% the outcome of each step should be independent of the individual 74 | %% scheduling of instructions within a step. 75 | %% 76 | %% To detect divergence we have to be aware that a cluster of nodes 77 | %% may take a short period of time to stabilise at a new cluster 78 | %% config, to turn off, etc. The strategy we adopt is not fool-proof, 79 | %% but it'll do. We wait for all nodes to be stable in some way 80 | %% (i.e. off, reset, on or pending_shutdown, but not booting). We then 81 | %% wait a short amount of time and ask them all again for their 82 | %% state. 
%% If they're all still stable and they're all still stable in
%% the same way, then we declare the cluster is stable and that we can
%% actually check this state for divergence. At this point we compare
%% their stable states and the cluster configs they're running with
%% the state our program generation predicted for each step and fail
%% if any divergence is detected.
%%
%%----------------------------------------------------------------------------

%% Runs the programs for seeds 0 up to (but not including) Limit.
%% NB Limit is exclusive, not inclusive.
test(Limit) when Limit > 0 ->
    test(0, Limit).

%% Runs the programs for seeds From up to (but not including) To.
%% Returns {error, must_be_distributed_node} when the local node has
%% no distribution name; otherwise the result of test_sequence/4.
test(From, To) when To > From ->
    ThisNode = node(),
    if
        ThisNode =:= 'nonode@nohost' ->
            {error, must_be_distributed_node};
        true ->
            {_, Host} = rabbit_nodes:parts(ThisNode),
            io:format("Passed programs: ["),
            test_sequence(Host, To, From, 0)
    end.

%% Walks the seeds from N up to Limit (exclusive), printing each seed
%% whose program ran and passed. Stops at the first failing program
%% and returns its error; returns ok once N reaches Limit.
test_sequence(_Host, Limit, Limit, RanCount) ->
    io:format("].~n~p programs were ran and passed~n", [RanCount]),
    ok;
test_sequence(Host, Limit, N, RanCount) ->
    NextSeed = N + 1,
    case test_program(Host, N) of
        {_Program, ok} ->
            io:format("~p,", [N]),
            test_sequence(Host, Limit, NextSeed, RanCount + 1);
        {Program, Err} ->
            io:format("~nError encountered with program ~p:"
                      "~n~n~p~n~n~p~n", [N, Program, Err]),
            Err;
        skip ->
            test_sequence(Host, Limit, NextSeed, RanCount)
    end.

%% Runs a single program, given either as a seed (the program is
%% generated and filtered first) or as an already generated nomadic
%% program {InitialState, Steps}, which is localised to this host and
%% run unconditionally.
test_program(Seed) when is_integer(Seed) ->
    {_NodePart, LocalHost} = rabbit_nodes:parts(node()),
    test_program(LocalHost, Seed);
test_program({#state {}, Steps} = Nomadic) when is_list(Steps) ->
    {_NodePart, LocalHost} = rabbit_nodes:parts(node()),
    Localised = clusterer_utils:localise_program(Nomadic, LocalHost),
    Outcome = clusterer_interpreter:run_program(Localised),
    {Nomadic, Outcome}.

%% Generates the program for Seed and, if the filter accepts it,
%% localises it to Host and interprets it. Returns skip for filtered
%% out programs, otherwise {NomadicProgram, InterpreterResult}.
test_program(Host, Seed) ->
    Nomadic = clusterer_program:generate_program(new_state(Seed)),
    case filter_program(Nomadic) of
        run ->
            Localised = clusterer_utils:localise_program(Nomadic, Host),
            {Nomadic, clusterer_interpreter:run_program(Localised)};
        skip ->
            skip
    end.

%%----------------------------------------------------------------------------

%% The state program generation starts from: no nodes, an empty
%% version 0 config with gospel 'reset', and no valid/active config.
new_state(Seed) ->
    EmptyConfig = #config { version = 0,
                            nodes   = [],
                            gospel  = reset },
    #state { seed          = Seed,
             node_count    = 0,
             nodes         = orddict:new(),
             config        = EmptyConfig,
             valid_config  = undefined,
             active_config = undefined }.

%% Decides whether a generated program is worth running ('run') or
%% not ('skip').
filter_program(Program) ->
    %% Eventually there'll be a more sophisticated set of filters here.
    Interesting = two_ready(Program),
    if
        Interesting -> run;
        true        -> skip
    end.

%% True if any step of the program ends with more than one node in
%% the 'ready' state. Also prints the steps.
two_ready({_InitialState, Steps}) ->
    io:format("Steps: ~p~n", [Steps]),
    HasTwoReady =
        fun (#step { final_state = #state { nodes = Nodes } }) ->
                ReadyNames = [Name || {Name, #node { state = ready }}
                                          <- orddict:to_list(Nodes)],
                length(ReadyNames) > 1
        end,
    lists:any(HasTwoReady, Steps).
--------------------------------------------------------------------------------
/src/rabbit_clusterer_comms.erl:
--------------------------------------------------------------------------------
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License at
%% https://www.mozilla.org/MPL/1.1/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%% License for the specific language governing rights and limitations
%% under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is Pivotal Software, Inc.
%% Portions created by the Initial Developer are Copyright (C) 2013-2016
%% Pivotal Software, Inc. All Rights Reserved.

-module(rabbit_clusterer_comms).

-behaviour(gen_server).

-export([start_link/0, stop/1,
         multi_call/3, multi_cast/3,
         lock_nodes/2, lock/2, unlock/2]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

%% token:     {Pid, Ref} identifying this comms instance; included in
%%            every {comms, Token, ...} message sent back to a caller.
%% locked_by: pid currently holding this process as a lock, or undefined.
%% locking:   undefined, or {Locked, ToLock, ReplyTo} while a
%%            lock_nodes request is in progress.
-record(state, { token, locked_by, locking }).

-define(TARGET, rabbit_clusterer_coordinator).

%% In general the comms process exists to perform blocking calls to
%% other nodes, without causing the main coordinator process to
%% block. Thus the communication between the coordinator and the comms
%% is always async even if the comms process goes on to do blocking
%% communication with other nodes. Thus we explain the existence of
%% multi_call.
%%
%% Once we have multi_call and we care about message arrival order, we
%% have to have multi_cast too so that messages arrive in the same
%% order they were sent.
%%
%% We also push the locking API in here. This is rather more complex
%% and is only used by the rejoin transitioner, where it is also
%% documented. But essentially the comms pid is the lock, and the lock
%% is taken by some other pid, which the lock monitors. Should the pid
%% that holds the lock die, the lock is released.

%% Starts a comms process. Returns {ok, Pid, Token} where Token is
%% {Pid, Ref}; the token is what every other API function takes.
start_link() ->
    Ref = make_ref(),
    {ok, Pid} = gen_server:start_link(?MODULE, [Ref], []),
    {ok, Pid, {Pid, Ref}}.

%% Asynchronously asks the comms process to stop (reason 'normal').
stop({Pid, _Ref}) ->
    ok = gen_server:cast(Pid, stop).

%% Asks the comms process to gen_server:multi_call ?TARGET on Nodes
%% with Msg. The result is cast back to the calling process as
%% {comms, Token, Result}.
multi_call(Nodes, Msg, {Pid, _Ref}) ->
    %% We do a cast, not a call, so that the caller doesn't block -
    %% the result gets sent back async. This is essential to avoid a
    %% potential deadlock.
    ok = gen_server:cast(Pid, {multi_call, self(), Nodes, Msg}).

%% Asks the comms process to abcast Msg to ?TARGET on Nodes.
multi_cast(Nodes, Msg, {Pid, _Ref}) ->
    %% Reason for doing this is to ensure that both abcasts and
    %% multi_calls originate from the same process and so will be
    %% received in the same order as they're sent.
    ok = gen_server:cast(Pid, {multi_cast, Nodes, Msg}).

%% public api
%% Starts locking Nodes (non-empty; must include the local node, see
%% the assertion in handle_cast). The outcome is cast back to the
%% calling process as {comms, Token, lock_ok | lock_rejected}.
lock_nodes(Nodes = [_|_], {Pid, _Ref}) ->
    ok = gen_server:cast(Pid, {lock_nodes, self(), Nodes}).

%% passed through from coordinator
%% Locker asks to take this comms process as a lock; Locker is cast
%% {lock_ok, node()} or {lock_rejected, node()} in response.
lock(Locker, {Pid, _Ref}) ->
    ok = gen_server:cast(Pid, {lock, Locker}).

%% passed through from coordinator
%% Releases the lock if it is currently held by Locker; otherwise a
%% no-op.
unlock(Locker, {Pid, _Ref}) ->
    ok = gen_server:cast(Pid, {unlock, Locker}).

%%----------------------------------------------------------------------------

init([Ref]) ->
    {ok, #state { token     = {self(), Ref},
                  locked_by = undefined,
                  locking   = undefined }}.

%% No calls are supported: the whole API is cast based.
handle_call(Msg, From, State) ->
    {stop, {unhandled_call, Msg, From}, State}.

handle_cast({multi_call, ReplyTo, Nodes, Msg},
            State = #state { token = Token }) ->
    %% 'infinity' does not cause it to wait for badnodes to become
    %% good.
    Result = gen_server:multi_call(Nodes, ?TARGET, Msg, infinity),
    gen_server:cast(ReplyTo, {comms, Token, Result}),
    {noreply, State};

handle_cast({multi_cast, Nodes, Msg}, State) ->
    abcast = gen_server:abcast(Nodes, ?TARGET, Msg),
    {noreply, State};

%% Only one lock_nodes request may be in flight: this clause requires
%% locking = undefined, so a second request falls through to the
%% unhandled_cast clause at the bottom and stops the process.
handle_cast({lock_nodes, ReplyTo, Nodes},
            State = #state { locking = undefined }) ->
    true = lists:member(node(), Nodes), %% ASSERTION
    %% Of course, all of this has to be async too...
    %% Nodes are locked one at a time in sorted order; every target is
    %% monitored so that a DOWN can be handled in handle_info/2.
    [First|_] = SortedNodes = lists:usort(Nodes),
    [erlang:monitor(process, {?TARGET, N}) || N <- SortedNodes],
    gen_server:cast({?TARGET, First}, {lock, self()}),
    {noreply, State #state { locking = {[], SortedNodes, ReplyTo} }};

%% The last outstanding node granted its lock: report success.
handle_cast({lock_ok, Node},
            State = #state { locking = {_Locked, [Node], ReplyTo},
                             token   = Token }) ->
    gen_server:cast(ReplyTo, {comms, Token, lock_ok}),
    {noreply, State #state { locking = undefined }};
%% Node granted its lock and more remain: ask the next one.
handle_cast({lock_ok, Node},
            State = #state { locking = {Locked, [Node,Next|ToLock], ReplyTo} }) ->
    gen_server:cast({?TARGET, Next}, {lock, self()}),
    {noreply, State #state { locking = {[Node|Locked], [Next|ToLock], ReplyTo} }};

%% The node currently being asked refused: report the rejection and
%% release every node locked so far.
handle_cast({lock_rejected, Node},
            State = #state { locking = {Locked, [Node|_ToLock], ReplyTo},
                             token   = Token }) ->
    gen_server:cast(ReplyTo, {comms, Token, lock_rejected}),
    abcast = gen_server:abcast(Locked, ?TARGET, {unlock, self()}),
    {noreply, State #state { locking = undefined }};

%% We are free: grant the lock to Locker and monitor it so the lock is
%% released (see handle_info/2) should Locker die.
handle_cast({lock, Locker}, State = #state { locked_by = undefined }) ->
    gen_server:cast(Locker, {lock_ok, node()}),
    erlang:monitor(process, Locker),
    {noreply, State #state { locked_by = Locker }};
%% Already locked by someone: reject.
handle_cast({lock, Locker}, State) ->
    gen_server:cast(Locker, {lock_rejected, node()}),
    {noreply, State};

handle_cast({unlock, Locker}, State = #state { locked_by = Locker }) ->
    {noreply, State #state { locked_by = undefined }};
handle_cast({unlock, _Locker}, State) ->
    %% Potential race between the DOWN and the unlock might well mean
    %% that the DOWN gets here first, thus we unlock ourselves. At
    %% that point we're free to be locked by someone else. Later on,
    %% the unlock from the DOWN'd process gets here. Thus we don't
    %% attempt to make any assertions about only receiving an unlock
    %% from X when locked by X. Also, this could be an unlock coming
    %% from a remote node which was originally for a lock held by an
    %% older comms which has since been killed off.
    {noreply, State};

handle_cast(stop, State) ->
    {stop, normal, State};

handle_cast(Msg, State) ->
    {stop, {unhandled_cast, Msg}, State}.


handle_info({'DOWN', _MRef, process, {?TARGET, Node}, _Info},
            State = #state { locking = Locking, token = Token }) ->
    %% This DOWN must be from some node we're trying to lock.
    Locking1 = case Locking of
                   undefined ->
                       %% No lock_nodes request in progress.
                       Locking;
                   {_Locked, [Node], ReplyTo} ->
                       %% The down node was the last one outstanding:
                       %% handled exactly like a lock_ok from it.
                       gen_server:cast(ReplyTo, {comms, Token, lock_ok}),
                       undefined;
                   {Locked, [Node,Next|ToLock], ReplyTo} ->
                       %% The down node was the one being asked: move
                       %% on to the next, as for lock_ok.
                       gen_server:cast({?TARGET, Next}, {lock, self()}),
                       {[Node|Locked], [Next|ToLock], ReplyTo};
                   {Locked, ToLock, ReplyTo} ->
                       %% Some other node went down: drop it from
                       %% both lists.
                       {Locked -- [Node], ToLock -- [Node], ReplyTo}
               end,
    {noreply, State # state { locking = Locking1 }};
%% The process holding us as a lock died: release the lock.
handle_info({'DOWN', _MRef, process, Pid, _Info},
            State = #state { locked_by = Pid }) ->
    {noreply, State #state { locked_by = undefined }};
%% Any other DOWN is ignored.
handle_info({'DOWN', _MRef, process, _Pid, _Info}, State) ->
    {noreply, State};
handle_info(Msg, State) ->
    {stop, {unhandled_info, Msg}, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
--------------------------------------------------------------------------------
/test/src/clusterer_node.erl:
--------------------------------------------------------------------------------
-module(clusterer_node).

-export([observe_stable_state/1,
         start_link/2, delete/1,
         reset/1, start/1, start_with_config/2, apply_config/2, stop/1,
         exit/1]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-include("clusterer_test.hrl").

-record(node_state, { name, name_str, port }).

-define(IS_NODE_OFF(R), R =:= noconnection; R =:= nodedown; R =:= noproc).
-define(SLEEP, timer:sleep(250)).

%%----------------------------------------------------------------------------

%% Asks every node process in Pids for its stable state and waits for
%% one reply per pid. Returns {stable, Orddict} mapping node name to
%% reported state when no node replied 'false', otherwise not_stable.
observe_stable_state([]) ->
    {stable, orddict:new()};
observe_stable_state(Pids) ->
    Tag = make_ref(),
    Requester = self(),
    lists:foreach(
      fun (Pid) -> gen_server:cast(Pid, {stable_state, Tag, Requester}) end,
      Pids),
    %% One reply is collected per pid, in whatever order they arrive.
    Replies = lists:map(
                fun (_Pid) ->
                        receive
                            {stable_state, Tag, Name, Result} ->
                                {Name, Result}
                        end
                end, Pids),
    case lists:keymember(false, 2, Replies) of
        true  -> not_stable;
        false -> {stable, orddict:from_list(lists:usort(Replies))}
    end.

%%----------------------------------------------------------------------------

%% Starts the process that manages the node called Name on Port.
start_link(Name, Port) when is_atom(Name) andalso is_integer(Port) ->
    gen_server:start_link(?MODULE, [Name, Port], []).

%% The functions below are thin wrappers that send the corresponding
%% request to the node process; all but exit/1 are asynchronous.

delete(Pid) ->
    gen_server:cast(Pid, delete).

reset(Pid) ->
    gen_server:cast(Pid, reset).

start(Pid) ->
    gen_server:cast(Pid, start).

start_with_config(Pid, Config) ->
    gen_server:cast(Pid, {start_with_config, Config}).

apply_config(Pid, Config) ->
    gen_server:cast(Pid, {apply_config, Config}).

stop(Pid) ->
    gen_server:cast(Pid, stop).

%% Synchronous, with no timeout: returns once the node process has
%% handled the exit request.
exit(Pid) ->
    gen_server:call(Pid, exit, infinity).

%%----------------------------------------------------------------------------

%% Asserts the node is not reachable and cleans its database before
%% the process starts serving requests.
init([Name, Port]) ->
    NameStr = atom_to_list(Name),
    PortStr = rabbit_misc:format("~p", [Port]),
    NodeState = #node_state { name     = Name,
                              name_str = NameStr,
                              port     = PortStr },
    pang = net_adm:ping(Name), %% ASSERTION
    ok = clean_db(NodeState),
    {ok, NodeState}.

%% exit: stop the node, wait until it no longer answers pings, clean
%% its database, then stop this process.
handle_call(exit, _From, #node_state { name = NodeName } = NodeState) ->
    ok = run_cmd("stop-node", NodeState),
    ok = await_death(NodeName),
    ok = clean_db(NodeState),
    {stop, normal, ok, NodeState};
handle_call(Msg, From, NodeState) ->
    {stop, {unhandled_call, Msg, From}, NodeState}.

%% delete: the node must be down; clean its database and stop.
handle_cast(delete, #node_state { name = NodeName } = NodeState) ->
    pang = net_adm:ping(NodeName), %% ASSERTION
    ok = clean_db(NodeState),
    {stop, normal, NodeState};
%% reset: the node must be down; clean its database.
handle_cast(reset, #node_state { name = NodeName } = NodeState) ->
    pang = net_adm:ping(NodeName), %% ASSERTION
    ok = clean_db(NodeState),
    {noreply, NodeState};
%% start: the node must be down; boot it and wait until it answers
%% pings.
handle_cast(start, #node_state { name = NodeName } = NodeState) ->
    pang = net_adm:ping(NodeName), %% ASSERTION
    ok = run_bg_cmd("run", "-noinput", NodeState),
    ok = await_life(NodeName),
    {noreply, NodeState};
%% stop: the node must be up; stop it and wait until it is gone.
handle_cast(stop, #node_state { name = NodeName } = NodeState) ->
    pong = net_adm:ping(NodeName),
    ok = run_cmd("stop-node", NodeState),
    ok = await_death(NodeName),
    {noreply, NodeState};
%% start_with_config: write the config to the node's external config
%% file and boot the node pointing at that file.
handle_cast({start_with_config, Config},
            #node_state { name = NodeName, name_str = NameStr } = NodeState) ->
    ok = store_external_cluster_config(NameStr, Config),
    StartArgs = "-rabbitmq_clusterer config \\\\\\\"" ++
        external_config_file(NameStr) ++ "\\\\\\\" -noinput",
    ok = run_bg_cmd("run", StartArgs, NodeState),
    ok = await_life(NodeName),
    {noreply, NodeState};
%% apply_config: the node must be up; write the config file and have
%% the running node apply it via rabbitmqctl eval.
handle_cast({apply_config, Config},
            #node_state { name = NodeName, name_str = NameStr } = NodeState) ->
    pong = net_adm:ping(NodeName),
    ok = store_external_cluster_config(NameStr, Config),
    EvalAction = "eval 'rabbit_clusterer:apply_config(\"" ++
        external_config_file(NameStr) ++ "\").'",
    ok = ctl(EvalAction, NodeState),
    {noreply, NodeState};
%% stable_state: reply to From with {stable_state, Ref, Name, Result}
%% where Result is {ready, Config}, off, reset, or false when the node
%% is not in a stable state (or its status could not be determined).
handle_cast({stable_state, Ref, From},
            #node_state { name = NodeName, name_str = NameStr } = NodeState) ->
    Result =
        try
            %% The case stays inside the protected body on purpose:
            %% an unexpected status or a failing convert/1 ends up in
            %% the catch-all below and is reported as 'false'.
            case rabbit_clusterer_coordinator:request_status(NodeName) of
                {Config, ready}             -> {ready, convert(Config)};
                {Config, {transitioner, _}} -> {ready, convert(Config)};
                {_Config, booting}          -> false;
                preboot                     -> false
            end
        catch
            exit:{R, _} when ?IS_NODE_OFF(R) ->
                offline_state(NameStr);
            exit:{{R, _}, _} when ?IS_NODE_OFF(R) ->
                offline_state(NameStr);
            _Class:_Reason ->
                false
        end,
    From ! {stable_state, Ref, NodeName, Result},
    {noreply, NodeState};
handle_cast(Msg, NodeState) ->
    {stop, {unhandled_cast, Msg}, NodeState}.

handle_info(Msg, NodeState) ->
    {stop, {unhandled_info, Msg}, NodeState}.

terminate(_Reason, _NodeState) ->
    ok.

code_change(_OldVsn, NodeState, _Extra) ->
    {ok, NodeState}.

%% Classifies a node that is not running: 'reset' when its mnesia
%% directory is missing or empty (see is_reset/1), otherwise 'off'.
offline_state(NameStr) ->
    case is_reset(NameStr) of
        true  -> reset;
        false -> off
    end.

%% Blocks until the node stops answering pings.
await_death(Name) ->
    await(Name, pong, pang).

%% Blocks until the node answers pings.
await_life(Name) ->
    await(Name, pang, pong).

%% Pings Name every ?SLEEP until the ping result is Return; keeps
%% polling for as long as the result is Again.
await(Name, Again, Return) ->
    case net_adm:ping(Name) of
        Again ->
            ?SLEEP,
            await(Name, Again, Return);
        Return ->
            ok
    end.

%% Converts a clusterer config into the test's #config record: nodes
%% become an orddict of name -> disc | ram.
convert(ClustererConfig) ->
    Version   = rabbit_clusterer_config:version(ClustererConfig),
    AllNames  = rabbit_clusterer_config:nodenames(ClustererConfig),
    DiscNames = rabbit_clusterer_config:disc_nodenames(ClustererConfig),
    RamNames  = AllNames -- DiscNames,
    Gospel    = rabbit_clusterer_config:gospel(ClustererConfig),
    Typed = [{Name, disc} || Name <- DiscNames] ++
        [{Name, ram} || Name <- RamNames],
    #config { version = Version,
              nodes   = orddict:from_list(Typed),
              gospel  = Gospel }.

%% The parent of the directory holding the 'rabbit' beam file.
makefile_dir() ->
    EbinDir = filename:dirname(code:which(rabbit)),
    filename:join(EbinDir, "..").

%% Directory used for the node's mnesia data: RABBITMQ_MNESIA_DIR if
%% set (used as is), otherwise "rabbitmq-<name>-mnesia" under TMPDIR,
%% falling back to /tmp.
mnesia_dir(NameStr) when is_list(NameStr) ->
    DirName = "rabbitmq-" ++ NameStr ++ "-mnesia",
    case {os:getenv("RABBITMQ_MNESIA_DIR"), os:getenv("TMPDIR")} of
        {false, false } -> filename:join("/tmp", DirName);
        {false, TmpDir} -> filename:join(TmpDir, DirName);
        {Dir,   _     } -> Dir
    end.

%% True when the node's mnesia directory does not exist or is empty.
is_reset(NameStr) when is_list(NameStr) ->
    Dir = mnesia_dir(NameStr),
    case file:list_dir(Dir) of
        {error, enoent} -> true;
        {ok, []       } -> true;
        {ok, _        } -> false
    end.

%% Path of the file through which a cluster config is handed to the
%% node.
external_config_file(NameStr) when is_list(NameStr) ->
    mnesia_dir(NameStr) ++ "-external-cluster.config".

%% Runs "rabbitmqctl -n <node> <Action>" and asserts it exited with
%% status 0.
ctl(Action, #node_state { name_str = NameStr }) ->
    Cmd = lists:flatten([filename:join(makefile_dir(), "scripts/rabbitmqctl"),
                         " -n '",
                         NameStr, "' ", Action, " ; echo $?"]),
    exec_expect_success(Cmd).

%% Runs the make target Action for this node in the foreground and
%% asserts it exited with status 0.
run_cmd(Action, #node_state { name_str = NameStr, port = Port }) ->
    Cmd = lists:flatten(["RABBITMQ_NODENAME=",
                         NameStr,
                         " RABBITMQ_NODE_PORT=",
                         Port,
                         " ",
                         make_cmd(),
                         " -C ",
                         makefile_dir(),
                         " ",
                         Action,
                         " ; echo $?"]),
    exec_expect_success(Cmd).

%% Runs the make target Action for this node in the background (under
%% setsid), passing StartArgs via RABBITMQ_SERVER_START_ARGS. Output
%% is appended to <mnesia_dir>-stdout.log / -stderr.log. The exit
%% status is not checked.
run_bg_cmd(Action, StartArgs, #node_state { name_str = NameStr, port = Port }) ->
    Log = mnesia_dir(NameStr),
    Cmd = lists:flatten(["RABBITMQ_NODENAME=",
                         NameStr,
                         " RABBITMQ_NODE_PORT=",
                         Port,
                         " RABBITMQ_SERVER_START_ARGS=\"",
                         StartArgs,
                         "\" setsid ",
                         make_cmd(),
                         " -C ",
                         makefile_dir(),
                         " ",
                         Action,
                         " 1>> ",
                         Log,
                         "-stdout.log 2>> ",
                         Log,
                         "-stderr.log &"]),
    os:cmd(Cmd),
    ok.

%% The make executable to use: $MAKE when set, otherwise "make".
%% os:getenv/1 returns the atom 'false' for an unset variable; putting
%% that into the command list would make os:cmd/1 fail, hence the
%% explicit fallback.
make_cmd() ->
    case os:getenv("MAKE") of
        false -> "make";
        Make  -> Make
    end.

%% Runs a shell command whose last output line is its exit status
%% (the command must end in "; echo $?") and asserts that status is 0.
exec_expect_success(Cmd) ->
    Res = os:cmd(Cmd),
    "0" = lists:last(string:tokens(Res, "\n")), %% ASSERTION
    ok.

%% Runs the "cleandb" make target for the node and removes its
%% "<mnesia_dir>-cluster.config" file. A missing file is fine; any
%% other delete error is returned.
clean_db(#node_state { name_str = NameStr } = NodeState) ->
    ok = run_cmd("cleandb", NodeState),
    ClusterConfigFile = mnesia_dir(NameStr) ++ "-cluster.config",
    case file:delete(ClusterConfigFile) of
        {error, enoent} -> ok;
        Other           -> Other
    end.

%% Writes Config, as a proplist, to the node's external config file.
store_external_cluster_config(NameStr, Config) when is_list(NameStr) ->
    Path = external_config_file(NameStr),
    Terms = [to_proplist(Config)],
    ok = rabbit_file:write_term_file(Path, Terms).

%% Folds Fun(FieldName, TuplePosition, Acc) over the fields of the
%% #config record in declaration order. Positions start at 2 because
%% element 1 of a record tuple is the record name.
field_fold(Fun, Init) ->
    Fields = record_info(fields, config),
    Positions = lists:seq(2, length(Fields) + 1),
    lists:foldl(fun ({FieldName, Pos}, Acc) ->
                        Fun(FieldName, Pos, Acc)
                end, Init, lists:zip(Fields, Positions)).

%% Turns a #config record into a [{FieldName, Value}] proplist. Fields
%% are prepended, so they appear in reverse declaration order.
to_proplist(Config = #config {}) ->
    Prepend = fun (FieldName, Pos, Acc) ->
                      [{FieldName, element(Pos, Config)} | Acc]
              end,
    field_fold(Prepend, []).
--------------------------------------------------------------------------------
/test/src/clusterer_interpreter.erl:
--------------------------------------------------------------------------------
-module(clusterer_interpreter).

-export([run_program/1]).

-include("clusterer_test.hrl").

-define(SLEEP, timer:sleep(500)).

%%----------------------------------------------------------------------------

%% Interprets a program {InitialState, Steps}. Returns ok when every
%% step converged, otherwise an error term.
run_program({InitialState, Steps}) ->
    run_program(Steps, InitialState).

%% Runs the remaining steps starting from the given state. After each
%% step the achieved model state is checked against the prediction and
%% then against what the real nodes report. Nodes are tidied up at the
%% end of the program and when the model check fails; an error from
%% compare_state/2 is returned as is, without tidying.
run_program([], FinalState) ->
    ok = tidy(FinalState),
    ok;
run_program([Step | RemainingSteps], StartState) ->
    Predicted = Step #step.final_state,
    Executed  = run_step(Step #step { final_state = StartState }),
    Achieved  = Executed #step.final_state,
    case check_convergence(Predicted, Achieved) of
        {error, ModelError} ->
            ok = tidy(Achieved),
            {error, ModelError, Step};
        ok ->
            Stable = observe_stable_state(Achieved),
            case compare_state(Achieved, Stable) of
                {ok, Observed} -> run_program(RemainingSteps, Observed);
                Divergence     -> Divergence
            end
    end.

%% Executes one step: node modifications first, then the existential
%% (create/delete) instruction, then the config modification.
run_step(Step) ->
    AfterNodes     = run_modify_nodes(Step),
    AfterExistence = run_existential_node(AfterNodes),
    run_modify_config(AfterExistence).

%% Shuts down every node process known to the state.
tidy(#state { nodes = Nodes }) ->
    Pids = [Pid || {_Name, #node { pid = Pid }} <- orddict:to_list(Nodes)],
    lists:foreach(fun clusterer_node:exit/1, Pids),
    ok.

%%----------------------------------------------------------------------------

%% Compares the state predicted by program generation with the state
%% the interpreter's own model reached. Returns ok or {error, Reason}.
%% The repeated variables in the two heads require the three configs
%% to be identical; if they are not, the second clause reports
%% config_divergence.
check_convergence(#state { nodes         = NodesPred,
                           config        = Config,
                           valid_config  = VConfig,
                           active_config = AConfig },
                  #state { nodes         = NodesAchi,
                           config        = Config,
                           valid_config  = VConfig,
                           active_config = AConfig }) ->
    %% Configs should just match exactly. Nodes will differ only in
    %% that Achi will have pids
    PredNames = orddict:fetch_keys(NodesPred),
    case orddict:fetch_keys(NodesAchi) of
        PredNames ->
            CompareNode =
                fun (_Name, _Node, {error, _} = Failure) ->
                        %% Keep the first divergence found.
                        Failure;
                    (Name, #node { name = Name, state = Achieved }, ok) ->
                        #node { name = Name, state = Predicted } =
                            orddict:fetch(Name, NodesPred),
                        if
                            Predicted =:= Achieved ->
                                ok;
                            true ->
                                {error, {node_state_divergence, Name,
                                         Achieved, Predicted}}
                        end
                end,
            orddict:fold(CompareNode, ok, NodesAchi);
        AchiNames ->
            {error, {node_divergence, PredNames, AchiNames}}
    end;
check_convergence(Pred, Achi) ->
    {error, {config_divergence, Pred, Achi}}.

%% Polls the node processes until two consecutive observations,
%% separated by ?SLEEP, are both stable and identical; returns that
%% observation (an orddict of node name -> observed state).
observe_stable_state(State = #state { nodes = Nodes }) ->
    Pids = [Pid || {_Name, #node { pid = Pid }} <- orddict:to_list(Nodes)],
    case clusterer_node:observe_stable_state(Pids) of
        {stable, First} ->
            ?SLEEP, %% always sleep, just to allow some time
            case clusterer_node:observe_stable_state(Pids) of
                {stable, First} -> First; %% No one has changed, all good.
                _Changed        -> observe_stable_state(State)
            end;
        _Unstable ->
            ?SLEEP,
            observe_stable_state(State)
    end.

%% Compares the interpreter's model of the nodes with what the real
%% nodes reported (StableState: orddict of node name -> observed
%% state, as returned by observe_stable_state/1).
%%
%% Returns {ok, State} when every node was observed in the state the
%% model expects: off, reset, or ready with exactly the active config.
%% Returns {error, {nodes_divergence, {ModelNames, ObservedNames}}}
%% when the two sets of node names differ, and
%% {error, {node_state_divergence, {ModelState, Observed}}} for the
%% first node whose observed state does not match.
compare_state(State = #state { nodes         = Nodes,
                               active_config = AConfig }, StableState) ->
    %% The model's node names must be compared with the *observed*
    %% names. Comparing Nodes with itself always succeeds, which makes
    %% the nodes_divergence branch unreachable and turns a node
    %% missing from the observation into a crash in orddict:fetch/2.
    case {orddict:fetch_keys(Nodes), orddict:fetch_keys(StableState)} of
        {Eq, Eq} ->
            Result =
                orddict:fold(
                  fun (_Name, _Node, {error, _} = Err) ->
                          %% Keep the first divergence found.
                          Err;
                      (Name, Node = #node { name = Name, state = NS }, Acc) ->
                          Observed = orddict:fetch(Name, StableState),
                          case {NS, Observed} of
                              {off, off} ->
                                  orddict:store(Name, Node, Acc);
                              {reset, reset} ->
                                  orddict:store(Name, Node, Acc);
                              %% AConfig is already bound: a ready node
                              %% must run exactly the active config.
                              {ready, {ready, AConfig}} ->
                                  orddict:store(Name, Node, Acc);
                              {_, _} = DivergenceSt ->
                                  {error, {node_state_divergence, DivergenceSt}}
                          end
                  end, orddict:new(), Nodes),
            case Result of
                {error, _} = Err -> Err;
                Nodes1           -> {ok, State #state { nodes = Nodes1 }}
            end;
        {_, _} = DivergenceNodes ->
            {error, {nodes_divergence, DivergenceNodes}}
    end.

%%----------------------------------------------------------------------------

%% Runs the step's node-modification instructions against the state
%% carried in final_state and stores the resulting state back into the
%% step. The instructions are applied with lists:foldr/3, i.e. from
%% the last list element to the first.
run_modify_nodes(Step = #step { modify_node_instrs = Instrs,
                                final_state        = State }) ->
    State1 = lists:foldr(fun run_modify_node_instr/2, State, Instrs),
    Step #step { final_state = State1 }.

%% Executes one node-modification instruction against the real node
%% and returns the updated model state. The matches on the fetched
%% #node record double as assertions about the state the node must be
%% in for the instruction to be legal.
run_modify_node_instr(noop, State) ->
    State;
run_modify_node_instr({reset_node, Name}, State = #state { nodes = Nodes }) ->
    %% Only a node that is off can be reset.
    #node { state = off, pid = Pid } = Node = orddict:fetch(Name, Nodes),
    ok = clusterer_node:reset(Pid),
    ResetNode = Node #node { state = reset },
    clusterer_utils:store_node(ResetNode, State);
run_modify_node_instr({start_node, Name},
                      State = #state { nodes         = Nodes,
                                       active_config = AConfig }) ->
    #node { state = NS, pid = Pid } = Node = orddict:fetch(Name, Nodes),
    true = lists:member(NS, [off, reset]), %% ASSERTION
    ok = clusterer_node:start(Pid),
    Started = clusterer_utils:set_node_state(
                Node #node { state = ready }, AConfig),
    clusterer_utils:store_node(Started, State);
run_modify_node_instr({start_node_with_config, Name, VConfig},
                      State = #state { nodes        = Nodes,
                                       valid_config = VConfig }) ->
    %% VConfig appears twice in the head: the instruction's config
    %% must be the state's current valid config.
    #node { state = NS, pid = Pid } = Node = orddict:fetch(Name, Nodes),
    true = lists:member(NS, [off, reset]), %% ASSERTION
    ok = clusterer_node:start_with_config(Pid, VConfig),
    Stored = clusterer_utils:store_node(Node #node { state = ready }, State),
    clusterer_utils:make_config_active(Stored);
run_modify_node_instr({apply_config_to_node, Name, VConfig},
                      State = #state { nodes        = Nodes,
                                       valid_config = VConfig }) ->
    %% Only a running node can have a config applied to it.
    #node { state = ready, pid = Pid } = Node = orddict:fetch(Name, Nodes),
    ok = clusterer_node:apply_config(Pid, VConfig),
    Stored = clusterer_utils:store_node(Node, State),
    clusterer_utils:make_config_active(Stored);
run_modify_node_instr({stop_node, Name}, State = #state { nodes = Nodes }) ->
    %% Only a running node can be stopped.
    #node { state = ready, pid = Pid } = Node = orddict:fetch(Name, Nodes),
    ok = clusterer_node:stop(Pid),
    StoppedNode = Node #node { state = off },
    clusterer_utils:store_node(StoppedNode, State).

%%----------------------------------------------------------------------------

%% Runs the step's create/delete instruction against the state carried
%% in final_state and stores the resulting state back into the step.
run_existential_node(Step = #step { existential_node_instr = Instr,
                                    final_state            = Before }) ->
    After = run_existential_node_instr(Instr, Before),
    Step #step { final_state = After }.

%% Executes a single create/delete instruction and returns the updated
%% model state.
run_existential_node_instr(noop, State) ->
    State;
run_existential_node_instr({create_node, Name, Port},
                           State = #state { nodes = Nodes }) ->
    false = orddict:is_key(Name, Nodes), %% ASSERTION
    {ok, Pid} = clusterer_node:start_link(Name, Port),
    %% A freshly created node starts out in the 'reset' state.
    NewNode = #node { name  = Name,
                      port  = Port,
                      state = reset,
                      pid   = Pid },
    State #state { nodes = orddict:store(Name, NewNode, Nodes) };
run_existential_node_instr({delete_node, Name},
                           State = #state { nodes = Nodes }) ->
    #node { state = NS, pid = Pid } = orddict:fetch(Name, Nodes),
    true = lists:member(NS, [reset, off]), %% ASSERTION
    ok = clusterer_node:delete(Pid),
    State #state { nodes = orddict:erase(Name, Nodes) }.

%%----------------------------------------------------------------------------

%% Runs the step's config-modification instruction against the state
%% carried in final_state and stores the resulting state back into the
%% step.
run_modify_config(Step = #step { modify_config_instr = Instr,
                                 final_state         = Before }) ->
    After = run_modify_config_instr(Instr, Before),
    Step #step { final_state = After }.

%% Applies one config-modification instruction to the model's config
%% and returns the updated state. Guards and assertions check that the
%% instruction is legal for the current config.
run_modify_config_instr(noop, State) ->
    State;
run_modify_config_instr({config_version_to, V},
                        State = #state { config = Config =
                                             #config { version = V1 } })
  when V > V1 ->
    %% The version may only ever increase.
    Bumped = Config #config { version = V },
    clusterer_utils:set_config(Bumped, State);
run_modify_config_instr({config_gospel_to, V},
                        State = #state { config = Config =
                                             #config { gospel = V1 } })
  when V =/= V1 ->
    %% The gospel must actually change.
    Regospelled = Config #config { gospel = V },
    clusterer_utils:set_config(Regospelled, State);
run_modify_config_instr({config_add_node, Name},
                        State = #state { nodes  = Nodes,
                                         config = Config =
                                             #config { nodes = ConfigNodes } }) ->
    true  = orddict:is_key(Name, Nodes),       %% ASSERTION
    false = orddict:is_key(Name, ConfigNodes), %% ASSERTION
    %% Nodes are always added to the config as disc nodes.
    Grown = Config #config { nodes = orddict:store(Name, disc, ConfigNodes) },
    clusterer_utils:set_config(Grown, State);
run_modify_config_instr({config_remove_node, Name},
                        State = #state { config = Config =
                                             #config { nodes  = ConfigNodes,
                                                       gospel = Gospel } }) ->
    %% We allow nodes to be exterminated even when they're in the
    %% Config. We only require them to be off/reset. So no assertion
    %% for Name in keys(Nodes).
    true = Gospel =/= {node, Name},           %% ASSERTION
    true = orddict:is_key(Name, ConfigNodes), %% ASSERTION
    Shrunk = Config #config { nodes = orddict:erase(Name, ConfigNodes) },
    clusterer_utils:set_config(Shrunk, State).
--------------------------------------------------------------------------------
/test/src/clusterer_program.erl:
--------------------------------------------------------------------------------
-module(clusterer_program).

-export([generate_program/1]).

-include("clusterer_test.hrl").

-define(BASE_PORT, 10000).

%%----------------------------------------------------------------------------

%% Generates the program for the given initial state. Returns
%% {InitialState, Steps} with the steps in execution order.
generate_program(InitialState = #state {}) ->
    Steps = generate_steps([], InitialState),
    {InitialState, Steps}.

%% Keeps generating steps, each starting from the previous step's
%% final state, until the seed has been reduced to 0. Steps are
%% accumulated in reverse and put back in order at the end.
generate_steps(Acc, #state { seed = 0 }) ->
    lists:reverse(Acc);
generate_steps(Acc, Current) ->
    NextStep = generate_step(Current),
    generate_steps([NextStep | Acc], NextStep #step.final_state).

%% Generates a single step starting from State.
generate_step(State) ->
    %% We want to avoid any dependencies between instructions within a
    %% step - i.e. they must all be able to be exec'd in parallel. To
    %% enforce that we generate the instructions in a particular
    %% order: 1) modify an existing node; 2) modify config; 3) create
    %% new node. However, "create new node" can also include "delete
    %% node" and we need to ensure that if we delete a node it is not
    %% also used in another instruction in the same step. Thus we do
    %% the existential instruction first, but it can result in a
    %% "delayed" instruction for creation that is exec'd at the end.
    BlankStep = #step { modify_node_instrs     = [],
                        modify_config_instr    = noop,
                        existential_node_instr = noop,
                        final_state            = State },
    %% The generators run strictly in this order, each through
    %% step_if_seed/2.
    Generators = [fun generate_existential_node_instructions/1,
                  fun generate_modify_node_instructions/1,
                  fun generate_modify_config_instructions/1],
    Generated = lists:foldl(fun step_if_seed/2, BlankStep, Generators),
    eval_delayed_existential_instruction(Generated).

%% Chooses, for each node in turn, one node-modification instruction.
%% Nodes are visited in orddict order, threading the state (and with
%% it the seed) from node to node; once the seed reaches 0 the
%% remaining nodes get no instruction. Instructions are prepended, so
%% modify_node_instrs ends up in reverse visiting order.
%% NOTE(review): choose_one_noop2/2 is defined outside this chunk;
%% presumably it picks one of the candidate funs (or a noop) and
%% consumes seed - confirm.
generate_modify_node_instructions(
  Step = #step { final_state = State = #state { nodes = Nodes } }) ->
    {NodeInstrs, State1} =
        orddict:fold(
          fun (_Name, _Node, {Instrs, StateN = #state { seed = 0 }}) ->
                  %% No entropy left: leave the remaining nodes alone.
                  {Instrs, StateN};
              (Name, Node = #node { name = Name }, {Instrs, StateN}) ->
                  {NodeInstrFun, StateN1} =
                      choose_one_noop2(
                        lists:flatten(
                          modify_node_instructions(Node, StateN)), StateN),
                  %% The chosen fun yields the instruction and the
                  %% state after applying it to the model.
                  {NodeInstr, StateN2} = NodeInstrFun(Node, StateN1),
                  {[NodeInstr | Instrs], StateN2}
          end, {[], State}, Nodes),
    Step #step { modify_node_instrs = NodeInstrs, final_state = State1 }.

%% Chooses one config-modification instruction. The candidate list is
%% built in a fixed order, which matters because the choice is made
%% from this list using the seed:
%%   - bumping the version is always a candidate;
%%   - changing the gospel requires a non-empty config node list;
%%   - adding a node requires more known nodes than config nodes;
%%   - removing a node requires a non-empty config and is excluded
%%     when the config's only node is the gospel node.
%% NOTE(review): choose_one_noop1/2 and step_if_seed/2 are defined
%% outside this chunk; their exact seed handling is not visible here.
generate_modify_config_instructions(
  Step = #step { final_state = State = #state { nodes  = Nodes,
                                                config = Config } }) ->
    #config { nodes = ConfigNodes, gospel = Gospel } = Config,
    {InstrFun, State1} =
        choose_one_noop1(
          lists:flatten([fun update_version_instr/1,
                         case ConfigNodes of
                             [] -> [];
                             _  -> fun change_gospel_instr/1
                         end,
                         case orddict:size(Nodes) > orddict:size(ConfigNodes) of
                             true  -> [fun add_node_to_config_instr/1];
                             false -> []
                         end,
                         case orddict:size(ConfigNodes) > 0 andalso
                             [Gospel] =/=
                             [{node,N} || N <- orddict:fetch_keys(ConfigNodes)]
                         of
                             true  -> [fun remove_node_from_config_instr/1];
                             false -> []
                         end]), State),
    %% The chosen fun is only run through step_if_seed/2, starting
    %% from the state left behind by the choice itself.
    step_if_seed(fun (Step1 = #step { final_state = State2 }) ->
                         {ModifyConfigInstr, State3} = InstrFun(State2),
                         Step1 #step { modify_config_instr = ModifyConfigInstr,
                                       final_state         = State3 }
                 end, Step #step { final_state = State1 }).
%% Choose between doing nothing, creating a node (delayed until the
%% end of the step) and - only if some node is currently off or
%% reset - deleting a node. The chosen instruction is applied through
%% step_if_seed/2.
generate_existential_node_instructions(
  #step { final_state = #state { nodes = Nodes } = State0 } = Step0) ->
    IsStopped = fun (_Name, #node { state = NS }) ->
                        NS =:= reset orelse NS =:= off
                end,
    %% can only delete if we have some off nodes
    DeleteInstrs = case orddict:size(orddict:filter(IsStopped, Nodes)) of
                       0 -> [];
                       _ -> [fun delete_node_instr/1]
                   end,
    {InstrFun, State1} =
        choose_one_noop1([fun create_node_fun_instr/1 | DeleteInstrs], State0),
    Apply = fun (#step { final_state = StateN } = StepN) ->
                    {Instr, StateN1} = InstrFun(StateN),
                    StepN #step { existential_node_instr = Instr,
                                  final_state            = StateN1 }
            end,
    step_if_seed(Apply, Step0 #step { final_state = State1 }).

%%----------------------------------------------------------------------------

%% Candidate instructions for a node, by node state. The result is a
%% nested list which the caller flattens.
modify_node_instructions(#node { name = Name, state = off },
                         #state { valid_config  = VConfig,
                                  active_config = AConfig } = State) ->
    StartInstrs = case is_config_active(State) of
                      true  -> [fun start_node_instr/2];
                      false -> []
                  end,
    %% For simplicity a node may only be started with the new config
    %% when that config actually names the node.
    StartWithConfigInstrs =
        case clusterer_utils:contains_node(Name, VConfig) andalso
            AConfig =/= VConfig of
            true  -> [fun start_node_with_config_instr/2];
            false -> []
        end,
    [fun reset_node_instr/2, StartInstrs, StartWithConfigInstrs];
modify_node_instructions(#node { name = Name, state = reset },
                         #state { valid_config  = VConfig,
                                  active_config = AConfig } = State) ->
    StartWithConfigInstrs =
        case clusterer_utils:contains_node(Name, VConfig) andalso
            AConfig =/= VConfig of
            true  -> [fun start_node_with_config_instr/2];
            false -> []
        end,
    StartInstrs = case is_config_active(State) andalso
                      clusterer_utils:contains_node(Name, AConfig) of
                      true  -> [fun start_node_instr/2];
                      false -> []
                  end,
    [StartWithConfigInstrs, StartInstrs];
modify_node_instructions(#node { state = ready },
                         #state { valid_config  = VConfig,
                                  active_config = AConfig }) ->
    ApplyInstrs = case is_record(VConfig, config) andalso
                      VConfig =/= AConfig of
                      true  -> [fun apply_config_instr/2];
                      false -> []
                  end,
    [fun stop_node_instr/2, ApplyInstrs].

%%----------------------------------------------------------------------------

%% Bump the config version by one.
update_version_instr(
  #state { config = #config { version = V } = Config } = State) ->
    Next = V + 1,
    {{config_version_to, Next},
     clusterer_utils:set_config(Config #config { version = Next }, State)}.

%% Switch the gospel to any value other than the current one: either
%% 'reset' or one of the nodes in the config.
change_gospel_instr(
  #state { config = #config { nodes  = Nodes,
                              gospel = Gospel } = Config } = State) ->
    All = [reset | [{node, N} || N <- orddict:fetch_keys(Nodes)]],
    Others = lists:filter(fun (V) -> V =/= Gospel end, All),
    {NewGospel, State1} = choose_one(Others, State),
    {{config_gospel_to, NewGospel},
     clusterer_utils:set_config(Config #config { gospel = NewGospel }, State1)}.
%% Add, as a disc node, one of the existing nodes that the config
%% does not yet contain.
add_node_to_config_instr(
  #state { config = #config { nodes = ConfigNodes } = Config,
           nodes  = Nodes } = State) ->
    Missing = lists:filter(
                fun (N) -> not orddict:is_key(N, ConfigNodes) end,
                orddict:fetch_keys(Nodes)),
    {Chosen, State1} = choose_one(Missing, State),
    ConfigNodes1 = orddict:store(Chosen, disc, ConfigNodes),
    {{config_add_node, Chosen},
     clusterer_utils:set_config(Config #config { nodes = ConfigNodes1 },
                                State1)}.

%% Remove one node from the config; the gospel node is never removed.
remove_node_from_config_instr(
  #state { config = #config { nodes  = Nodes,
                              gospel = Gospel } = Config } = State) ->
    Removable = lists:filter(fun (N) -> {node, N} =/= Gospel end,
                             orddict:fetch_keys(Nodes)),
    {Chosen, State1} = choose_one(Removable, State),
    {{config_remove_node, Chosen},
     clusterer_utils:set_config(
       Config #config { nodes = orddict:erase(Chosen, Nodes) }, State1)}.

%%----------------------------------------------------------------------------

%% Node creation is returned as a {delayed, Fun} instruction and the
%% state is left untouched; Fun allocates the name and port and adds
%% the new node (in the 'reset' state) when it is finally called.
create_node_fun_instr(State) ->
    Create =
        fun (StateAtEnd) ->
                {Name, Port, #state { nodes = Nodes } = Counted} =
                    generate_name_port(StateAtEnd),
                Fresh = #node { name  = Name,
                                port  = Port,
                                pid   = undefined,
                                state = reset },
                {{create_node, Name, Port},
                 Counted #state { nodes = orddict:store(Name, Fresh, Nodes) }}
        end,
    {{delayed, Create}, State}.

%% Delete one of the nodes that is currently off or reset.
delete_node_instr(#state { nodes = Nodes } = State) ->
    IsStopped = fun (_Name, #node { state = NS }) ->
                        NS =:= reset orelse NS =:= off
                end,
    Deletable = orddict:fetch_keys(orddict:filter(IsStopped, Nodes)),
    {Victim, State1} = choose_one(Deletable, State),
    {{delete_node, Victim},
     State1 #state { nodes = orddict:erase(Victim, Nodes) }}.

%%----------------------------------------------------------------------------

%% Only a node that is off can be reset.
reset_node_instr(#node { name = Name, state = off } = Node, State) ->
    Reset = Node #node { state = reset },
    {{reset_node, Name}, clusterer_utils:store_node(Reset, State)}.
%% Start a stopped (off or reset) node under the active config.
start_node_instr(#node { name = Name, state = NS } = Node,
                 #state { active_config = AConfig } = State)
  when NS =:= off; NS =:= reset ->
    Ready      = Node #node { state = ready },
    Configured = clusterer_utils:set_node_state(Ready, AConfig),
    {{start_node, Name}, clusterer_utils:store_node(Configured, State)}.

%% Start a stopped (off or reset) node and hand it the valid config,
%% which thereby becomes the active one.
start_node_with_config_instr(#node { name = Name, state = NS } = Node,
                             #state { valid_config = VConfig } = State)
  when NS =:= off; NS =:= reset ->
    Stored = clusterer_utils:store_node(Node #node { state = ready }, State),
    {{start_node_with_config, Name, VConfig},
     clusterer_utils:make_config_active(Stored)}.

%% Apply the valid config to the given node, making it active.
apply_config_instr(#node { name = Name },
                   #state { valid_config = VConfig } = State) ->
    Activated = clusterer_utils:make_config_active(State),
    {{apply_config_to_node, Name, VConfig}, Activated}.

%% Stop a node: it is recorded as 'off'.
stop_node_instr(#node { name = Name } = Node, State) ->
    Stopped = Node #node { state = off },
    {{stop_node, Name}, clusterer_utils:store_node(Stopped, State)}.

%%----------------------------------------------------------------------------

%% True when there is an active config and at least one of the nodes
%% it names exists in the state and is ready.
is_config_active(#state { active_config = undefined }) ->
    false;
is_config_active(#state { nodes         = Nodes,
                          active_config = #config { nodes = ConfigNodes } }) ->
    IsReady = fun (Name, _Disc) ->
                      case orddict:find(Name, Nodes) of
                          {ok, Node} -> ready =:= Node #node.state;
                          error      -> false
                      end
              end,
    orddict:filter(IsReady, ConfigNodes) =/= [].

%% Derive a fresh node name and port from the node counter, and bump
%% the counter.
generate_name_port(#state { node_count = N } = State) ->
    Name = list_to_atom(lists:flatten(io_lib:format("node~p@anyhost", [N]))),
    Port = ?BASE_PORT + N,
    {Name, Port, State #state { node_count = N+1 }}.

%% The do-nothing instruction, in state-only and node-plus-state form.
noop(State)        -> {noop, State}.
noop(_Node, State) -> {noop, State}.

%% As choose_one/2, but with the noop of matching arity always among
%% the candidates.
choose_one_noop1(List, State) ->
    Candidates = [fun noop/1 | List],
    choose_one(Candidates, State).
choose_one_noop2(List, State) ->
    Candidates = [fun noop/2 | List],
    choose_one(Candidates, State).
%% Use the seed to pick an element of List: the remainder modulo the
%% list length selects the element, the quotient becomes the new
%% seed.
choose_one(List, #state { seed = Seed } = State) ->
    Len   = length(List),
    Index = 1 + (Seed rem Len),
    {lists:nth(Index, List), State #state { seed = Seed div Len }}.

%% Apply Fun to the step unless the seed of its final state is 0, in
%% which case the step is returned unchanged.
step_if_seed(Fun, #step { final_state = Final } = Step) ->
    case Final of
        #state { seed = 0 } -> Step;
        _                   -> Fun(Step)
    end.

%% Only create_node produces a delayed instruction, so this is kept
%% deliberately specific. Evaluating it consumes no seed, which is
%% why it is not wrapped in step_if_seed.
eval_delayed_existential_instruction(
  #step { existential_node_instr = {delayed, Fun},
          final_state            = State } = Step) ->
    {Instr, State1} = Fun(State),
    Step #step { existential_node_instr = Instr,
                 final_state            = State1 };
eval_delayed_existential_instruction(Step) ->
    Step.

--------------------------------------------------------------------------------
/src/rabbit_clusterer_config.erl:
--------------------------------------------------------------------------------
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License at
%% https://www.mozilla.org/MPL/1.1/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%% License for the specific language governing rights and limitations
%% under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is Pivotal Software, Inc.
%% Portions created by the Initial Developer are Copyright (C) 2013-2016
%% Pivotal Software, Inc. All Rights Reserved.

-module(rabbit_clusterer_config).
-export([load/2, load/1, store_internal/2, to_proplist/2,
         transfer_node_ids/2, update_node_id/4, add_node_ids/3, add_node_id/4,
         compare/2, is_compatible/2,
         contains_node/2, is_singleton/2, version/1, nodenames/1,
         disc_nodenames/1, node_type/2, node_id/2, gospel/1]).

-record(config, { version,
                  nodes,
                  gospel,
                  node_ids
                }).
%%----------------------------------------------------------------------------

%% The internal config lives next to, not inside, the mnesia dir: a
%% file within the mnesia dir would upset the virgin detection in
%% rabbit_mnesia.
internal_path() ->
    rabbit_mnesia:dir() ++ "-cluster.config".

%% {ok, PathOrProplist} | undefined, straight from the app env.
external_path() ->
    application:get_env(rabbitmq_clusterer, config).

%% Load a config from the configured external source (undefined), from
%% an already built #config{} (which is only validated), or from an
%% explicit path or proplist.
load(undefined) ->
    load_external();
load(#config {} = Config) ->
    case validate(Config) of
        ok      -> {ok, Config};
        Invalid -> Invalid
    end;
load(PathOrPropList) ->
    load_external(PathOrPropList).

%% Decide between the external config, if one is provided and loads
%% cleanly, and the internal one: the given NodeID/Config pair, or
%% whatever is stored on disk when Config is undefined.
load(NodeID, Config) ->
    External =
        case load_external() of
            {ok, ExternalConfig} ->
                ExternalConfig;
            {error, no_external_config_provided} ->
                undefined;
            {error, Error} ->
                error_logger:info_msg(
                  "Ignoring external configuration due to error: ~p~n", [Error]),
                undefined
        end,
    Internal =
        case Config of
            undefined -> load_internal();
            _         -> {NodeID, Config}
        end,
    choose_external_or_internal(External, Internal).

%% Load whatever the app env points at, if anything.
load_external() ->
    case external_path() of
        undefined            -> {error, no_external_config_provided};
        {ok, PathOrProplist} -> load_external(PathOrProplist)
    end.
%% Load an external config given either as a proplist or as the path
%% of a file holding a single proplist term.
%% Returns {ok, Config} | {error, Reason}.
load_external(PathOrProplist) when is_list(PathOrProplist) ->
    ProplistOrErr = case PathOrProplist of
                        [{_,_}|_] -> {ok, [PathOrProplist]};
                        [_|_]     -> rabbit_file:read_term_file(PathOrProplist);
                        %% An empty list (e.g. an empty string in the
                        %% app env) is neither a usable path nor a
                        %% usable proplist. Report it as an error
                        %% rather than crashing with case_clause, so
                        %% that load/2 can log and ignore it.
                        []        -> {error,
                                      rabbit_misc:format(
                                        "External config not a path or proplist: ~p",
                                        [PathOrProplist])}
                    end,
    case ProplistOrErr of
        {ok, [Proplist]}   -> case from_proplist(Proplist) of
                                  {ok, _NodeID, Config} -> {ok, Config};
                                  {error, _} = Error    -> Error
                              end;
        {ok, Terms}        -> {error, rabbit_misc:format(
                                        "Config is not a single term: ~p",
                                        [Terms])};
        {error, _} = Error -> Error
    end;
load_external(Other) ->
    {error, rabbit_misc:format("External config not a path or proplist: ~p",
                               [Other])}.

%% Read the internally stored config. Returns undefined when the file
%% does not exist, {NodeID, Config} otherwise. Anything else found on
%% disk (another read error, an unexpected term, an invalid config or
%% a non-binary node id) deliberately fails a match.
load_internal() ->
    Proplist = case rabbit_file:read_term_file(internal_path()) of
                   {error, enoent}               -> undefined;
                   {ok, [Proplist1 = [{_,_}|_]]} -> Proplist1
               end,
    case Proplist of
        undefined -> undefined;
        _         -> {ok, NodeID, Config} = from_proplist(Proplist),
                     true = is_binary(NodeID), %% ASSERTION
                     {NodeID, Config}
    end.

%% Write NodeID and Config to the internal config file as a single
%% proplist term.
store_internal(NodeID, Config) ->
    ok = filelib:ensure_dir(filename:dirname(internal_path())),
    ok = rabbit_file:write_term_file(internal_path(),
                                     [to_proplist(NodeID, Config)]).

%% Given the external config (or undefined) and the internal
%% {NodeID, Config} pair (or undefined), return
%% {NodeID, ConfigToUse, OldConfig | undefined}.
choose_external_or_internal(undefined, undefined) ->
    {ok, NodeID, NewConfig} = default_config(),
    {NodeID, NewConfig, undefined};
choose_external_or_internal(NewConfig, undefined) ->
    %% We only have an external config and no internal config, so we
    %% have no NodeID, so we must generate one.
    NodeID = create_node_id(),
    {NodeID, tidy_node_ids(NodeID, NewConfig), undefined};
choose_external_or_internal(undefined, {NodeID, OldConfig}) ->
    {NodeID, OldConfig, OldConfig};
choose_external_or_internal(NewConfig, {NodeID, OldConfig}) ->
    case compare(NewConfig, OldConfig) of
        younger -> %% New cluster config has been applied
                   {NodeID, NewConfig, OldConfig};
        invalid -> error_logger:info_msg(
                     "Ignoring invalid user-provided configuration", []),
                   {NodeID, OldConfig, OldConfig};
        _       -> %% All other cases, we ignore the user-provided config.
                   {NodeID, OldConfig, OldConfig}
    end.

%% The config used when there is neither an external nor an internal
%% one: version 0, this node alone as a disc node, and this node as
%% the gospel.
%%
%% Note that here we intentionally deal with NodeID being in the
%% proplist as on disk but not in the #config record.
default_config() ->
    NodeID = create_node_id(),
    MyNode = node(),
    from_proplist(
      [{version,  0},
       {nodes,    [{MyNode, disc}]},
       {gospel,   {node, MyNode}},
       {node_id,  NodeID},
       {node_ids, orddict:from_list([{MyNode, NodeID}])}
      ]).

create_node_id() ->
    %% We can't use rabbit_guid here because it may not have been
    %% started at this stage. We only need a fresh node_id when we're
    %% a virgin node. But we also want to ensure that when we are a
    %% virgin node our node id will be different from if we existed
    %% previously, hence the use of erlang:system_time() which can go
    %% wrong if time is set backwards, but we hope that won't happen.
    erlang:md5(term_to_binary({node(), erlang:system_time()})).

%%----------------------------------------------------------------------------

%% Keys that must be present in a config proplist.
required_keys() -> [version, nodes, gospel].

%% Keys that may be absent from a config proplist, with the default
%% used for each.
optional_keys() -> [{node_ids, orddict:new()}].
%% Fold Fun(FieldName, Position, Acc) over the fields of the #config
%% record, in declaration order. Position is the field's index in
%% the record tuple, so the first field is at 2 (element 1 is the
%% record tag).
field_fold(Fun, Init) ->
    Fields  = record_info(fields, config),
    Indexed = lists:zip(Fields, lists:seq(2, length(Fields) + 1)),
    lists:foldl(fun ({FieldName, Pos}, Acc) -> Fun(FieldName, Pos, Acc) end,
                Init, Indexed).

%% The proplist form of a config: every record field plus the node_id.
to_proplist(NodeID, #config {} = Config) ->
    Fields = field_fold(fun (FieldName, Pos, Acc) ->
                                [{FieldName, element(Pos, Config)} | Acc]
                        end, []),
    [{node_id, NodeID} | Fields].

%% Build and validate a config from a proplist.
%% Returns {ok, NodeID, Config} with normalised nodes, where NodeID is
%% the proplist's node_id entry (undefined if absent), or {error, Msg}.
from_proplist(Proplist) when is_list(Proplist) ->
    case check_required_keys(Proplist) of
        ok ->
            Complete = add_optional_keys(Proplist),
            Populate = fun (FieldName, Pos, ConfigN) ->
                               Value = proplists:get_value(FieldName, Complete),
                               setelement(Pos, ConfigN, Value)
                       end,
            #config { nodes = RawNodes } = Config =
                field_fold(Populate, #config {}),
            case validate(Config) of
                ok ->
                    {ok, proplists:get_value(node_id, Complete),
                     Config #config { nodes = normalise_nodes(RawNodes) }};
                {error, _} = Invalid ->
                    Invalid
            end;
        {error, _} = Missing ->
            Missing
    end;
from_proplist(Other) ->
    {error, rabbit_misc:format("Config is not a proplist: ~p", [Other])}.

%% ok if every required key is present, otherwise an error naming the
%% missing ones.
check_required_keys(Proplist) ->
    Present = proplists:get_keys(Proplist),
    case [K || K <- required_keys(), not lists:member(K, Present)] of
        []      -> ok;
        Missing -> {error, rabbit_misc:format(
                             "Required keys missing from cluster config: ~p",
                             [Missing])}
    end.

%% Prepend the default entry of every optional key that the proplist
%% does not define.
add_optional_keys(Proplist) ->
    AddDefault = fun ({Key, _Default} = Entry, ProplistN) ->
                         case proplists:is_defined(Key, ProplistN) of
                             true  -> ProplistN;
                             false -> [Entry | ProplistN]
                         end
                 end,
    lists:foldr(AddDefault, Proplist, optional_keys()).

%% Validate the record fields in declaration order; the first error
%% found is the result and later fields are not checked.
validate(Config) ->
    Check = fun (_FieldName, _Pos, {error, _E} = Failed) ->
                    Failed;
                (FieldName, Pos, ok) ->
                    validate_key(FieldName, element(Pos, Config), Config)
            end,
    field_fold(Check, ok).
%% Validate the value of a single config field. Returns ok or
%% {error, Msg}.
validate_key(version, Version, _Config)
  when is_integer(Version) andalso Version >= 0 ->
    ok;
validate_key(version, Version, _Config) ->
    {error, rabbit_misc:format("Require version to be non-negative integer: ~p",
                               [Version])};
validate_key(nodes, Nodes, _Config) when is_list(Nodes) ->
    %% Accepted entries are {Node, disc}, {Node, disk}, {Node, ram}
    %% and a bare Node atom (which counts as a disc node). The fold
    %% accumulates {ok, SeenDiscNode, NodeNames}; on an invalid entry
    %% it becomes {error, Msg, []} and is passed through unchanged.
    {Result, Disc, NodeNames} =
        lists:foldr(
          fun ({Node, disc}, {ok, _, NN}) when is_atom(Node) ->
                  {ok, true, [Node | NN]};
              ({Node, disk}, {ok, _, NN}) when is_atom(Node) ->
                  {ok, true, [Node | NN]};
              ({Node, ram }, {ok, D, NN}) when is_atom(Node) ->
                  {ok, D,    [Node | NN]};
              (Node,         {ok, _, NN}) when is_atom(Node) ->
                  {ok, true, [Node | NN]};
              (Other,        {ok, _, _NN}) ->
                  {error, rabbit_misc:format("Invalid node: ~p", [Other]), []};
              (_,            {error, _E, _NN} = Err) -> Err
          end, {ok, false, []}, Nodes),
    %% Third element: true iff no node name occurs more than once.
    case {Result, Disc, length(NodeNames) =:= length(lists:usort(NodeNames))} of
        {ok, true, true} ->
            ok;
        {ok, true, false} ->
            {error, rabbit_misc:format(
                      "Some nodes specified more than once: ~p", [NodeNames])};
        %% An empty node list is valid even though it has no disc node.
        {ok, false, _} when length(NodeNames) =:= 0 ->
            ok;
        {ok, false, _} ->
            {error, rabbit_misc:format(
                      "Require at least one disc node: ~p", [Nodes])};
        %% In the error case the second element is the message.
        {error, Err, _} ->
            {error, Err}
    end;
validate_key(nodes, Nodes, _Config) ->
    {error,
     rabbit_misc:format("Require nodes to be a list of nodes: ~p", [Nodes])};
validate_key(gospel, reset, _Config) ->
    ok;
validate_key(gospel, {node, Node}, Config = #config { nodes = Nodes }) ->
    %% The gospel node must be listed in nodes, in any accepted form.
    case [true || N <- Nodes,
                  (N =:= {Node, ram}  orelse
                   N =:= {Node, disc} orelse
                   N =:= {Node, disk} orelse
                   N =:= Node)] of
        []    -> {error, rabbit_misc:format(
                           "Node in gospel (~p) is not in nodes (~p)",
                           [Node, Config #config.nodes])};
        [_|_] -> ok
    end;
validate_key(gospel, Gospel, _Config) ->
    {error, rabbit_misc:format("Invalid gospel setting: ~p", [Gospel])};
validate_key(node_ids, Orddict, _Config) when is_list(Orddict) ->
    ok;
validate_key(node_ids, Orddict, _Config) ->
    {error,
     rabbit_misc:format("Requires node_ids to be an orddict: ~p", [Orddict])}.

%% Turn a list of node entries into an orddict of Node -> disc | ram:
%% 'disk' and bare node atoms both become 'disc'.
normalise_nodes(Nodes) when is_list(Nodes) ->
    orddict:from_list(
      lists:usort(
        lists:map(fun ({Node, disc} = E) when is_atom(Node) -> E;
                      ({Node, disk})     when is_atom(Node) -> {Node, disc};
                      (Node)             when is_atom(Node) -> {Node, disc};
                      ({Node, ram} = E)  when is_atom(Node) -> E
                  end, Nodes))).

%%----------------------------------------------------------------------------

%% Copy the node_ids of the first config into Dest; Dest is returned
%% unchanged when there is no first config.
transfer_node_ids(undefined, Dest) ->
    Dest;
transfer_node_ids(#config { node_ids = NodeIDs }, Dest = #config { }) ->
    Dest #config { node_ids = NodeIDs }.

%% If the remote config has an id for Node, store it in the local
%% config; then tidy the local node_ids using our own NodeID.
update_node_id(Node, #config { node_ids = NodeIDsRemote },
               NodeID, Config = #config { node_ids = NodeIDsLocal }) ->
    NodeIDsLocal1 = case orddict:find(Node, NodeIDsRemote) of
                        error    -> NodeIDsLocal;
                        {ok, ID} -> orddict:store(Node, ID, NodeIDsLocal)
                    end,
    tidy_node_ids(NodeID, Config #config { node_ids = NodeIDsLocal1 }).

%% Merge ExtraNodeIDs into the config's node_ids (the extra id wins
%% when a node appears in both), then tidy.
add_node_ids(ExtraNodeIDs, NodeID, Config = #config { node_ids = NodeIDs }) ->
    NodeIDs1 = orddict:merge(fun (_Node, _A, B) -> B end,
                             NodeIDs, orddict:from_list(ExtraNodeIDs)),
    tidy_node_ids(NodeID, Config #config { node_ids = NodeIDs1 }).

%% Record NewNodeID as the id of NewNode. Returns {Changed, Config}
%% where Changed is true iff a different id was previously recorded
%% for NewNode.
add_node_id(NewNode, NewNodeID, NodeID,
            Config = #config { node_ids = NodeIDs }) ->
    %% Note that if NewNode isn't in Config then tidy_node_ids will do
    %% the right thing, and also that Changed will always be false.
    Changed = case orddict:find(NewNode, NodeIDs) of
                  error            -> false;
                  {ok, NewNodeID}  -> false;
                  {ok, _NewNodeID} -> true
              end,
    NodeIDs1 = orddict:store(NewNode, NewNodeID, NodeIDs),
    {Changed, tidy_node_ids(NodeID, Config #config { node_ids = NodeIDs1 })}.

%% Drop the ids of nodes that are not in the config and, if this node
%% is in the config, record NodeID as its id.
tidy_node_ids(NodeID, Config = #config { nodes = Nodes, node_ids = NodeIDs }) ->
    MyNode = node(),
    NodeIDs1 = orddict:filter(fun (N, _ID) -> orddict:is_key(N, Nodes) end,
                              NodeIDs),
    %% our own node_id may have changed or be missing.
    NodeIDs2 = case orddict:is_key(MyNode, Nodes) of
                   true  -> orddict:store(MyNode, NodeID, NodeIDs1);
                   false -> NodeIDs1
               end,
    Config #config { node_ids = NodeIDs2 }.

%%----------------------------------------------------------------------------

%% Compare two configs, ignoring node_ids: 'coeval' when they are
%% otherwise identical, 'younger' / 'older' when A has the greater /
%% lesser version, and 'invalid' when the versions are equal but the
%% contents differ.
compare(ConfigA = #config { version = VA },
        ConfigB = #config { version = VB }) ->
    %% node_ids are semantically irrelevant for comparison
    case {ConfigA #config { node_ids = undefined },
          ConfigB #config { node_ids = undefined }} of
        {EQ, EQ}       -> coeval;
        _ when VA > VB -> younger;
        _ when VA < VB -> older;
        _              -> invalid
    end.

%% If the config has changed, we need to figure out whether we need to
%% do a full join (which may well include wiping out mnesia) or
%% whether the config has simply evolved and we can do something
%% softer (maybe nothing at all). Essentially, if the gospel node in
%% the new config is someone we thought we knew but who's been reset
%% (so their node_id has changed) then we'll need to do a fresh sync
%% to them.
%% Identical configs are compatible. Otherwise the new config is
%% compatible with the old one only when its gospel is a node, the
%% old config contains both this node and that gospel node, and the
%% two configs do not record different ids for the gospel node.
is_compatible(Config, Config) ->
    true;
is_compatible(#config {}, undefined) ->
    false;
is_compatible(#config { gospel = reset }, _ConfigOld) ->
    false;
is_compatible(#config { gospel   = {node, Node},
                        node_ids = NodeIDsNew },
              #config { node_ids = NodeIDsOld } = ConfigOld) ->
    BothKnown = contains_node(node(), ConfigOld) andalso
        contains_node(Node, ConfigOld),
    BothKnown andalso
        case {orddict:find(Node, NodeIDsNew),
              orddict:find(Node, NodeIDsOld)} of
            {{ok, Id}, {ok, Id}} -> true;
            {{ok, _},  {ok, _}}  -> false;
            {_,        _}        -> true
        end.

%%----------------------------------------------------------------------------

%% Accessors and simple predicates on #config{}.

contains_node(Node, #config { nodes = Nodes }) ->
    orddict:is_key(Node, Nodes).

%% True iff Node is the config's one and only node, as a disc node.
is_singleton(Node, #config { nodes = [{Node, disc}] }) ->
    true;
is_singleton(_Node, _Config) ->
    false.

version(#config { version = Version }) ->
    Version.

nodenames(#config { nodes = Nodes }) ->
    orddict:fetch_keys(Nodes).

disc_nodenames(#config { nodes = Nodes }) ->
    [Name || {Name, disc} <- orddict:to_list(Nodes)].

node_type(Node, #config { nodes = Nodes }) ->
    orddict:fetch(Node, Nodes).

node_id(Node, #config { node_ids = NodeIDs }) ->
    orddict:fetch(Node, NodeIDs).

gospel(#config { gospel = Gospel }) ->
    Gospel.

--------------------------------------------------------------------------------
/rabbitmq-components.mk:
--------------------------------------------------------------------------------
ifeq ($(.DEFAULT_GOAL),)
# Define default goal to `all` because this file defines some targets
# before the inclusion of erlang.mk leading to the wrong target becoming
# the default.
.DEFAULT_GOAL = all
endif

# PROJECT_VERSION defaults to:
# 1. the version exported by rabbitmq-server-release;
# 2.
the version stored in `git-revisions.txt`, if it exists; 11 | # 3. a version based on git-describe(1), if it is a Git clone; 12 | # 4. 0.0.0 13 | 14 | PROJECT_VERSION := $(RABBITMQ_VERSION) 15 | 16 | ifeq ($(PROJECT_VERSION),) 17 | PROJECT_VERSION := $(shell \ 18 | if test -f git-revisions.txt; then \ 19 | head -n1 git-revisions.txt | \ 20 | awk '{print $$$(words $(PROJECT_DESCRIPTION) version);}'; \ 21 | else \ 22 | (git describe --dirty --abbrev=7 --tags --always --first-parent \ 23 | 2>/dev/null || echo rabbitmq_v0_0_0) | \ 24 | sed -e 's/^rabbitmq_v//' -e 's/^v//' -e 's/_/./g' -e 's/-/+/' \ 25 | -e 's/-/./g'; \ 26 | fi) 27 | endif 28 | 29 | # -------------------------------------------------------------------- 30 | # RabbitMQ components. 31 | # -------------------------------------------------------------------- 32 | 33 | # For RabbitMQ repositories, we want to checkout branches which match 34 | # the parent project. For instance, if the parent project is on a 35 | # release tag, dependencies must be on the same release tag. If the 36 | # parent project is on a topic branch, dependencies must be on the same 37 | # topic branch or fallback to `stable` or `master` whichever was the 38 | # base of the topic branch. 
39 | 40 | dep_amqp_client = git_rmq rabbitmq-erlang-client $(current_rmq_ref) $(base_rmq_ref) master 41 | dep_amqp10_client = git_rmq rabbitmq-amqp1.0-client $(current_rmq_ref) $(base_rmq_ref) master 42 | dep_amqp10_common = git_rmq rabbitmq-amqp1.0-common $(current_rmq_ref) $(base_rmq_ref) master 43 | dep_rabbit = git_rmq rabbitmq-server $(current_rmq_ref) $(base_rmq_ref) master 44 | dep_rabbit_common = git_rmq rabbitmq-common $(current_rmq_ref) $(base_rmq_ref) master 45 | dep_rabbitmq_amqp1_0 = git_rmq rabbitmq-amqp1.0 $(current_rmq_ref) $(base_rmq_ref) master 46 | dep_rabbitmq_auth_backend_amqp = git_rmq rabbitmq-auth-backend-amqp $(current_rmq_ref) $(base_rmq_ref) master 47 | dep_rabbitmq_auth_backend_cache = git_rmq rabbitmq-auth-backend-cache $(current_rmq_ref) $(base_rmq_ref) master 48 | dep_rabbitmq_auth_backend_http = git_rmq rabbitmq-auth-backend-http $(current_rmq_ref) $(base_rmq_ref) master 49 | dep_rabbitmq_auth_backend_ldap = git_rmq rabbitmq-auth-backend-ldap $(current_rmq_ref) $(base_rmq_ref) master 50 | dep_rabbitmq_auth_mechanism_ssl = git_rmq rabbitmq-auth-mechanism-ssl $(current_rmq_ref) $(base_rmq_ref) master 51 | dep_rabbitmq_aws = git_rmq rabbitmq-aws $(current_rmq_ref) $(base_rmq_ref) master 52 | dep_rabbitmq_boot_steps_visualiser = git_rmq rabbitmq-boot-steps-visualiser $(current_rmq_ref) $(base_rmq_ref) master 53 | dep_rabbitmq_clusterer = git_rmq rabbitmq-clusterer $(current_rmq_ref) $(base_rmq_ref) master 54 | dep_rabbitmq_cli = git_rmq rabbitmq-cli $(current_rmq_ref) $(base_rmq_ref) master 55 | dep_rabbitmq_codegen = git_rmq rabbitmq-codegen $(current_rmq_ref) $(base_rmq_ref) master 56 | dep_rabbitmq_consistent_hash_exchange = git_rmq rabbitmq-consistent-hash-exchange $(current_rmq_ref) $(base_rmq_ref) master 57 | dep_rabbitmq_ct_client_helpers = git_rmq rabbitmq-ct-client-helpers $(current_rmq_ref) $(base_rmq_ref) master 58 | dep_rabbitmq_ct_helpers = git_rmq rabbitmq-ct-helpers $(current_rmq_ref) $(base_rmq_ref) master 59 | 
dep_rabbitmq_delayed_message_exchange = git_rmq rabbitmq-delayed-message-exchange $(current_rmq_ref) $(base_rmq_ref) master 60 | dep_rabbitmq_dotnet_client = git_rmq rabbitmq-dotnet-client $(current_rmq_ref) $(base_rmq_ref) master 61 | dep_rabbitmq_event_exchange = git_rmq rabbitmq-event-exchange $(current_rmq_ref) $(base_rmq_ref) master 62 | dep_rabbitmq_federation = git_rmq rabbitmq-federation $(current_rmq_ref) $(base_rmq_ref) master 63 | dep_rabbitmq_federation_management = git_rmq rabbitmq-federation-management $(current_rmq_ref) $(base_rmq_ref) master 64 | dep_rabbitmq_java_client = git_rmq rabbitmq-java-client $(current_rmq_ref) $(base_rmq_ref) master 65 | dep_rabbitmq_jms_client = git_rmq rabbitmq-jms-client $(current_rmq_ref) $(base_rmq_ref) master 66 | dep_rabbitmq_jms_cts = git_rmq rabbitmq-jms-cts $(current_rmq_ref) $(base_rmq_ref) master 67 | dep_rabbitmq_jms_topic_exchange = git_rmq rabbitmq-jms-topic-exchange $(current_rmq_ref) $(base_rmq_ref) master 68 | dep_rabbitmq_lvc_exchange = git_rmq rabbitmq-lvc-exchange $(current_rmq_ref) $(base_rmq_ref) master 69 | dep_rabbitmq_management = git_rmq rabbitmq-management $(current_rmq_ref) $(base_rmq_ref) master 70 | dep_rabbitmq_management_agent = git_rmq rabbitmq-management-agent $(current_rmq_ref) $(base_rmq_ref) master 71 | dep_rabbitmq_management_exchange = git_rmq rabbitmq-management-exchange $(current_rmq_ref) $(base_rmq_ref) master 72 | dep_rabbitmq_management_themes = git_rmq rabbitmq-management-themes $(current_rmq_ref) $(base_rmq_ref) master 73 | dep_rabbitmq_management_visualiser = git_rmq rabbitmq-management-visualiser $(current_rmq_ref) $(base_rmq_ref) master 74 | dep_rabbitmq_message_timestamp = git_rmq rabbitmq-message-timestamp $(current_rmq_ref) $(base_rmq_ref) master 75 | dep_rabbitmq_metronome = git_rmq rabbitmq-metronome $(current_rmq_ref) $(base_rmq_ref) master 76 | dep_rabbitmq_mqtt = git_rmq rabbitmq-mqtt $(current_rmq_ref) $(base_rmq_ref) master 77 | dep_rabbitmq_objc_client = git_rmq 
rabbitmq-objc-client $(current_rmq_ref) $(base_rmq_ref) master 78 | dep_rabbitmq_peer_discovery_aws = git_rmq rabbitmq-peer-discovery-aws $(current_rmq_ref) $(base_rmq_ref) master 79 | dep_rabbitmq_peer_discovery_common = git_rmq rabbitmq-peer-discovery-common $(current_rmq_ref) $(base_rmq_ref) master 80 | dep_rabbitmq_peer_discovery_consul = git_rmq rabbitmq-peer-discovery-consul $(current_rmq_ref) $(base_rmq_ref) master 81 | dep_rabbitmq_peer_discovery_etcd = git_rmq rabbitmq-peer-discovery-etcd $(current_rmq_ref) $(base_rmq_ref) master 82 | dep_rabbitmq_peer_discovery_k8s = git_rmq rabbitmq-peer-discovery-k8s $(current_rmq_ref) $(base_rmq_ref) master 83 | dep_rabbitmq_random_exchange = git_rmq rabbitmq-random-exchange $(current_rmq_ref) $(base_rmq_ref) master 84 | dep_rabbitmq_recent_history_exchange = git_rmq rabbitmq-recent-history-exchange $(current_rmq_ref) $(base_rmq_ref) master 85 | dep_rabbitmq_routing_node_stamp = git_rmq rabbitmq-routing-node-stamp $(current_rmq_ref) $(base_rmq_ref) master 86 | dep_rabbitmq_rtopic_exchange = git_rmq rabbitmq-rtopic-exchange $(current_rmq_ref) $(base_rmq_ref) master 87 | dep_rabbitmq_server_release = git_rmq rabbitmq-server-release $(current_rmq_ref) $(base_rmq_ref) master 88 | dep_rabbitmq_sharding = git_rmq rabbitmq-sharding $(current_rmq_ref) $(base_rmq_ref) master 89 | dep_rabbitmq_shovel = git_rmq rabbitmq-shovel $(current_rmq_ref) $(base_rmq_ref) master 90 | dep_rabbitmq_shovel_management = git_rmq rabbitmq-shovel-management $(current_rmq_ref) $(base_rmq_ref) master 91 | dep_rabbitmq_stomp = git_rmq rabbitmq-stomp $(current_rmq_ref) $(base_rmq_ref) master 92 | dep_rabbitmq_toke = git_rmq rabbitmq-toke $(current_rmq_ref) $(base_rmq_ref) master 93 | dep_rabbitmq_top = git_rmq rabbitmq-top $(current_rmq_ref) $(base_rmq_ref) master 94 | dep_rabbitmq_tracing = git_rmq rabbitmq-tracing $(current_rmq_ref) $(base_rmq_ref) master 95 | dep_rabbitmq_trust_store = git_rmq rabbitmq-trust-store $(current_rmq_ref) $(base_rmq_ref) 
master 96 | dep_rabbitmq_test = git_rmq rabbitmq-test $(current_rmq_ref) $(base_rmq_ref) master 97 | dep_rabbitmq_web_dispatch = git_rmq rabbitmq-web-dispatch $(current_rmq_ref) $(base_rmq_ref) master 98 | dep_rabbitmq_web_stomp = git_rmq rabbitmq-web-stomp $(current_rmq_ref) $(base_rmq_ref) master 99 | dep_rabbitmq_web_stomp_examples = git_rmq rabbitmq-web-stomp-examples $(current_rmq_ref) $(base_rmq_ref) master 100 | dep_rabbitmq_web_mqtt = git_rmq rabbitmq-web-mqtt $(current_rmq_ref) $(base_rmq_ref) master 101 | dep_rabbitmq_web_mqtt_examples = git_rmq rabbitmq-web-mqtt-examples $(current_rmq_ref) $(base_rmq_ref) master 102 | dep_rabbitmq_website = git_rmq rabbitmq-website $(current_rmq_ref) $(base_rmq_ref) live master 103 | dep_toke = git_rmq toke $(current_rmq_ref) $(base_rmq_ref) master 104 | 105 | dep_rabbitmq_public_umbrella = git_rmq rabbitmq-public-umbrella $(current_rmq_ref) $(base_rmq_ref) master 106 | 107 | # Third-party dependencies version pinning. 108 | # 109 | # We do that in this file, which is copied in all projects, to ensure 110 | # all projects use the same versions. It avoids conflicts and makes it 111 | # possible to work with rabbitmq-public-umbrella. 
112 | 113 | dep_cowboy = hex 2.6.1 114 | dep_cowlib = hex 2.7.0 115 | dep_jsx = hex 2.9.0 116 | dep_lager = hex 3.6.5 117 | dep_ra = git https://github.com/rabbitmq/ra.git master 118 | dep_ranch = hex 1.7.1 119 | dep_recon = hex 2.3.6 120 | 121 | dep_sockjs = git https://github.com/rabbitmq/sockjs-erlang.git 405990ea62353d98d36dbf5e1e64942d9b0a1daf 122 | 123 | RABBITMQ_COMPONENTS = amqp_client \ 124 | amqp10_common \ 125 | amqp10_client \ 126 | rabbit \ 127 | rabbit_common \ 128 | rabbitmq_amqp1_0 \ 129 | rabbitmq_auth_backend_amqp \ 130 | rabbitmq_auth_backend_cache \ 131 | rabbitmq_auth_backend_http \ 132 | rabbitmq_auth_backend_ldap \ 133 | rabbitmq_auth_mechanism_ssl \ 134 | rabbitmq_aws \ 135 | rabbitmq_boot_steps_visualiser \ 136 | rabbitmq_clusterer \ 137 | rabbitmq_cli \ 138 | rabbitmq_codegen \ 139 | rabbitmq_consistent_hash_exchange \ 140 | rabbitmq_ct_client_helpers \ 141 | rabbitmq_ct_helpers \ 142 | rabbitmq_delayed_message_exchange \ 143 | rabbitmq_dotnet_client \ 144 | rabbitmq_event_exchange \ 145 | rabbitmq_federation \ 146 | rabbitmq_federation_management \ 147 | rabbitmq_java_client \ 148 | rabbitmq_jms_client \ 149 | rabbitmq_jms_cts \ 150 | rabbitmq_jms_topic_exchange \ 151 | rabbitmq_lvc_exchange \ 152 | rabbitmq_management \ 153 | rabbitmq_management_agent \ 154 | rabbitmq_management_exchange \ 155 | rabbitmq_management_themes \ 156 | rabbitmq_management_visualiser \ 157 | rabbitmq_message_timestamp \ 158 | rabbitmq_metronome \ 159 | rabbitmq_mqtt \ 160 | rabbitmq_objc_client \ 161 | rabbitmq_peer_discovery_aws \ 162 | rabbitmq_peer_discovery_common \ 163 | rabbitmq_peer_discovery_consul \ 164 | rabbitmq_peer_discovery_etcd \ 165 | rabbitmq_peer_discovery_k8s \ 166 | rabbitmq_random_exchange \ 167 | rabbitmq_recent_history_exchange \ 168 | rabbitmq_routing_node_stamp \ 169 | rabbitmq_rtopic_exchange \ 170 | rabbitmq_server_release \ 171 | rabbitmq_sharding \ 172 | rabbitmq_shovel \ 173 | rabbitmq_shovel_management \ 174 | rabbitmq_stomp \ 175 
| rabbitmq_toke \ 176 | rabbitmq_top \ 177 | rabbitmq_tracing \ 178 | rabbitmq_trust_store \ 179 | rabbitmq_web_dispatch \ 180 | rabbitmq_web_mqtt \ 181 | rabbitmq_web_mqtt_examples \ 182 | rabbitmq_web_stomp \ 183 | rabbitmq_web_stomp_examples \ 184 | rabbitmq_website 185 | 186 | # Several components have a custom erlang.mk/build.config, mainly 187 | # to disable eunit. Therefore, we can't use the top-level project's 188 | # erlang.mk copy. 189 | NO_AUTOPATCH += $(RABBITMQ_COMPONENTS) 190 | 191 | ifeq ($(origin current_rmq_ref),undefined) 192 | ifneq ($(wildcard .git),) 193 | current_rmq_ref := $(shell (\ 194 | ref=$$(LANG=C git branch --list | awk '/^\* \(.*detached / {ref=$$0; sub(/.*detached [^ ]+ /, "", ref); sub(/\)$$/, "", ref); print ref; exit;} /^\* / {ref=$$0; sub(/^\* /, "", ref); print ref; exit}');\ 195 | if test "$$(git rev-parse --short HEAD)" != "$$ref"; then echo "$$ref"; fi)) 196 | else 197 | current_rmq_ref := master 198 | endif 199 | endif 200 | export current_rmq_ref 201 | 202 | ifeq ($(origin base_rmq_ref),undefined) 203 | ifneq ($(wildcard .git),) 204 | possible_base_rmq_ref := master 205 | ifeq ($(possible_base_rmq_ref),$(current_rmq_ref)) 206 | base_rmq_ref := $(current_rmq_ref) 207 | else 208 | base_rmq_ref := $(shell \ 209 | (git rev-parse --verify -q master >/dev/null && \ 210 | git rev-parse --verify -q $(possible_base_rmq_ref) >/dev/null && \ 211 | git merge-base --is-ancestor $$(git merge-base master HEAD) $(possible_base_rmq_ref) && \ 212 | echo $(possible_base_rmq_ref)) || \ 213 | echo master) 214 | endif 215 | else 216 | base_rmq_ref := master 217 | endif 218 | endif 219 | export base_rmq_ref 220 | 221 | # Repository URL selection. 222 | # 223 | # First, we infer other components' location from the current project 224 | # repository URL, if it's a Git repository: 225 | # - We take the "origin" remote URL as the base 226 | # - The current project name and repository name is replaced by the 227 | # target's properties: 228 | # eg. 
rabbitmq-common is replaced by rabbitmq-codegen 229 | # eg. rabbit_common is replaced by rabbitmq_codegen 230 | # 231 | # If cloning from this computed location fails, we fall back to RabbitMQ 232 | # upstream which is GitHub. 233 | 234 | # Macro to transform eg. "rabbit_common" to "rabbitmq-common". 235 | rmq_cmp_repo_name = $(word 2,$(dep_$(1))) 236 | 237 | # Upstream URL for the current project. 238 | RABBITMQ_COMPONENT_REPO_NAME := $(call rmq_cmp_repo_name,$(PROJECT)) 239 | RABBITMQ_UPSTREAM_FETCH_URL ?= https://github.com/rabbitmq/$(RABBITMQ_COMPONENT_REPO_NAME).git 240 | RABBITMQ_UPSTREAM_PUSH_URL ?= git@github.com:rabbitmq/$(RABBITMQ_COMPONENT_REPO_NAME).git 241 | 242 | # Current URL for the current project. If this is not a Git clone, 243 | # default to the upstream Git repository. 244 | ifneq ($(wildcard .git),) 245 | git_origin_fetch_url := $(shell git config remote.origin.url) 246 | git_origin_push_url := $(shell git config remote.origin.pushurl || git config remote.origin.url) 247 | RABBITMQ_CURRENT_FETCH_URL ?= $(git_origin_fetch_url) 248 | RABBITMQ_CURRENT_PUSH_URL ?= $(git_origin_push_url) 249 | else 250 | RABBITMQ_CURRENT_FETCH_URL ?= $(RABBITMQ_UPSTREAM_FETCH_URL) 251 | RABBITMQ_CURRENT_PUSH_URL ?= $(RABBITMQ_UPSTREAM_PUSH_URL) 252 | endif 253 | 254 | # Macro to replace the following pattern: 255 | # 1. /foo.git -> /bar.git 256 | # 2. /foo -> /bar 257 | # 3. /foo/ -> /bar/ 258 | subst_repo_name = $(patsubst %/$(1)/%,%/$(2)/%,$(patsubst %/$(1),%/$(2),$(patsubst %/$(1).git,%/$(2).git,$(3)))) 259 | 260 | # Macro to replace both the project's name (eg. "rabbit_common") and 261 | # repository name (eg. "rabbitmq-common") by the target's equivalent. 262 | # 263 | # This macro is kept on one line because we don't want whitespaces in 264 | # the returned value, as it's used in $(dep_fetch_git_rmq) in a shell 265 | # single-quoted string.
266 | dep_rmq_repo = $(if $(dep_$(2)),$(call subst_repo_name,$(PROJECT),$(2),$(call subst_repo_name,$(RABBITMQ_COMPONENT_REPO_NAME),$(call rmq_cmp_repo_name,$(2)),$(1))),$(pkg_$(1)_repo)) 267 | 268 | dep_rmq_commits = $(if $(dep_$(1)), \ 269 | $(wordlist 3,$(words $(dep_$(1))),$(dep_$(1))), \ 270 | $(pkg_$(1)_commit)) 271 | 272 | define dep_fetch_git_rmq 273 | fetch_url1='$(call dep_rmq_repo,$(RABBITMQ_CURRENT_FETCH_URL),$(1))'; \ 274 | fetch_url2='$(call dep_rmq_repo,$(RABBITMQ_UPSTREAM_FETCH_URL),$(1))'; \ 275 | if test "$$$$fetch_url1" != '$(RABBITMQ_CURRENT_FETCH_URL)' && \ 276 | git clone -q -n -- "$$$$fetch_url1" $(DEPS_DIR)/$(call dep_name,$(1)); then \ 277 | fetch_url="$$$$fetch_url1"; \ 278 | push_url='$(call dep_rmq_repo,$(RABBITMQ_CURRENT_PUSH_URL),$(1))'; \ 279 | elif git clone -q -n -- "$$$$fetch_url2" $(DEPS_DIR)/$(call dep_name,$(1)); then \ 280 | fetch_url="$$$$fetch_url2"; \ 281 | push_url='$(call dep_rmq_repo,$(RABBITMQ_UPSTREAM_PUSH_URL),$(1))'; \ 282 | fi; \ 283 | cd $(DEPS_DIR)/$(call dep_name,$(1)) && ( \ 284 | $(foreach ref,$(call dep_rmq_commits,$(1)), \ 285 | git checkout -q $(ref) >/dev/null 2>&1 || \ 286 | ) \ 287 | (echo "error: no valid pathspec among: $(call dep_rmq_commits,$(1))" \ 288 | 1>&2 && false) ) && \ 289 | (test "$$$$fetch_url" = "$$$$push_url" || \ 290 | git remote set-url --push origin "$$$$push_url") 291 | endef 292 | 293 | # -------------------------------------------------------------------- 294 | # Component distribution. 295 | # -------------------------------------------------------------------- 296 | 297 | list-dist-deps:: 298 | @: 299 | 300 | prepare-dist:: 301 | @: 302 | 303 | # -------------------------------------------------------------------- 304 | # Umbrella-specific settings. 305 | # -------------------------------------------------------------------- 306 | 307 | # If this project is under the Umbrella project, we override $(DEPS_DIR) 308 | # to point to the Umbrella's one. 
We also disable `make distclean` so 309 | # $(DEPS_DIR) is not accidentally removed. 310 | 311 | ifneq ($(wildcard ../../UMBRELLA.md),) 312 | UNDER_UMBRELLA = 1 313 | else ifneq ($(wildcard UMBRELLA.md),) 314 | UNDER_UMBRELLA = 1 315 | endif 316 | 317 | ifeq ($(UNDER_UMBRELLA),1) 318 | ifneq ($(PROJECT),rabbitmq_public_umbrella) 319 | DEPS_DIR ?= $(abspath ..) 320 | endif 321 | 322 | ifneq ($(filter distclean distclean-deps,$(MAKECMDGOALS)),) 323 | SKIP_DEPS = 1 324 | endif 325 | endif 326 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RabbitMQ Clusterer 2 | 3 | This plugin is **no longer maintained**, and **completely unnecessary** with [supported releases of RabbitMQ](https://www.rabbitmq.com/versions.html). 4 | 5 | This plugin provided an alternative means for creating and maintaining 6 | RabbitMQ clusters. It is highly opinionated and was created with specific 7 | opinionated infrastructure provisioning tooling in mind. Team RabbitMQ 8 | **considers it to be a failed experiment** and **highly recommends against it**. 9 | 10 | Please [upgrade to a supported RabbitMQ version](https://www.rabbitmq.com/upgrade.html) instead. 11 | It provides a [peer discovery subsystem](https://www.rabbitmq.com/cluster-formation.html) introduced in RabbitMQ 3.7.0 or 12 | its predecessor, [rabbitmq-autocluster](https://github.com/rabbitmq/rabbitmq-autocluster). 13 | That plugin is not a strict alternative to this one but targets a wider range of provisioning scenarios. 14 | 15 | ## Project status 16 | 17 | The plugin was created to handle arbitrary order on nodes restart. 18 | **Since RabbitMQ version 3.6.7 this problem is [addressed](https://www.rabbitmq.com/clustering.html#restarting)** in the core. 
19 | 20 | This plugin is considered deprecated, and it's recommended to switch 21 | to RabbitMQ's built-in [cluster formation feature](https://www.rabbitmq.com/configure.html) in order to avoid 22 | known issues that this plugin's opinionated behavior entails (such as 23 | [#7](https://github.com/rabbitmq/rabbitmq-clusterer/issues/7)). 24 | 25 | ## Overview 26 | 27 | Traditional RabbitMQ clustering is not very friendly to infrastructure 28 | automation tools such as Chef, Puppet or BOSH. The 29 | existing tooling (`rabbitmqctl join_cluster` and friends) is 30 | imperative, requires more oversight and does not handle potentially 31 | random node boot order very well. The Clusterer has been specifically 32 | designed with automated deployment tools in mind. 33 | 34 | Unlike the existing tooling, the Clusterer is declarative and goal 35 | directed: you tell it the overall shape of the cluster you want to 36 | construct and the clusterer tries to achieve that. With the Clusterer, 37 | cluster configuration can be provided in a single location (a configuration 38 | file). 39 | 40 | With `rabbitmq-clusterer`, nodes in a cluster can be restarted in any order, 41 | which can be the case with automation tools performing upgrades/reconfiguration, 42 | or due to node failure timing. 43 | 44 | 45 | ## Project Maturity 46 | 47 | This plugin is **ancient abandonware**. Do not use. Yes, really, it no longer has any reasons to exist. 48 | 49 | 50 | ## Compatibility With Traditional RabbitMQ Clustering 51 | 52 | The Clusterer is not generally compatible with the existing clustering 53 | tool-set. Do not use any of the `rabbitmqctl` commands relating to 54 | changing clusters: `join_cluster`, `change_cluster_node_type`, and `update_cluster_nodes` must not be used. 55 | If you do use these, this plugin likely won't be able to perform its jobs. 
56 | 57 | `rabbitmqctl cluster_status` may be used to inspect a cluster 58 | state, but the Clusterer sends to the standard Rabbit log files 59 | details about any clusters it joins or leaves. See the *Inspecting the 60 | Clusterer Status* section further down. 61 | 62 | `rabbitmqctl stop_app`, `rabbitmqctl forget_cluster_node`, and `rabbitmqctl start_app` 63 | can be used to force a node out of a cluster before cluster config can be changed. While 64 | this is not generally recommended, there can be valid reasons for doing so, e.g. node 65 | running out of disk space and/or needing replacement for other reasons. 66 | 67 | `cluster_nodes` in the RabbitMQ config file is incompatible with this plugin 68 | and must not be used. 69 | 70 | 71 | ## Installation 72 | 73 | Binary builds of this plugin are available from 74 | 75 | * [Bintray](https://bintray.com/rabbitmq/community-plugins/rabbitmq_clusterer) (like with other [RabbitMQ Community Plugins](https://rabbitmq.com/community-plugins.html)) 76 | * [GitHub releases page](https://github.com/rabbitmq/rabbitmq-clusterer/releases) 77 | 78 | As with all other plugins, you must put the plugin archive (`.ez`) in 79 | the [RabbitMQ plugins directory](https://www.rabbitmq.com/relocate.html) 80 | and enable it with `rabbitmq-plugins enable rabbitmq_clusterer --offline`. 81 | 82 | ## For Recent RabbitMQ Versions (3.5.4 and Later) 83 | 84 | The compiled plugin file needs to be placed into the [RabbitMQ plugins directory](https://www.rabbitmq.com/relocate.html). 85 | 86 | To use the plugin, it is necessary to override `RABBITMQ_BOOT_MODULE` to `rabbit_clusterer`. This 87 | is done similarly to [other RabbitMQ environment variables](https://rabbitmq.com/configure.html).
88 | 89 | Because this plugin coordinates RabbitMQ node start, it needs to be manually added to the Erlang VM 90 | code path: 91 | 92 | ``` 93 | export RABBITMQ_BOOT_MODULE=rabbit_clusterer 94 | export RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /path/to/rabbitmq/plugins/rabbitmq_clusterer.ez/rabbitmq_clusterer-{clusterer-version}/ebin" 95 | ``` 96 | 97 | where `{clusterer-version}` is the build of the plugin (see [GitHub releases page](https://github.com/rabbitmq/rabbitmq-clusterer/releases) and [Bintray](https://bintray.com/rabbitmq/community-plugins/rabbitmq_clusterer)): 98 | 99 | ``` 100 | export RABBITMQ_BOOT_MODULE=rabbit_clusterer 101 | export RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS="-pa /path/to/rabbitmq/plugins/rabbitmq_clusterer-1.0.3.ez/rabbitmq_clusterer-1.0.3/ebin" 102 | ``` 103 | 104 | Since `.ez` files are `.zip` archives, they can be easily inspected when you are not sure about 105 | the exact name of the directory inside the file you've downloaded. 106 | 107 | ## For RabbitMQ 3.4.x 108 | 109 | ### rabbitmq-server Patch 110 | 111 | With RabbitMQ versions earlier than `3.5.4`, it is necessary to apply a `rabbitmq-server` 112 | patch and re-compile the broker. 113 | 114 | The patch is provided in 115 | [rabbitmq-clusterer/rabbitmq-server.patch](https://github.com/rabbitmq/rabbitmq-clusterer/blob/master/rabbitmq-server.patch). 116 | Change into the server scripts 117 | directory and apply it with: 118 | 119 | patch -p1 < rabbitmq-clusterer/rabbitmq-server.patch 120 | 121 | The patch assumes the plugin archive is at `${RABBITMQ_PLUGINS_DIR}/rabbitmq_clusterer.ez`. 122 | 123 | 124 | ## Usage in Environments with Dynamic Hostnames (e.g. Kubernetes) 125 | 126 | Since this plugin assumes that all cluster members are known ahead of time 127 | and listed in the config, environments with dynamically generated hostnames 128 | must be configured to use known (or completely predictable) hostnames.
129 | 130 | For Kubernetes specifically, there's an [example repository](https://github.com/MattFriedman/kubernetes-rabbitmq-clusterer) contributed by Matt Friedman. 131 | 132 | There's also [another example that uses Kubernetes 1.5.x](https://github.com/nanit/kubernetes-rabbitmq-cluster). 133 | 134 | 135 | ## Cluster Config Specification 136 | 137 | The Clusterer will communicate a new valid config to all the 138 | nodes of its current config and, in addition, to all the nodes in the 139 | new config. Even if the cluster is able to be formed in the absence of 140 | some nodes indicated in the config, the nodes of the cluster will 141 | continue to attempt to make contact with any missing nodes and will 142 | pass the config to them if and when they eventually appear. 143 | 144 | All of which means that you generally only need to supply new configs 145 | to a single node of any cluster. There is no harm in doing more than 146 | this. The Clusterer stores on disk the currently applied config (it 147 | stores this next to the mnesia directory Rabbit uses for all its 148 | persistent data) and so if a node goes down, it will have a record of 149 | the config in operation when it was last up. When it comes back up, it 150 | will attempt to rejoin that cluster, regardless of whether this node 151 | was ever explicitly given this config. 152 | 153 | There are a couple of ways to specify a cluster config: via an external 154 | file or inline. 155 | 156 | ### Using External Config File 157 | 158 | In a `rabbitmq_clusterer` section in the `rabbitmq.config` file you 159 | can add a `config` entry that is a path to a configuration file. 160 | 161 | Below are some examples.
162 | 163 | When using `rabbitmq.conf` (currently only available in RabbitMQ master): 164 | 165 | clusterer.config = /path/to/my/cluster.config 166 | 167 | When using the classic configuration format (`rabbitmq.config`, prior to 3.7.0) or `advanced.config`: 168 | 169 | [{rabbitmq_clusterer, 170 | [{config, "/path/to/my/cluster.config"}] 171 | }]. 172 | 173 | Like with `rabbitmq.config` or any other Erlang terms file, 174 | the dot at the end is mandatory. 175 | 176 | ### Using Inline Configuration in rabbitmq.config 177 | 178 | It is possible to provide cluster configuration in `rabbitmq.config`. 179 | 180 | In `rabbitmq.conf`: 181 | 182 | clusterer.version = 43 183 | clusterer.nodes.disc.1 = rabbit@hostA 184 | clusterer.nodes.disc.2 = rabbit@hostD 185 | clusterer.nodes.ram.1 = rabbit@hostB 186 | clusterer.gospel.node = rabbit@hostD 187 | 188 | Or, using the classic config format (`rabbitmq.config`, prior to 3.7.0) or `advanced.config`: 189 | 190 | [{rabbitmq_clusterer, 191 | [{config, 192 | [{version, 43}, 193 | {nodes, [{rabbit@hostA, disc}, {rabbit@hostB, ram}, {rabbit@hostD, disc}]}, 194 | {gospel, {node, rabbit@hostD}}] 195 | }] 196 | }]. 197 | 198 | This approach makes configuration management with tools such as Chef somewhat 199 | less convenient, so external configuration file is the recommended option. 200 | 201 | 202 | ### Using rabbitmqctl eval 203 | 204 | `rabbitmqctl eval 'rabbit_clusterer:apply_config().'` 205 | 206 | **This will only have any effect if there is an entry in the 207 | `rabbitmq.config` file for the Clusterer as above, and a path is 208 | specified as the value rather than a config directly.** 209 | 210 | If that is the case, then this will cause the node to reload the 211 | file containing cluster config and apply it. Note that you cannot 212 | change the path itself in the `rabbitmq.config` file dynamically: 213 | neither Rabbit nor the Clusterer will pick up any changes to that 214 | file without restarting the whole Erlang node. 
215 | 216 | `rabbitmqctl eval 'rabbit_clusterer:apply_config("/path/to/my/other/cluster.config").'` 217 | 218 | This will cause the Clusterer to attempt to load the indicated file 219 | as a cluster config and apply it. Using this method rather than the 220 | above allows the path to change dynamically and does not depend on 221 | any entries in the `rabbitmq.config` file. The path provided here is 222 | not retained in any way: providing the path here does not influence 223 | future calls to `rabbit_clusterer:apply_config().` - using 224 | `rabbit_clusterer:apply_config().` *always* attempts to inspect the 225 | path as found in `rabbitmq.config` when the node was started. 226 | 227 | Note that, if you really want to, rather than supplying a path to a file, 228 | you can supply the cluster config as a proplist directly, just as 229 | you can in the `rabbitmq.config` file itself. 230 | 231 | 232 | 233 | ## Cluster Configuration 234 | 235 | A cluster config is an Erlang proplist consisting of just three 236 | tuples. The config can be supplied to Rabbit in a variety of ways and 237 | it is in general only necessary to supply a config to a single node of 238 | a cluster: the Clusterer will take care of distributing the config to 239 | all the other nodes as necessary. 240 | 241 | [{version, 43}, 242 | {nodes, [{rabbit@hostA, disc}, {rabbit@hostB, ram}, {rabbit@hostD, disc}]}, 243 | {gospel, {node, rabbit@hostD}}]. 244 | 245 | The above gives an example cluster config. This specifies that the 246 | cluster is formed out of the nodes `rabbit@hostA`, `rabbit@hostB` and 247 | `rabbit@hostD` and that `rabbit@hostA` and `rabbit@hostD` are *disc* 248 | nodes and `rabbit@hostB` is a *ram* node. The `nodes` tuple is really 249 | the only tuple that describes the shape of the cluster. The other 250 | tuples describe how to achieve the cluster, and are thus mainly 251 | irrelevant once the cluster has been achieved.
252 | 253 | In general, the Clusterer will wait indefinitely for the conditions to 254 | be correct to form any given cluster. This is in contrast to the 255 | existing tools which will either timeout or in some cases take 256 | (arguably) unsafe actions. For example, the existing tools will allow 257 | a fresh node to fully start when it is supplied with a cluster 258 | configuration which involves other nodes which are not currently 259 | contactable. This is unsafe because those other nodes might not be 260 | fresh nodes: the intention would be for the fresh node to sync with 261 | those other nodes and preserve the data those nodes hold. When those 262 | other nodes eventually return, manual intervention is then required to 263 | throw away some data and preserve others. The Clusterer, by contrast, 264 | would wait until it could either verify that all the nodes to be part 265 | of the cluster are fresh (so there is no data to preserve at all), or 266 | failing that would wait until one of the non-fresh nodes was fully up 267 | and running, at which point it could sync with that node. 268 | 269 | * version: non negative integer 270 | 271 | All configs are versioned and this is used to decide which of any 272 | two configs is the youngest. A config which has a smaller version 273 | number is older. Configs will be ignored unless they are younger 274 | than the current config. Note that in lieu of any config being 275 | provided by the user, the default config is used which has a 276 | version of 0. Thus user supplied configs should use a version of 1 277 | or greater. 278 | 279 | * nodes: list 280 | 281 | List the names of the nodes that are to be in the cluster. If you 282 | list node names directly then they are considered to be disc 283 | nodes. If you specify nodes by using a tuple, you can specify a 284 | disc node using either `disc` or `disk`. If you want to specify 285 | ram nodes, you must use a tuple, with `ram` as the second 286 | element. 
Order of nodes does not matter. The following are all 287 | equivalent. 288 | 289 | {nodes, [rabbit@hostA, rabbit@hostD, {rabbit@hostB, ram}]} 290 | {nodes, [rabbit@hostD, rabbit@hostA, {rabbit@hostB, ram}]} 291 | {nodes, [{rabbit@hostB, ram}, rabbit@hostD, rabbit@hostA]} 292 | {nodes, [rabbit@hostA, {rabbit@hostD, disk}, {rabbit@hostB, ram}]} 293 | {nodes, [{rabbit@hostA, disc}, {rabbit@hostD, disk}, {rabbit@hostB, ram}]} 294 | 295 | * gospel: `reset` or `{node, `*nodename*`}` 296 | 297 | When multiple nodes are to become a cluster (or indeed multiple 298 | clusters are to merge: you can think of an unclustered node as a 299 | cluster of a single node) some data must be lost and some data can 300 | be preserved: given two unclustered nodes *A* and *B* that are to 301 | become a cluster, either *A*'s data can survive or *B*'s data can 302 | survive, or neither, but not both. The `gospel` tuple allows you 303 | to specify which data should survive: 304 | 305 | * `reset` will reset all nodes in the cluster. This will apply 306 | *every time the cluster config is changed and applied* (i.e. if 307 | you change some other setting in the config, bump the version 308 | number, leave the gospel as `reset` and apply the config to any 309 | node in your cluster, you will find the entire cluster 310 | resets). This is deliberate: it allows you to very easily and 311 | quickly reset an entire cluster, but in general you'll only 312 | occasionally want to set `gospel` to `reset`. 313 | 314 | * `{node, nodename}` The nodename must appear in the `nodes` 315 | tuple. The data held by the existing cluster of which *nodename* 316 | is a member will survive. Nodes that are listed in the `nodes` 317 | tuple but which are not currently members of the same cluster as 318 | *nodename* will be reset. The phrasing here is very deliberate: 319 | it is not necessary for *nodename* to actually be up and running 320 | for this to work.
If you have an existing cluster of nodes *A* 321 | and *B* and you want to add in node *C* you can set the `gospel` 322 | to be `{node, A}`, add *C* to the `nodes` tuple, bump the 323 | version and apply the config to *C* and provided *at least one* 324 | of *A* or *B* is up and running, *C* will successfully 325 | cluster. I.e. if only *B* is up, *B* still knows that it is 326 | clustered with *A*, it just happens to be the case that *A* is 327 | currently unavailable. Thus *C* can cluster with *B* and both 328 | will happily work, awaiting the return of *A*. 329 | 330 | In this particular case, the subsequent behaviour when *A* 331 | returns is important. If *A* has been reset and is now running 332 | an older config then it is *A* that is reset again to join back 333 | in with *B* and *C*. I.e. the `gospel` setting is really 334 | identifying that the data that *A* holds at a particular moment 335 | in time is the data to be preserved. When *A* comes back, having 336 | been reset, *A* realises that the `gospel` is indicating an 337 | older version of *A*, which is preserved by the surviving 338 | cluster nodes of *B* and *C*, not the newer reset data held by 339 | *A*. The upshot of this is that in your cluster, if a node 340 | fails, goes down and has to be reset, then to join it back into 341 | the cluster you don't need to alter anything in the cluster 342 | config (and indeed shouldn't): even if the failed node was named 343 | as the `gospel`, you shouldn't make any changes to the config. 344 | 345 | By contrast, if *A* comes back and has been reset but is now 346 | running a younger config than *B* and *C*, then that younger 347 | config will propagate to *B* and *C*. If *A* is named as the 348 | gospel in the new younger config, then that refers to the data 349 | held by the new younger *A*, and so *B* and *C* will reset as 350 | necessary. 351 | 352 | 353 | 354 | ### Mistakes in config files 355 | 356 | Config files can contain mistakes. 
If you apply a config file using 357 | `rabbitmqctl eval` then you'll get feedback directly. If you specify 358 | the config file via `rabbitmq.config` then any mistakes will be logged 359 | to Rabbit's log files. 360 | 361 | In general, the Clusterer tries reasonably hard to give informative 362 | messages about what it doesn't like, but that can only occur if the 363 | config is syntactically valid in the first place. If you forget to 364 | bump the version number it will complain, and generally whenever the 365 | Clusterer comes across configs with equal version numbers but 366 | semantically different contents it takes highly evasive action: in 367 | some situations, it may decide to shut down the whole Erlang node 368 | immediately. It is your responsibility to manage the version numbers: 369 | the Clusterer expects to be able to order configs by version numbers, 370 | and thus determine the youngest config. You need to ensure it can do 371 | this. If you're building cluster configs automatically, one sensible 372 | approach would be to set the version to the number of seconds since 373 | epoch, for example. 374 | 375 | 376 | ## Inspecting the Clusterer Status 377 | 378 | `rabbitmqctl cluster_status` presents basic information about 379 | clusters, but does not interact with the Clusterer. `rabbitmqctl eval 380 | 'rabbit_clusterer:status().'`, on the other hand, does, and shows 381 | which config is in operation by the node and what the Clusterer is 382 | trying to do. If the cluster has been established then the command 383 | will also display which nodes are known to be currently up and 384 | running. 385 | 386 | 387 | ## Building From Source 388 | 389 | The Clusterer reuses parts of the RabbitMQ [umbrella repository](https://github.com/rabbitmq/rabbitmq-public-umbrella). Before 390 | building the plugin, make sure it is cloned as `rabbitmq_clusterer` under it, 391 | much [like other plugins](https://www.rabbitmq.com/plugin-development.html).
392 | 393 | To build the plugin run `make`. The `VERSION` environment variable is used to specify plugin version, e.g.: 394 | 395 | VERSION=3.6.6 make 396 | 397 | To package the plugin run `make dist`. In some cases, `make clean dist` is the 398 | safest option. 399 | 400 | ### Linking in Development Environment 401 | 402 | If you're running a development environment and want to link through 403 | from the `rabbit/plugins` directory, link to 404 | `rabbitmq_clusterer/plugins/rabbitmq_clusterer-$VERSION.ez`. Do not just 405 | link to the `rabbitmq_clusterer` directory. 406 | 407 | 408 | ## License and Copyright 409 | 410 | (c) 2013-2017 Pivotal Software Inc. 411 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MOZILLA PUBLIC LICENSE 2 | Version 1.1 3 | 4 | --------------- 5 | 6 | 1. Definitions. 7 | 8 | 1.0.1. "Commercial Use" means distribution or otherwise making the 9 | Covered Code available to a third party. 10 | 11 | 1.1. "Contributor" means each entity that creates or contributes to 12 | the creation of Modifications. 13 | 14 | 1.2. "Contributor Version" means the combination of the Original 15 | Code, prior Modifications used by a Contributor, and the Modifications 16 | made by that particular Contributor. 17 | 18 | 1.3. "Covered Code" means the Original Code or Modifications or the 19 | combination of the Original Code and Modifications, in each case 20 | including portions thereof. 21 | 22 | 1.4. "Electronic Distribution Mechanism" means a mechanism generally 23 | accepted in the software development community for the electronic 24 | transfer of data. 25 | 26 | 1.5. "Executable" means Covered Code in any form other than Source 27 | Code. 28 | 29 | 1.6. "Initial Developer" means the individual or entity identified 30 | as the Initial Developer in the Source Code notice required by Exhibit 31 | A. 32 | 33 | 1.7. 
"Larger Work" means a work which combines Covered Code or 34 | portions thereof with code not governed by the terms of this License. 35 | 36 | 1.8. "License" means this document. 37 | 38 | 1.8.1. "Licensable" means having the right to grant, to the maximum 39 | extent possible, whether at the time of the initial grant or 40 | subsequently acquired, any and all of the rights conveyed herein. 41 | 42 | 1.9. "Modifications" means any addition to or deletion from the 43 | substance or structure of either the Original Code or any previous 44 | Modifications. When Covered Code is released as a series of files, a 45 | Modification is: 46 | A. Any addition to or deletion from the contents of a file 47 | containing Original Code or previous Modifications. 48 | 49 | B. Any new file that contains any part of the Original Code or 50 | previous Modifications. 51 | 52 | 1.10. "Original Code" means Source Code of computer software code 53 | which is described in the Source Code notice required by Exhibit A as 54 | Original Code, and which, at the time of its release under this 55 | License is not already Covered Code governed by this License. 56 | 57 | 1.10.1. "Patent Claims" means any patent claim(s), now owned or 58 | hereafter acquired, including without limitation, method, process, 59 | and apparatus claims, in any patent Licensable by grantor. 60 | 61 | 1.11. "Source Code" means the preferred form of the Covered Code for 62 | making modifications to it, including all modules it contains, plus 63 | any associated interface definition files, scripts used to control 64 | compilation and installation of an Executable, or source code 65 | differential comparisons against either the Original Code or another 66 | well known, available Covered Code of the Contributor's choice. The 67 | Source Code can be in a compressed or archival form, provided the 68 | appropriate decompression or de-archiving software is widely available 69 | for no charge. 70 | 71 | 1.12. 
"You" (or "Your") means an individual or a legal entity 72 | exercising rights under, and complying with all of the terms of, this 73 | License or a future version of this License issued under Section 6.1. 74 | For legal entities, "You" includes any entity which controls, is 75 | controlled by, or is under common control with You. For purposes of 76 | this definition, "control" means (a) the power, direct or indirect, 77 | to cause the direction or management of such entity, whether by 78 | contract or otherwise, or (b) ownership of more than fifty percent 79 | (50%) of the outstanding shares or beneficial ownership of such 80 | entity. 81 | 82 | 2. Source Code License. 83 | 84 | 2.1. The Initial Developer Grant. 85 | The Initial Developer hereby grants You a world-wide, royalty-free, 86 | non-exclusive license, subject to third party intellectual property 87 | claims: 88 | (a) under intellectual property rights (other than patent or 89 | trademark) Licensable by Initial Developer to use, reproduce, 90 | modify, display, perform, sublicense and distribute the Original 91 | Code (or portions thereof) with or without Modifications, and/or 92 | as part of a Larger Work; and 93 | 94 | (b) under Patents Claims infringed by the making, using or 95 | selling of Original Code, to make, have made, use, practice, 96 | sell, and offer for sale, and/or otherwise dispose of the 97 | Original Code (or portions thereof). 98 | 99 | (c) the licenses granted in this Section 2.1(a) and (b) are 100 | effective on the date Initial Developer first distributes 101 | Original Code under the terms of this License. 102 | 103 | (d) Notwithstanding Section 2.1(b) above, no patent license is 104 | granted: 1) for code that You delete from the Original Code; 2) 105 | separate from the Original Code; or 3) for infringements caused 106 | by: i) the modification of the Original Code or ii) the 107 | combination of the Original Code with other software or devices. 108 | 109 | 2.2. 
Contributor Grant. 110 | Subject to third party intellectual property claims, each Contributor 111 | hereby grants You a world-wide, royalty-free, non-exclusive license 112 | 113 | (a) under intellectual property rights (other than patent or 114 | trademark) Licensable by Contributor, to use, reproduce, modify, 115 | display, perform, sublicense and distribute the Modifications 116 | created by such Contributor (or portions thereof) either on an 117 | unmodified basis, with other Modifications, as Covered Code 118 | and/or as part of a Larger Work; and 119 | 120 | (b) under Patent Claims infringed by the making, using, or 121 | selling of Modifications made by that Contributor either alone 122 | and/or in combination with its Contributor Version (or portions 123 | of such combination), to make, use, sell, offer for sale, have 124 | made, and/or otherwise dispose of: 1) Modifications made by that 125 | Contributor (or portions thereof); and 2) the combination of 126 | Modifications made by that Contributor with its Contributor 127 | Version (or portions of such combination). 128 | 129 | (c) the licenses granted in Sections 2.2(a) and 2.2(b) are 130 | effective on the date Contributor first makes Commercial Use of 131 | the Covered Code. 132 | 133 | (d) Notwithstanding Section 2.2(b) above, no patent license is 134 | granted: 1) for any code that Contributor has deleted from the 135 | Contributor Version; 2) separate from the Contributor Version; 136 | 3) for infringements caused by: i) third party modifications of 137 | Contributor Version or ii) the combination of Modifications made 138 | by that Contributor with other software (except as part of the 139 | Contributor Version) or other devices; or 4) under Patent Claims 140 | infringed by Covered Code in the absence of Modifications made by 141 | that Contributor. 142 | 143 | 3. Distribution Obligations. 144 | 145 | 3.1. Application of License. 
146 | The Modifications which You create or to which You contribute are 147 | governed by the terms of this License, including without limitation 148 | Section 2.2. The Source Code version of Covered Code may be 149 | distributed only under the terms of this License or a future version 150 | of this License released under Section 6.1, and You must include a 151 | copy of this License with every copy of the Source Code You 152 | distribute. You may not offer or impose any terms on any Source Code 153 | version that alters or restricts the applicable version of this 154 | License or the recipients' rights hereunder. However, You may include 155 | an additional document offering the additional rights described in 156 | Section 3.5. 157 | 158 | 3.2. Availability of Source Code. 159 | Any Modification which You create or to which You contribute must be 160 | made available in Source Code form under the terms of this License 161 | either on the same media as an Executable version or via an accepted 162 | Electronic Distribution Mechanism to anyone to whom you made an 163 | Executable version available; and if made available via Electronic 164 | Distribution Mechanism, must remain available for at least twelve (12) 165 | months after the date it initially became available, or at least six 166 | (6) months after a subsequent version of that particular Modification 167 | has been made available to such recipients. You are responsible for 168 | ensuring that the Source Code version remains available even if the 169 | Electronic Distribution Mechanism is maintained by a third party. 170 | 171 | 3.3. Description of Modifications. 172 | You must cause all Covered Code to which You contribute to contain a 173 | file documenting the changes You made to create that Covered Code and 174 | the date of any change. 
You must include a prominent statement that 175 | the Modification is derived, directly or indirectly, from Original 176 | Code provided by the Initial Developer and including the name of the 177 | Initial Developer in (a) the Source Code, and (b) in any notice in an 178 | Executable version or related documentation in which You describe the 179 | origin or ownership of the Covered Code. 180 | 181 | 3.4. Intellectual Property Matters 182 | (a) Third Party Claims. 183 | If Contributor has knowledge that a license under a third party's 184 | intellectual property rights is required to exercise the rights 185 | granted by such Contributor under Sections 2.1 or 2.2, 186 | Contributor must include a text file with the Source Code 187 | distribution titled "LEGAL" which describes the claim and the 188 | party making the claim in sufficient detail that a recipient will 189 | know whom to contact. If Contributor obtains such knowledge after 190 | the Modification is made available as described in Section 3.2, 191 | Contributor shall promptly modify the LEGAL file in all copies 192 | Contributor makes available thereafter and shall take other steps 193 | (such as notifying appropriate mailing lists or newsgroups) 194 | reasonably calculated to inform those who received the Covered 195 | Code that new knowledge has been obtained. 196 | 197 | (b) Contributor APIs. 198 | If Contributor's Modifications include an application programming 199 | interface and Contributor has knowledge of patent licenses which 200 | are reasonably necessary to implement that API, Contributor must 201 | also include this information in the LEGAL file. 202 | 203 | (c) Representations. 204 | Contributor represents that, except as disclosed pursuant to 205 | Section 3.4(a) above, Contributor believes that Contributor's 206 | Modifications are Contributor's original creation(s) and/or 207 | Contributor has sufficient rights to grant the rights conveyed by 208 | this License. 209 | 210 | 3.5. 
Required Notices. 211 | You must duplicate the notice in Exhibit A in each file of the Source 212 | Code. If it is not possible to put such notice in a particular Source 213 | Code file due to its structure, then You must include such notice in a 214 | location (such as a relevant directory) where a user would be likely 215 | to look for such a notice. If You created one or more Modification(s) 216 | You may add your name as a Contributor to the notice described in 217 | Exhibit A. You must also duplicate this License in any documentation 218 | for the Source Code where You describe recipients' rights or ownership 219 | rights relating to Covered Code. You may choose to offer, and to 220 | charge a fee for, warranty, support, indemnity or liability 221 | obligations to one or more recipients of Covered Code. However, You 222 | may do so only on Your own behalf, and not on behalf of the Initial 223 | Developer or any Contributor. You must make it absolutely clear than 224 | any such warranty, support, indemnity or liability obligation is 225 | offered by You alone, and You hereby agree to indemnify the Initial 226 | Developer and every Contributor for any liability incurred by the 227 | Initial Developer or such Contributor as a result of warranty, 228 | support, indemnity or liability terms You offer. 229 | 230 | 3.6. Distribution of Executable Versions. 231 | You may distribute Covered Code in Executable form only if the 232 | requirements of Section 3.1-3.5 have been met for that Covered Code, 233 | and if You include a notice stating that the Source Code version of 234 | the Covered Code is available under the terms of this License, 235 | including a description of how and where You have fulfilled the 236 | obligations of Section 3.2. The notice must be conspicuously included 237 | in any notice in an Executable version, related documentation or 238 | collateral in which You describe recipients' rights relating to the 239 | Covered Code. 
You may distribute the Executable version of Covered 240 | Code or ownership rights under a license of Your choice, which may 241 | contain terms different from this License, provided that You are in 242 | compliance with the terms of this License and that the license for the 243 | Executable version does not attempt to limit or alter the recipient's 244 | rights in the Source Code version from the rights set forth in this 245 | License. If You distribute the Executable version under a different 246 | license You must make it absolutely clear that any terms which differ 247 | from this License are offered by You alone, not by the Initial 248 | Developer or any Contributor. You hereby agree to indemnify the 249 | Initial Developer and every Contributor for any liability incurred by 250 | the Initial Developer or such Contributor as a result of any such 251 | terms You offer. 252 | 253 | 3.7. Larger Works. 254 | You may create a Larger Work by combining Covered Code with other code 255 | not governed by the terms of this License and distribute the Larger 256 | Work as a single product. In such a case, You must make sure the 257 | requirements of this License are fulfilled for the Covered Code. 258 | 259 | 4. Inability to Comply Due to Statute or Regulation. 260 | 261 | If it is impossible for You to comply with any of the terms of this 262 | License with respect to some or all of the Covered Code due to 263 | statute, judicial order, or regulation then You must: (a) comply with 264 | the terms of this License to the maximum extent possible; and (b) 265 | describe the limitations and the code they affect. Such description 266 | must be included in the LEGAL file described in Section 3.4 and must 267 | be included with all distributions of the Source Code. Except to the 268 | extent prohibited by statute or regulation, such description must be 269 | sufficiently detailed for a recipient of ordinary skill to be able to 270 | understand it. 271 | 272 | 5. 
Application of this License. 273 | 274 | This License applies to code to which the Initial Developer has 275 | attached the notice in Exhibit A and to related Covered Code. 276 | 277 | 6. Versions of the License. 278 | 279 | 6.1. New Versions. 280 | Netscape Communications Corporation ("Netscape") may publish revised 281 | and/or new versions of the License from time to time. Each version 282 | will be given a distinguishing version number. 283 | 284 | 6.2. Effect of New Versions. 285 | Once Covered Code has been published under a particular version of the 286 | License, You may always continue to use it under the terms of that 287 | version. You may also choose to use such Covered Code under the terms 288 | of any subsequent version of the License published by Netscape. No one 289 | other than Netscape has the right to modify the terms applicable to 290 | Covered Code created under this License. 291 | 292 | 6.3. Derivative Works. 293 | If You create or use a modified version of this License (which you may 294 | only do in order to apply it to code which is not already Covered Code 295 | governed by this License), You must (a) rename Your license so that 296 | the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", 297 | "MPL", "NPL" or any confusingly similar phrase do not appear in your 298 | license (except to note that your license differs from this License) 299 | and (b) otherwise make it clear that Your version of the license 300 | contains terms which differ from the Mozilla Public License and 301 | Netscape Public License. (Filling in the name of the Initial 302 | Developer, Original Code or Contributor in the notice described in 303 | Exhibit A shall not of themselves be deemed to be modifications of 304 | this License.) 305 | 306 | 7. DISCLAIMER OF WARRANTY. 
307 | 308 | COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, 309 | WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, 310 | WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF 311 | DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. 312 | THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE 313 | IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, 314 | YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE 315 | COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER 316 | OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF 317 | ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. 318 | 319 | 8. TERMINATION. 320 | 321 | 8.1. This License and the rights granted hereunder will terminate 322 | automatically if You fail to comply with terms herein and fail to cure 323 | such breach within 30 days of becoming aware of the breach. All 324 | sublicenses to the Covered Code which are properly granted shall 325 | survive any termination of this License. Provisions which, by their 326 | nature, must remain in effect beyond the termination of this License 327 | shall survive. 328 | 329 | 8.2. 
If You initiate litigation by asserting a patent infringement 330 | claim (excluding declatory judgment actions) against Initial Developer 331 | or a Contributor (the Initial Developer or Contributor against whom 332 | You file such action is referred to as "Participant") alleging that: 333 | 334 | (a) such Participant's Contributor Version directly or indirectly 335 | infringes any patent, then any and all rights granted by such 336 | Participant to You under Sections 2.1 and/or 2.2 of this License 337 | shall, upon 60 days notice from Participant terminate prospectively, 338 | unless if within 60 days after receipt of notice You either: (i) 339 | agree in writing to pay Participant a mutually agreeable reasonable 340 | royalty for Your past and future use of Modifications made by such 341 | Participant, or (ii) withdraw Your litigation claim with respect to 342 | the Contributor Version against such Participant. If within 60 days 343 | of notice, a reasonable royalty and payment arrangement are not 344 | mutually agreed upon in writing by the parties or the litigation claim 345 | is not withdrawn, the rights granted by Participant to You under 346 | Sections 2.1 and/or 2.2 automatically terminate at the expiration of 347 | the 60 day notice period specified above. 348 | 349 | (b) any software, hardware, or device, other than such Participant's 350 | Contributor Version, directly or indirectly infringes any patent, then 351 | any rights granted to You by such Participant under Sections 2.1(b) 352 | and 2.2(b) are revoked effective as of the date You first made, used, 353 | sold, distributed, or had made, Modifications made by that 354 | Participant. 355 | 356 | 8.3. 
If You assert a patent infringement claim against Participant 357 | alleging that such Participant's Contributor Version directly or 358 | indirectly infringes any patent where such claim is resolved (such as 359 | by license or settlement) prior to the initiation of patent 360 | infringement litigation, then the reasonable value of the licenses 361 | granted by such Participant under Sections 2.1 or 2.2 shall be taken 362 | into account in determining the amount or value of any payment or 363 | license. 364 | 365 | 8.4. In the event of termination under Sections 8.1 or 8.2 above, 366 | all end user license agreements (excluding distributors and resellers) 367 | which have been validly granted by You or any distributor hereunder 368 | prior to termination shall survive termination. 369 | 370 | 9. LIMITATION OF LIABILITY. 371 | 372 | UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT 373 | (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL 374 | DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, 375 | OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR 376 | ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY 377 | CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, 378 | WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER 379 | COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN 380 | INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF 381 | LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY 382 | RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW 383 | PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE 384 | EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO 385 | THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. 386 | 387 | 10. U.S. GOVERNMENT END USERS. 388 | 389 | The Covered Code is a "commercial item," as that term is defined in 390 | 48 C.F.R. 2.101 (Oct. 
1995), consisting of "commercial computer 391 | software" and "commercial computer software documentation," as such 392 | terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 393 | C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), 394 | all U.S. Government End Users acquire Covered Code with only those 395 | rights set forth herein. 396 | 397 | 11. MISCELLANEOUS. 398 | 399 | This License represents the complete agreement concerning subject 400 | matter hereof. If any provision of this License is held to be 401 | unenforceable, such provision shall be reformed only to the extent 402 | necessary to make it enforceable. This License shall be governed by 403 | California law provisions (except to the extent applicable law, if 404 | any, provides otherwise), excluding its conflict-of-law provisions. 405 | With respect to disputes in which at least one party is a citizen of, 406 | or an entity chartered or registered to do business in the United 407 | States of America, any litigation relating to this License shall be 408 | subject to the jurisdiction of the Federal Courts of the Northern 409 | District of California, with venue lying in Santa Clara County, 410 | California, with the losing party responsible for costs, including 411 | without limitation, court costs and reasonable attorneys' fees and 412 | expenses. The application of the United Nations Convention on 413 | Contracts for the International Sale of Goods is expressly excluded. 414 | Any law or regulation which provides that the language of a contract 415 | shall be construed against the drafter shall not apply to this 416 | License. 417 | 418 | 12. RESPONSIBILITY FOR CLAIMS. 
419 | 420 | As between Initial Developer and the Contributors, each party is 421 | responsible for claims and damages arising, directly or indirectly, 422 | out of its utilization of rights under this License and You agree to 423 | work with Initial Developer and Contributors to distribute such 424 | responsibility on an equitable basis. Nothing herein is intended or 425 | shall be deemed to constitute any admission of liability. 426 | 427 | 13. MULTIPLE-LICENSED CODE. 428 | 429 | Initial Developer may designate portions of the Covered Code as 430 | "Multiple-Licensed". "Multiple-Licensed" means that the Initial 431 | Developer permits you to utilize portions of the Covered Code under 432 | Your choice of the MPL or the alternative licenses, if any, specified 433 | by the Initial Developer in the file described in Exhibit A. 434 | 435 | EXHIBIT A -Mozilla Public License. 436 | 437 | ``The contents of this file are subject to the Mozilla Public License 438 | Version 1.1 (the "License"); you may not use this file except in 439 | compliance with the License. You may obtain a copy of the License at 440 | https://www.mozilla.org/MPL/ 441 | 442 | Software distributed under the License is distributed on an "AS IS" 443 | basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 444 | License for the specific language governing rights and limitations 445 | under the License. 446 | 447 | The Original Code is RabbitMQ. 448 | 449 | The Initial Developer of the Original Code is Pivotal Software, Inc. 450 | -------------------------------------------------------------------------------- /src/rabbit_clusterer_coordinator.erl: -------------------------------------------------------------------------------- 1 | %% The contents of this file are subject to the Mozilla Public License 2 | %% Version 1.1 (the "License"); you may not use this file except in 3 | %% compliance with the License. 
%% You may obtain a copy of the License at
%% https://www.mozilla.org/MPL/1.1/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%% License for the specific language governing rights and limitations
%% under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is Pivotal Software, Inc.
%% Portions created by the Initial Developer are Copyright (C) 2013-2016
%% Pivotal Software, Inc. All Rights Reserved.

%% Coordinator process of the clusterer. It is a gen_server registered
%% locally under the module name. It tracks a status (the values seen
%% in this part of the file are preboot, booting, ready and
%% {transitioner, Kind}), the node's current config, and forwards
%% events to the active transitioner via transitioner_event/2.
%%
%% NOTE(review): reply/2, noreply/1, transitioner_event/2,
%% begin_transition/2, set_status/2 and ensure_poke_timer/1 are used
%% throughout the callbacks but are defined further down the file, so
%% their exact behaviour is not described here.

-module(rabbit_clusterer_coordinator).

-behaviour(gen_server).

%% Public API.
-export([begin_coordination/0,
         rabbit_booted/0,
         rabbit_boot_failed/0,
         send_new_config/2,
         template_new_config/1,
         apply_config/1,
         request_status/1]).

%% gen_server callbacks (plus start_link/0).
-export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-define(SERVER, ?MODULE).

%% True only for the two transitioner kinds named here (join and
%% rejoin). Written with =:= and orelse so it can be used in a guard.
-define(IS_TRANSITIONER(X), (X =:= {transitioner, join} orelse
                             X =:= {transitioner, rejoin})).

%% Server state. As far as this part of the file shows:
%%   status             - preboot | booting | ready | {transitioner, Kind}
%%   node_id            - our node id; undefined until begin_coordination
%%                        loads it
%%   config             - the config we currently hold; undefined until
%%                        begin_coordination loads it
%%   transitioner_state - initialised to undefined; not touched in this
%%                        part of the file (presumably owned by the
%%                        transitioner helpers - TODO confirm)
%%   comms              - handle of the current comms process, or
%%                        undefined; used to filter {comms, _, _} casts
%%                        and to serve lock/unlock requests
%%   nodes              - nodes that sent us a new_config whilst we were
%%                        in preboot/booting and could not act on it
%%   alive_mrefs        - monitor references of nodes we are monitoring
%%   dead               - nodes whose monitor has fired and which have
%%                        not yet been poked again
%%   poke_timer_ref     - reset to undefined whenever poke_the_dead
%%                        arrives (presumably set by ensure_poke_timer/1
%%                        - TODO confirm)
%%   booted             - set to true once rabbit_booted is received in
%%                        booting
%%   last_boot_failed   - set to true on rabbit_boot_failed, cleared on
%%                        rabbit_booted
-record(state, { status,
                 node_id,
                 config,
                 transitioner_state,
                 comms,
                 nodes,
                 alive_mrefs,
                 dead,
                 poke_timer_ref,
                 booted,
                 last_boot_failed
               }).

%%----------------------------------------------------------------------------

%% Starts the coordinator and registers it locally as ?SERVER.
start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

%% Asynchronously asks the local coordinator to leave preboot: it
%% loads the config and begins a transition. The cast is a no-op in
%% any status other than preboot.
begin_coordination() -> ok = gen_server:cast(?SERVER, begin_coordination).

%% Asynchronously tells the local coordinator that rabbit has booted.
%% Only the booting, preboot and ready statuses have a dedicated
%% clause; in any other status the cast reaches the catch-all
%% handle_cast clause, which stops the server.
rabbit_booted() -> ok = gen_server:cast(?SERVER, rabbit_booted).

%% Asynchronously tells the local coordinator that booting rabbit
%% failed. Only handled in the booting status; in any other status the
%% cast reaches the catch-all handle_cast clause, which stops the
%% server.
rabbit_boot_failed() -> ok = gen_server:cast(?SERVER, rabbit_boot_failed).

%% Sends {new_config, Config, node()} to the coordinator on a single
%% node (an atom) or on a list of nodes. The list is de-duplicated
%% with lists:usort/1 before broadcasting; the empty list sends
%% nothing. Always returns ok.
send_new_config(Config, Node) when is_atom(Node) ->
    %% Node may be undefined. gen_server:cast doesn't error. This is
    %% what we want.
    ok = gen_server:cast({?SERVER, Node}, template_new_config(Config));
send_new_config(_Config, []) ->
    ok;
send_new_config(Config, Nodes) when is_list(Nodes) ->
    abcast = gen_server:abcast(
               lists:usort(Nodes), ?SERVER, template_new_config(Config)),
    ok.

%% Builds the new_config message for Config, tagged with the name of
%% the sending node.
template_new_config(Config) -> {new_config, Config, node()}.

%% Synchronously (no timeout) asks the local coordinator to apply
%% Config. The reply is one of the terms produced by the
%% {apply_config, _} clauses of handle_call/3 below, for example
%% {provided_config_already_applied, _} or
%% {cannot_apply_config_currently, Status}.
apply_config(Config) ->
    gen_server:call(?SERVER, {apply_config, Config}, infinity).

%% Synchronously (no timeout) asks the coordinator on Node for its
%% status. The requester identifies itself with node 'undefined' and
%% an empty binary as node id. The immediate replies are 'preboot' or
%% {Config, Status}; when the remote is in a transitioner status the
%% reply is deferred to the transitioner (see handle_call/3).
request_status(Node) ->
    gen_server:call(
      {?SERVER, Node}, {request_status, undefined, <<>>}, infinity).

%%----------------------------------------------------------------------------

%% The coordinator always starts in preboot with nothing loaded; it
%% stays there until begin_coordination is cast to it.
init([]) -> {ok, #state { status             = preboot,
                          node_id            = undefined,
                          config             = undefined,
                          transitioner_state = undefined,
                          comms              = undefined,
                          nodes              = [],
                          alive_mrefs        = [],
                          dead               = [],
                          poke_timer_ref     = undefined,
                          booted             = false,
                          last_boot_failed   = false
                        }}.

%%----------------
%% Call
%%----------------

%% request_status requires a response and is only used by the
%% transitioners to perform coordination when joining or rejoining a
%% cluster.
handle_call({request_status, _Node, _NodeID}, _From,
            State = #state { status = preboot }) ->
    %% If status = preboot then we have the situation that a remote
    %% node is contacting us (it's in {transitioner,_}) before we've
    %% even started reading in our cluster configs. We need to "ignore"
    %% them. They'll either wait for us, or they'll start up and bring
    %% us in later on anyway.
    reply(preboot, State);
handle_call({request_status, NewNode, NewNodeID}, From,
            State = #state { status = Status = {transitioner, _} }) ->
    %% We do not reply here. Instead the transitioner is handed a fun
    %% which, when called with a config, sends {Config, Status} back
    %% to the caller.
    Fun = fun (Config) -> gen_server:reply(From, {Config, Status}), ok end,
    noreply(transitioner_event(
              {request_config, NewNode, NewNodeID, Fun}, State));
handle_call({request_status, NewNode, NewNodeID}, _From,
            State = #state { status  = Status,
                             node_id = NodeID,
                             config  = Config }) ->
    %% Status \in {booting, ready}
    %%
    %% Consider we're running (ready) and we're already clustered with
    %% NewNode, though it's currently down and is just coming back up,
    %% after being reset. At this point, we will learn of its new
    %% NodeID, but we must ignore that: if we merged it into our
    %% config here then should NewNode be starting up with a newer
    %% config that eventually involves us, we would lose the ability
    %% in is_compatible to detect the node has been reset. Hence
    %% ignoring NewNodeID here.
    %%
    %% Equally however, consider we're running in a cluster which has
    %% some missing nodes. Those nodes then come online and request
    %% our status. We should here record their ID. So we want to add
    %% their ID in only if we don't have a record of it already.
    %%
    %% This is consistent with the behaviour of the transitioners
    %% (above head) who will restart the transition if the NewNode has
    %% changed its ID.
    %%
    %% On {true, _} the config returned by add_node_id is discarded
    %% and our own config is kept; on {false, Config2} the returned
    %% config is adopted.
    Config1 = case rabbit_clusterer_config:add_node_id(NewNode, NewNodeID,
                                                       NodeID, Config) of
                  {true, _Config}  -> Config;
                  {false, Config2} -> Config2
              end,
    reply({Config1, Status}, State #state { config = Config1 });

%% This is where a call from the transitioner on one node to the
%% transitioner on another node lands.
%%
%% The first clause only matches when the status named in the message
%% is exactly our own current status (Status is bound in the message
%% pattern and then matched again in the state). The reply is deferred
%% to the transitioner through Fun.
handle_call({{transitioner, _TKind} = Status, Msg}, From,
            State = #state { status = Status }) ->
    Fun = fun (Result) -> gen_server:reply(From, Result), ok end,
    noreply(transitioner_event({Msg, Fun}, State));
handle_call({{transitioner, _TKind}, _Msg}, _From, State) ->
    %% The remote transitioner's status is not our current status.
    reply(invalid, State);

%% apply_config is only acted upon when we are ready or running a
%% join/rejoin transitioner; every other status is rejected by the
%% clause that follows this one.
handle_call({apply_config, NewConfig}, From,
            State = #state { status = Status,
                             config = Config })
  when Status =:= ready orelse ?IS_TRANSITIONER(Status) ->
    case {rabbit_clusterer_config:load(NewConfig), Status} of
        {{ok, NewConfig1}, {transitioner, _}} ->
            %% We have to defer to the transitioner here which means
            %% we can't give back as good feedback, but never
            %% mind. The transitioner will do the comparison for us
            %% with whatever it's currently trying to transition to.
            gen_server:reply(From, transition_in_progress_ok),
            noreply(transitioner_event(
                      {new_config, NewConfig1, undefined}, State));
        {{ok, NewConfig1}, ready} ->
            %% Status is already known to be ready in this branch, so
            %% this is true exactly when rabbit is not running.
            ReadyNotRunning = Status =:= ready andalso not rabbit:is_running(),
            case rabbit_clusterer_config:compare(NewConfig1, Config) of
                younger when ReadyNotRunning ->
                    reply({rabbit_not_running, NewConfig1}, State);
                %% The caller is answered before the transition is
                %% begun.
                younger -> gen_server:reply(
                             From, {beginning_transition_to_provided_config,
                                    NewConfig1}),
                           noreply(begin_transition(NewConfig1, State));
                older   -> reply({provided_config_is_older_than_current,
                                  NewConfig1, Config}, State);
                coeval  -> reply({provided_config_already_applied,
                                  NewConfig1}, State);
                invalid ->
                    reply(
                      {provided_config_has_same_version_but_differs_from_current,
                       NewConfig1, Config}, State)
            end;
        {{error, Reason}, _} ->
            reply({invalid_config_specification, NewConfig, Reason}, State)
    end;
handle_call({apply_config, _Config}, _From,
            State = #state { status = Status }) ->
    reply({cannot_apply_config_currently, Status}, State);

%% anything else kills us
handle_call(Msg, From, State) ->
    {stop, {unhandled_call, Msg, From}, State}.

%%----------------
%% Cast
%%----------------

%% begin_coordination: in preboot, load our node id and configs and
%% begin a transition to the loaded NewConfig, recording OldConfig as
%% the config we currently hold. In any other status it is ignored.
handle_cast(begin_coordination, State = #state { status  = preboot,
                                                 node_id = NodeID,
                                                 config  = Config }) ->
    {NewNodeID, NewConfig, OldConfig} = rabbit_clusterer_config:load(
                                          NodeID, Config),
    noreply(
      begin_transition(NewConfig, State #state { node_id = NewNodeID,
                                                 config  = OldConfig }));
handle_cast(begin_coordination, State) ->
    noreply(State);

%% The first clause requires the Comms in the message to be the very
%% comms handle held in the state, and a transitioner to be active.
handle_cast({comms, Comms, Result},
            State = #state { comms = Comms, status = {transitioner, _} }) ->
    %% This is a response from the comms process coming back to the
    %% transitioner
    noreply(transitioner_event({comms, Result}, State));
handle_cast({comms, _Comms, _Result}, State) ->
    %% Ignore it - either we're not transitioning, or it's from an old
    %% comms pid.
    noreply(State);

%% new_config is sent to update nodes that we come across through some
%% means that we think they're running an old config and should be
%% updated to run a newer config. It is also sent periodically to any
%% missing nodes in the cluster to make sure that should they appear
%% they will be informed of the cluster config we expect them to take
%% part in.
handle_cast({new_config, _ConfigRemote, Node},
            State = #state { status = preboot,
                             nodes  = Nodes }) ->
    %% In preboot we don't know what our eventual config is going to
    %% be so as a result we just ignore the provided remote config but
    %% make a note to send over our eventual config to this node once
    %% we've sorted ourselves out.
    %%
    %% Don't worry about dupes, we'll filter them out when we come to
    %% deal with the list.
    noreply(State #state { nodes = [Node | Nodes] });
handle_cast({new_config, ConfigRemote, Node},
            State = #state { status  = booting,
                             nodes   = Nodes,
                             node_id = NodeID,
                             config  = Config }) ->
    %% In booting, it's not safe to reconfigure our own rabbit, and
    %% given the transitioning state of mnesia during rabbit boot we
    %% don't want anyone else to interfere either, so again, we just
    %% wait. But we do update our config node_id map if the
    %% ConfigRemote is coeval with our own.
    case rabbit_clusterer_config:compare(ConfigRemote, Config) of
        coeval -> Config1 = rabbit_clusterer_config:update_node_id(
                              Node, ConfigRemote, NodeID, Config),
                  ok = rabbit_clusterer_config:store_internal(
                         NodeID, Config1),
                  noreply(State #state { config = Config1 });
        %% Any other comparison result: just remember the sender.
        _      -> noreply(State #state { nodes = [Node | Nodes] })
    end;
handle_cast({new_config, ConfigRemote, Node},
            State = #state { status = {transitioner, _} }) ->
    %% We have to deal with this case because we could have the
    %% situation where we are blocked in the transitioner waiting for
    %% another node to come up but there really is a younger config
    %% that has become available that we should be transitioning
    %% to. If we don't deal with this we can potentially have a
    %% deadlock.
    noreply(transitioner_event({new_config, ConfigRemote, Node}, State));
handle_cast({new_config, ConfigRemote, Node},
            State = #state { status  = ready,
                             node_id = NodeID,
                             config  = Config }) ->
    %% We a) know what our config really is; b) it's safe to begin
    %% transitions to other configurations.
    Running = rabbit:is_running(),
    case rabbit_clusterer_config:compare(ConfigRemote, Config) of
        younger when not Running ->
            %% Something has stopped Rabbit. Maybe the
            %% partition handler. Thus we're going to refuse to
            %% do anything for the time being.
            noreply(State);
        younger -> %% Remote is younger. We should switch to it. We
                   %% deliberately do not merge across the configs at
                   %% this stage as it would break is_compatible.
                   %% begin_transition will reboot if necessary.
                   noreply(begin_transition(ConfigRemote, State));
        older   -> %% The sender is behind us: send our config back.
                   ok = send_new_config(Config, Node),
                   noreply(State);
        coeval  -> Config1 = rabbit_clusterer_config:update_node_id(
                               Node, ConfigRemote, NodeID, Config),
                   ok = rabbit_clusterer_config:store_internal(
                          NodeID, Config1),
                   noreply(State #state { config = Config1 });
        invalid -> %% Whilst invalid, the fact is that we are ready,
                   %% so we don't want to disturb that.
                   noreply(State)
    end;

handle_cast(rabbit_booted, State = #state { status = booting }) ->
    %% Note that we don't allow any transition to start whilst we're
    %% booting so it should be safe to assert we can only receive
    %% rabbit_booted when in booting.
    noreply(set_status(ready, State #state { booted           = true,
                                             last_boot_failed = false }));
handle_cast(rabbit_booted, State = #state { status = preboot }) ->
    %% Very likely they forgot to edit the rabbitmq-server
    %% script. Complain very loudly.
    %%
    %% The message goes both to the error logger and to standard
    %% output, then the whole Erlang node is asked to stop and this
    %% server stops with reason startup_error.
    Msg = "RabbitMQ Clusterer is enabled as a plugin but has "
        "not been started correctly. Terminating RabbitMQ.~n",
    error_logger:error_msg(Msg, []),
    io:format(Msg, []),
    init:stop(),
    {stop, startup_error, State};
handle_cast(rabbit_booted, State = #state { status = ready }) ->
    %% This can happen if the partition handler stopped and then
    %% restarted rabbit.
    noreply(State);

handle_cast(rabbit_boot_failed, State = #state { status = booting,
                                                 config = Config }) ->
    %% Just to be on the safe side, do the stop_rabbit as well
    %% (thinking is that rabbit itself could have managed to start
    %% just fine, but some plugin/separate-app failed to start;
    %% possibly rabbit could still be running). The stop_mnesia is
    %% crucial: rabbit expects mnesia to be stopped on boot, so if the
    %% boot failed, we must be sure to stop mnesia.
    %%
    %% Afterwards a transition to the config we already hold is begun
    %% again, with last_boot_failed recorded in the state.
    ok = rabbit_clusterer_utils:stop_rabbit(),
    ok = rabbit_clusterer_utils:stop_mnesia(),
    noreply(begin_transition(Config, State #state { last_boot_failed = true }));

%% lock/unlock requests are delegated to the comms process when there
%% is one. Without a comms process a lock is rejected by casting
%% {lock_rejected, node()} back to the Locker, and an unlock is
%% silently ignored.
handle_cast({lock, Locker}, State = #state { comms = undefined }) ->
    gen_server:cast(Locker, {lock_rejected, node()}),
    noreply(State);
handle_cast({lock, Locker}, State = #state { comms = Comms }) ->
    ok = rabbit_clusterer_comms:lock(Locker, Comms),
    noreply(State);
handle_cast({unlock, _Locker}, State = #state { comms = undefined }) ->
    noreply(State);
handle_cast({unlock, Locker}, State = #state { comms = Comms }) ->
    ok = rabbit_clusterer_comms:unlock(Locker, Comms),
    noreply(State);

%% anything else kills us
handle_cast(Msg, State) ->
    {stop, {unhandled_cast, Msg}, State}.

%%----------------
%% Info
%%----------------

handle_info({transitioner_delay, Event},
            State = #state { status = {transitioner, _} }) ->
    %% A transitioner wanted some sort of timer based callback. Note
    %% it is the transitioner's responsibility to filter out
    %% invalid/outdated etc delayed events.
    noreply(transitioner_event(Event, State));
handle_info({transitioner_delay, _Event}, State) ->
    %% No transitioner is active any more: drop the delayed event.
    noreply(State);

%% Monitoring stuff
handle_info({'DOWN', MRef, process, {?SERVER, Node}, _Info},
            State = #state { alive_mrefs = Alive, dead = Dead }) ->
    %% lists:delete/2 returns the list unchanged when MRef is not in
    %% it. The first branch matches against the already bound Alive,
    %% so a 'DOWN' for a monitor we no longer track is ignored.
    %% Otherwise the node is moved to the dead list and
    %% ensure_poke_timer/1 is applied to the new state.
    case lists:delete(MRef, Alive) of
        Alive  -> noreply(State);
        Alive1 -> noreply(ensure_poke_timer(
                            State #state { alive_mrefs = Alive1,
                                           dead        = [Node | Dead] }))
    end;
handle_info(poke_the_dead, State = #state { dead        = Dead,
                                            alive_mrefs = Alive,
                                            status      = ready,
                                            config      = Config }) ->
    %% When we're transitioning to something else (or even booting) we
    %% don't bother with the poke as the transitioner will take care
    %% of updating nodes we want to cluster with and the surrounding
    %% code will update the nodes we're currently clustered with and
    %% any other nodes that contacted us whilst we were transitioning
    %% or booting.
    %%
    %% Every dead node is monitored afresh and sent our config; the
    %% dead list is then emptied.
    MRefsNew = [erlang:monitor(process, {?SERVER, N}) || N <- Dead],
    ok = send_new_config(Config, Dead),
    Alive1 = MRefsNew ++ Alive,
    noreply(State #state { dead           = [],
                           alive_mrefs    = Alive1,
                           poke_timer_ref = undefined });
handle_info(poke_the_dead, State) ->
    %% Not ready: only forget the timer reference; the dead list is
    %% left as it is.
    noreply(State #state { poke_timer_ref = undefined });

%% anything else kills us
handle_info(Msg, State) ->
    {stop, {unhandled_info, Msg}, State}.

%%----------------
%% Rest
%%----------------

%% Nothing to clean up on termination.
terminate(_Reason, _State) -> ok.

%% The state is carried over unchanged across code upgrades.
code_change(_OldVsn, State, _Extra) -> {ok, State}.

%%----------------------------------------------------------------------------
%% Status changes state machine
%%----------------------------------------------------------------------------

%% Here we enforce the state machine of valid changes to status.

%% preboot           -> a transitioner ({transitioner, TKind})
%% preboot           -> shutdown
%% {transitioner, _} -> booting
%% {transitioner, _} -> a transitioner
%% {transitioner, _} -> shutdown
%% booting           -> ready
%% booting           -> booting
%% booting           -> a transitioner
%% ready             -> a transitioner
%% ready             -> shutdown

%% Moves the coordinator to Next and performs the side effects that
%% belong to that status. A request that matches none of the clauses
%% is not handled and fails with function_clause.
set_status(Next, State) when ?IS_TRANSITIONER(Next) ->
    %% Entering a transitioner has no side effects of its own.
    State #state { status = Next };
set_status(booting, #state { status  = Current,
                             booted  = HasBooted,
                             node_id = NodeID,
                             config  = Config } = State)
  when ?IS_TRANSITIONER(Current) orelse Current =:= booting ->
    Proplist = rabbit_clusterer_config:to_proplist(NodeID, Config),
    error_logger:info_msg(
      "Clusterer booting Rabbit into cluster configuration:~n~p~n",
      [Proplist]),
    %% The booted flag selects between the two asynchronous start
    %% helpers; either one must answer ok.
    ok = case HasBooted of
             true  -> rabbit_clusterer_utils:start_rabbit_async();
             false -> rabbit_clusterer_utils:boot_rabbit_async()
         end,
    State #state { status = booting };
set_status(ready, #state { status = booting } = State) ->
    error_logger:info_msg("Cluster achieved and Rabbit running.~n"),
    Ready = State #state { status = ready },
    update_monitoring(Ready);
set_status(shutdown, #state { status = Current } = State)
  when Current =/= booting ->
    %% Even though we think we're ready, there might still be some
    %% rabbit boot actions going on, hence the full stop_rabbit/0.
    ok = if Current =:= ready -> stop_rabbit();
            true              -> ok
         end,
    error_logger:info_msg("Clusterer stopping node now.~n"),
    init:stop(),
    State #state { status = shutdown }.

%% gen_server return for casts/infos: stop normally once the status
%% has become shutdown, otherwise carry on.
noreply(#state { status = shutdown } = State) ->
    {stop, normal, State};
noreply(State) ->
    {noreply, State}.

%% gen_server return for calls: same rule as noreply/1, with a reply.
reply(Reply, #state { status = shutdown } = State) ->
    {stop, normal, Reply, State};
reply(Reply, State) ->
    {reply, Reply, State}.
452 | 453 | %%---------------------------------------------------------------------------- 454 | %% Changing cluster config 455 | %%---------------------------------------------------------------------------- 456 | 457 | begin_transition(NewConfig, State = #state { config = Config }) -> 458 | case rabbit_clusterer_config:contains_node(node(), NewConfig) of 459 | false -> process_transitioner_response({shutdown, NewConfig}, State); 460 | true -> begin_transition( 461 | rabbit_clusterer_config:is_compatible(NewConfig, Config), 462 | rabbit_clusterer_config:transfer_node_ids(Config, NewConfig), 463 | State) 464 | end. 465 | 466 | begin_transition(true, NewConfig, State = #state { status = ready, 467 | node_id = NodeID }) -> 468 | ok = rabbit_clusterer_config:store_internal(NodeID, NewConfig), 469 | error_logger:info_msg( 470 | "Clusterer seemlessly transitioned to new configuration:~n~p~n", 471 | [rabbit_clusterer_config:to_proplist(NodeID, NewConfig)]), 472 | update_monitoring(State #state { config = NewConfig }); 473 | begin_transition(false, NewConfig, State = #state { status = ready }) -> 474 | ok = stop_rabbit(), 475 | join_or_rejoin(join, NewConfig, State); 476 | begin_transition(true, NewConfig, State) -> 477 | join_or_rejoin(rejoin, NewConfig, State); 478 | begin_transition(false, NewConfig, State) -> 479 | join_or_rejoin(join, NewConfig, State). 480 | 481 | join_or_rejoin(TKind, NewConfig, State = #state { node_id = NodeID, 482 | nodes = Nodes, 483 | last_boot_failed = LBF }) -> 484 | ok = send_new_config(NewConfig, Nodes), 485 | %% Wipe out alive_mrefs and dead so that if we get DOWN's we don't 486 | %% care about them. 487 | {Comms, State1} = fresh_comms(State #state { alive_mrefs = [], 488 | dead = [], 489 | nodes = [] }), 490 | process_transitioner_response( 491 | rabbit_clusterer_transitioner:init(TKind, NodeID, NewConfig, LBF, Comms), 492 | set_status({transitioner, TKind}, State1)). 

%% Feeds Event to the running transitioner (only valid while the
%% status is {transitioner, _}) and interprets whatever it answers.
transitioner_event(Event, State = #state { status = {transitioner, _TKind},
                                           transitioner_state = TState }) ->
    process_transitioner_response(
      rabbit_clusterer_transitioner:event(Event, TState), State).

%% Interprets a transitioner result and returns the updated
%% coordinator state. The results handled are:
%%   {continue, TState}            - keep the new transitioner state
%%   {success | shutdown, Config}  - persist Config, then boot or stop
%%   {config_changed, Config}      - start over with Config
%%   {sleep, Delay, Event, TState} - redeliver Event after Delay ms
%%   {invalid_config, Config}      - log and shut down
process_transitioner_response({continue, TState}, State) ->
    State #state { transitioner_state = TState };
process_transitioner_response({SuccessOrShutdown, ConfigNew},
                              State = #state { node_id = NodeID })
  when SuccessOrShutdown =:= success orelse SuccessOrShutdown =:= shutdown ->
    %% Both success and shutdown are treated the same as they're exit
    %% nodes from the states of the transitioners. If we've had a
    %% config applied to us that tells us to shutdown, we must record
    %% that config, otherwise we can later be restarted and try to
    %% start up with an outdated config.
    ok = rabbit_clusterer_config:store_internal(NodeID, ConfigNew),
    State1 = stop_comms(State #state { transitioner_state = undefined,
                                       config = ConfigNew }),
    case SuccessOrShutdown of
        success -> %% Wait for the ready transition before updating monitors
            set_status(booting, State1);
        shutdown -> set_status(shutdown, stop_monitoring(State1))
    end;
process_transitioner_response({config_changed, ConfigNew}, State) ->
    %% begin_transition relies on unmerged configs, so don't merge
    %% through here.
    begin_transition(ConfigNew, State);
process_transitioner_response({sleep, Delay, Event, TState}, State) ->
    %% The timer arrives back at this process as
    %% {transitioner_delay, Event}; the timer reference is not kept.
    erlang:send_after(Delay, self(), {transitioner_delay, Event}),
    State #state { transitioner_state = TState };
process_transitioner_response({invalid_config, Config},
                              State = #state { node_id = NodeID }) ->
    %% An invalid config was detected somewhere. We shut ourselves
    %% down, but we do not write out the config. Do not
    %% update_monitoring either.
    State1 = stop_comms(State #state { transitioner_state = undefined }),
    error_logger:info_msg("Multiple different configurations with equal "
                          "version numbers detected. Shutting down.~n~p~n",
                          [rabbit_clusterer_config:to_proplist(
                             NodeID, Config)]),
    set_status(shutdown, State1).

%% Stops the current comms (if any) and starts a new one. Returns the
%% new comms token together with the state that records it.
fresh_comms(State) ->
    State1 = stop_comms(State),
    {ok, Token} = rabbit_clusterer_comms_sup:start_comms(),
    {Token, State1 #state { comms = Token }}.

%% Stops the comms recorded in the state; a no-op when there is none.
stop_comms(State = #state { comms = undefined }) ->
    State;
stop_comms(State = #state { comms = Token }) ->
    ok = rabbit_clusterer_comms:stop(Token),
    State #state { comms = undefined }.

%%----------------------------------------------------------------------------
%% Helpers
%%----------------------------------------------------------------------------

%% Waits for Rabbit's startup to finish, then stops Rabbit and mnesia.
%% Every step must return ok, otherwise this crashes with badmatch.
stop_rabbit() ->
    %% This is not idempotent and always assumes that Rabbit should be
    %% running (or at least booting) before being called.
    error_logger:info_msg("Clusterer stopping Rabbit.~n"),
    ok = rabbit:await_startup(),
    ok = rabbit_clusterer_utils:stop_rabbit(),
    ok = rabbit_clusterer_utils:stop_mnesia(),
    ok.

%% Rebuilds monitoring from the config in the state: drops the old
%% monitors, sends the config to nodes that were not previously in the
%% nodes list, and monitors the coordinator on every node of the
%% config other than ourselves.
update_monitoring(State = #state { config = ConfigNew,
                                   nodes = NodesOld }) ->
    State1 = stop_monitoring(State),
    NodesNew = rabbit_clusterer_config:nodenames(ConfigNew) -- [node()],
    %% stop_monitoring/1 has already sent the config to NodesOld.
    ok = send_new_config(ConfigNew, NodesNew -- NodesOld),
    AliveNew = [erlang:monitor(process, {?SERVER, N}) || N <- NodesNew],
    State1 #state { nodes = NodesNew,
                    alive_mrefs = AliveNew}.

%% Stops monitoring every peer recorded in the state.
%%
%% Sends our current config to the nodes we were tracking, drops all
%% monitors listed in alive_mrefs, cancels a pending poke_the_dead
%% timer, and returns the state with nodes, alive_mrefs and dead
%% emptied and poke_timer_ref reset to undefined.
stop_monitoring(State = #state { config         = ConfigNew,
                                 nodes          = NodesOld,
                                 alive_mrefs    = AliveOld,
                                 poke_timer_ref = PokeTRef }) ->
    ok = send_new_config(ConfigNew, NodesOld),
    lists:foreach(fun (MRef) -> erlang:demonitor(MRef) end, AliveOld),
    %% Forgetting the reference without cancelling would leave the old
    %% timer running: it would still deliver poke_the_dead, and
    %% ensure_poke_timer/1 (seeing undefined) could arm a second timer
    %% alongside it. A timer that has already fired is unaffected by
    %% the cancel; its message is handled as usual.
    case PokeTRef of
        undefined -> ok;
        _         -> erlang:cancel_timer(PokeTRef)
    end,
    State #state { nodes          = [],
                   alive_mrefs    = [],
                   dead           = [],
                   poke_timer_ref = undefined }.

%% Arms the poke_the_dead timer unless one is already recorded in the
%% state.
ensure_poke_timer(State = #state { poke_timer_ref = undefined }) ->
    %% TODO: justify 2000
    State #state { poke_timer_ref =
                       erlang:send_after(2000, self(), poke_the_dead) };
ensure_poke_timer(State) ->
    State.

--------------------------------------------------------------------------------
/src/rabbit_clusterer_transitioner.erl:
--------------------------------------------------------------------------------
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License at
%% https://www.mozilla.org/MPL/1.1/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%% License for the specific language governing rights and limitations
%% under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is Pivotal Software, Inc.
%% Portions created by the Initial Developer are Copyright (C) 2013-2016
%% Pivotal Software, Inc. All Rights Reserved.

-module(rabbit_clusterer_transitioner).

-export([init/5, event/2]).

-record(state, { kind, status, node_id, config, comms, awaiting, eliminable }).

%% Concerns for join:
%%
%% We need to figure out what our peers are doing. If any of them are
%% up and running we can just join in with them.
%% The only other case
%% we care about is when everyone in the cluster config is alive
%% (i.e. no BadNodes) and everyone is joining, just like us. In this
%% case we know that there's no one with knowledge that must be
%% preserved, so we elect a leader (based on gospel, though in this
%% case it's not actually necessary to pay attention to gospel, it's
%% just an easy and unambiguous decider). The leader then comes up on
%% its own and everyone else waits for them to become ready, and then
%% syncs.
%%
%% In all other cases (i.e. there are nodes rejoining etc) then we
%% just wait and try again as we should be guaranteed to end up in
%% some state with some nodes up and running and we can then sync to
%% them.
%%
%%
%% Concerns for rejoin:
%%
%% - Cluster could have grown or shrunk since we last saw it.
%%
%% - We want to avoid the timeout on mnesia:wait_for_tables, so we
%% need to manage the "dependencies" ourselves.
%%
%% - The disk-nodes-running-when-we-last-shutdown result can be
%% satisfied by any one of those nodes managing to start up. It's
%% certainly not that we depend on *all* of the nodes in there, merely
%% *any*.
%%
%% - We try to shrink that set as much as possible. If we can get it
%% down to the empty set then we can consider ourselves the "winner"
%% and can start up without waiting for anyone else.
%%
%% - We can remove a node N from that set if:
%% a) We can show that N (transitively) depends on us (i.e. we have
%% a cycle) and its other dependencies we can also disregard.
%% b) We can show that N is currently joining and not
%% rejoining. Thus it has been reset and we're witnessing hostname
%% reuse.
%% In this case we must ignore N: if we don't then there's
%% a risk all the rejoining nodes decide to depend on N, and N (as
%% it's joining, not rejoining) waits for everyone else. Thus
%% deadlock.
%%
%% It's tempting to consider a generalisation of (b) where if we see
%% that we depend on a node that is currently rejoining but has a
%% different node id than what we were expecting then it must have
%% been reset since we last saw it and so we shouldn't depend on
%% it. However, this is wrong: the fact that it's rejoining shows that
%% it managed to join (and then be stopped) the cluster after we last
%% saw it. Thus it still has more up-to-date information than us, so
%% we should still depend on it. In this case it should also follow
%% that (a) won't hold for such an N either.
%%
%% Both (a) and (b) require that we can communicate with N. Thus if we
%% have a dependency on a node we can't contact then we can't
%% eliminate it as a dependency, so we just have to wait for either
%% said node to come up, or for someone else to determine they can
%% start.
%%
%% The problem with the cluster shrinking is that we have the
%% possibility of multiple leaders. If A and B both depend on C and
%% the cluster shrinks, losing C, then A and B could both come up,
%% learn of the loss of C and thus declare themselves the leader. It
%% would be possible for both A and B to have *only* C in their
%% initial set of disk-nodes-running-when-we-last-shutdown (in
%% general, the act of adding a node to a cluster and all the nodes
%% rewriting their nodes-running file is non-atomic so we need to be
%% as accommodating as possible here) so neither would feel it
%% necessary to wait for each other.
Consequently, we have to have some locking 94 | %% to make sure that we don't have multiple leaders (which could cause 95 | %% an mnesia fail-to-merge issue). The rule about locking is that you 96 | %% have to take locks in the same order, and then you can't 97 | %% deadlock. So, we sort all the nodes from the cluster config, and 98 | %% grab locks in order. If a node is down, that's treated as being ok 99 | %% (i.e. you don't abort). You also have to lock yourself. Only when 100 | %% you have all the locks can you actually boot. Why do we lock 101 | %% everyone? Because we can't agree on who to lock. If you tried to 102 | %% pick someone (eg minimum node) then you'd find that could change as 103 | %% other nodes come up or go down, so it's not stable. So lock 104 | %% everyone. 105 | %% 106 | %% Unsurprisingly, this gets a bit more complex. The lock is the Comms 107 | %% Pid, and the lock is taken by Comms Pids. The lock monitors the 108 | %% taker. This is elegant in that if A locks A and B, and then a new 109 | %% cluster config is applied to A then A's comms will be restarted, so 110 | %% the old comms Pid will die, so the locks are released. Similarly, 111 | %% on success, the comms will be stopped, so the lock releases. This 112 | %% is simple and nice. Where it gets slightly more complex is what 113 | %% happens if A locks A and B and then a new config is applied to 114 | %% B. If that were to happen then that would clearly invalidate the 115 | %% config that A is also using. B will forward new config to A too. B 116 | %% and A will both restart their comms, in any order. If B goes first, 117 | %% we don't want B to be held up, so as B will get a new comms, it 118 | %% also gets a new lock as the lock is the comms Pid itself. So when B 119 | %% restarts its comms, it's unlocking itself too. 120 | 121 | -define(MINI_SLEEP, 500). 122 | -define(BIG_SLEEP, 5000). 

%%----------------------------------------------------------------------------
%% API
%%----------------------------------------------------------------------------

%% Starts a transitioner of the given Kind (join | rejoin) for Config.
%%
%% When this node is the only member of Config there is nobody to talk
%% to: mnesia is made a singleton and the result is {success, Config}.
%% Otherwise a fresh transitioner state is built and the first status
%% request is issued, after ?BIG_SLEEP when PreSleep is true.
init(Kind, NodeID, Config, PreSleep, Comms) ->
    case rabbit_clusterer_config:is_singleton(node(), Config) of
        false ->
            Initial = #state { kind       = Kind,
                               node_id    = NodeID,
                               config     = Config,
                               comms      = Comms,
                               awaiting   = undefined,
                               eliminable = [] },
            %% If the last boot failed, we want to sleep for a
            %% while before trying another boot. This is (a)
            %% generally polite and avoids spinning too rapidly;
            %% (b) gives a period of time during which we're
            %% lockable by other nodes and generally receptive to
            %% other nodes trying to do things. This is important
            %% in the case of upgrades: we might be a node trying
            %% to join in with an existing cluster but we're on a
            %% new version, so the rabbit boot
            %% fails. Consequently, as the other nodes get
            %% updated we need to cope with them potentially
            %% having different configs and needing to
            %% communicate with us, thus we need to be responsive
            %% during this sleep period.
            case PreSleep of
                false -> request_status(Initial);
                true  -> delayed_request_status(?BIG_SLEEP, Initial)
            end;
        true ->
            %% The flag passed on is true only for a join whose
            %% gospel is reset.
            Flag = Kind =:= join andalso
                rabbit_clusterer_config:gospel(Config) =:= reset,
            ok = rabbit_clusterer_utils:make_mnesia_singleton(Flag),
            {success, Config}
    end.

%% Handles one event for the transitioner and returns one of the
%% result tuples built in the clauses below or in the helpers they
%% call: {continue, State}, {success, Config}, {config_changed,
%% Config}, {invalid_config, Config} or {sleep, Delay, Event, State}.
%% An event that matches no clause fails with function_clause.
%%
%% Clause: replies to our status request have arrived.
event({comms, {Replies, BadNodes}}, State = #state { kind = Kind,
                                                     status = awaiting_status,
                                                     node_id = NodeID,
                                                     config = Config }) ->
    case analyse_node_statuses(Replies, NodeID, Config) of
        invalid ->
            {invalid_config, Config};
        {Youngest, OlderThanUs, StatusDict} ->
            case rabbit_clusterer_config:compare(Youngest, Config) of
                coeval when OlderThanUs =:= [] ->
                    %% We have the most up to date config. But we must
                    %% use Youngest from here on as it has the updated
                    %% node_ids map.
                    (case Kind of
                         join -> fun maybe_join/3;
                         rejoin -> fun maybe_rejoin/3
                     end)(BadNodes, StatusDict,
                          State #state { config = Youngest });
                coeval ->
                    %% Update nodes which are older than us. In
                    %% reality they're likely to receive lots of the
                    %% same update from everyone else, but meh,
                    %% they'll just have to cope.
                    %%
                    %% We deliberately do this cast out of Comms to
                    %% preserve ordering of messages.
                    Msg = rabbit_clusterer_coordinator:template_new_config(
                            Youngest),
                    ok = rabbit_clusterer_comms:multi_cast(
                           OlderThanUs, Msg, State #state.comms),
                    request_status(State #state { config = Youngest });
                younger -> %% cannot be older or invalid
                    {config_changed, Youngest}
            end
    end;
%% Clause: the rejoining nodes have told us what they are awaiting.
event({comms, {Replies, BadNodes}}, State = #state { kind = rejoin,
                                                     status = awaiting_awaiting,
                                                     awaiting = MyAwaiting }) ->
    InvalidOrUndef = [N || {N, Res} <- Replies,
                           Res =:= invalid orelse Res =:= undefined ],
    case {BadNodes, InvalidOrUndef} of
        {[], []} ->
            MyNode = node(),
            G = digraph:new(),
            %% The try ... after guarantees the digraph is deleted
            %% whichever way the analysis ends.
            try
                %% To win, we need to find that we are in a cycle, and
                %% that cycle, treated as a single unit, has no
                %% outgoing edges. If we detect this, then we can
                %% start to grab locks. In all other cases, we just go
                %% back around.
                %% Add all vertices. This is slightly harder than
                %% you'd imagine because we could have that a node
                %% depends on a node which we've not queried yet
                %% (because it's a badnode).
                Replies1 = [{MyNode, MyAwaiting} | Replies],
                Nodes = lists:usort(
                          lists:append(
                            [[N|Awaiting] || {N, Awaiting} <- Replies1])),
                [digraph:add_vertex(G, N) || N <- Nodes],
                [digraph:add_edge(G, N, T) || {N, Awaiting} <- Replies1,
                                              T <- Awaiting],
                %% We want to use the
                %% digraph_utils:cyclic_strong_components/1 call as it
                %% captures the general case nicely: it returns a list
                %% of groups of nodes where each group is a set of
                %% nodes which are dependent on each other. However,
                %% for simple graphs with no loops at all, this call
                %% can return an empty list. Rather than detect and
                %% special case for that, we instead make every node
                %% dependent on itself. For simple graphs, this will
                %% result in each group returned being a single node.
                [digraph:add_edge(G, N, N) || N <- Nodes],
                CSC = digraph_utils:cyclic_strong_components(G),
                %% Asserts that exactly one component contains us.
                [OurComponent] = [C || C <- CSC, lists:member(MyNode, C)],
                %% Detect if there are any outbound edges from this
                %% component
                case [N || V <- OurComponent,
                           N <- digraph:out_neighbours(G, V),
                           not lists:member(N, OurComponent) ] of
                    [] -> %% We appear to be in the "root"
                          %% component. Begin the fight.
                        lock_nodes(State);
                    _ -> delayed_request_status(State)
                end
            after
                true = digraph:delete(G)
            end;
        _ ->
            %% Go around again...
            delayed_request_status(State)
    end;

%% Clause: our attempt to take the locks was rejected.
event({comms, lock_rejected}, State = #state { kind = rejoin,
                                               status = awaiting_lock }) ->
    %% Oh, well let's just wait and try again. Something must have
    %% changed.
    delayed_request_status(State);
%% Clause: we hold the locks; drop the eliminable mnesia dependencies
%% and report success.
event({comms, lock_ok}, #state { kind = rejoin,
                                 status = awaiting_lock,
                                 config = Config,
                                 eliminable = Eliminable }) ->
    ok = rabbit_clusterer_utils:eliminate_mnesia_dependencies(Eliminable),
    {success, Config};

%% Clause: someone asks which nodes we are awaiting; Fun is called
%% with the answer and must return ok.
event({request_awaiting, Fun}, State = #state { kind = rejoin,
                                                awaiting = Awaiting }) ->
    ok = Fun(Awaiting),
    {continue, State};

%% Clause: a delayed status request has come due. It is acted on only
%% if Ref is the one recorded in our status.
event({delayed_request_status, Ref},
      State = #state { status = {delayed_request_status, Ref} }) ->
    request_status(State);
event({delayed_request_status, _Ref}, State) ->
    %% ignore it
    {continue, State};

%% Clause: NewNode asks for our config and reports its node id.
event({request_config, NewNode, NewNodeID, Fun},
      State = #state { node_id = NodeID, config = Config }) ->
    %% Right here we could have a node that we're dependent on being
    %% reset.
    {NodeIDChanged, Config1} =
        rabbit_clusterer_config:add_node_id(NewNode, NewNodeID, NodeID, Config),
    ok = Fun(Config1),
    case NodeIDChanged of
        true -> {config_changed, Config1};
        false -> {continue, State #state { config = Config1 }}
    end;

%% Clause: Node has sent us its config.
event({new_config, ConfigRemote, Node},
      State = #state { node_id = NodeID, config = Config }) ->
    case rabbit_clusterer_config:compare(ConfigRemote, Config) of
        younger -> %% Here we also need to make sure we forward this to
                   %% anyone we're currently trying to cluster with:
                   %% the fact that we're about to change which config
                   %% we're using clearly invalidates our current
                   %% config and it's not just us using this
                   %% config. We send from here and not comms as we're
                   %% about to kill off comms anyway so there's no
                   %% ordering issues to consider.
            ok = rabbit_clusterer_coordinator:send_new_config(
                   ConfigRemote,
                   rabbit_clusterer_config:nodenames(Config) --
                       [node(), Node]),
            {config_changed, ConfigRemote};
        older -> ok = rabbit_clusterer_coordinator:send_new_config(Config, Node),
                 {continue, State};
        coeval -> Config1 = rabbit_clusterer_config:update_node_id(
                              Node, ConfigRemote, NodeID, Config),
                  {continue, State #state { config = Config1 }};
        invalid -> %% ignore
            {continue, State}
    end.

%%----------------------------------------------------------------------------
%% 'join' helpers
%%----------------------------------------------------------------------------

%% Decides what a joining node does once every reachable peer is known
%% to share our config. BadNodes is the list of nodes that did not
%% answer; StatusDict maps each status to the nodes reporting it.
maybe_join(BadNodes, StatusDict, State = #state { config = Config }) ->
    %% Everyone here has the same config, thus Statuses can be trusted
    %% as the statuses of all nodes trying to achieve *this* config
    %% and not some other config.
    %%
    %% Expected entries in Statuses are:
    %% - preboot:
    %%     Clusterer has started, but the boot step not yet hit
    %% - {transitioner, join}:
    %%     it's joining some cluster - blocked in Clusterer
    %% - {transitioner, rejoin}:
    %%     it's rejoining some cluster - blocked in Clusterer
    %% - booting:
    %%     Clusterer is happy and the rest of rabbit is currently
    %%     booting
    %% - ready:
    %%     Clusterer is happy and enough of rabbit has booted
    Statuses = dict:fetch_keys(StatusDict),
    %% Note: ReadyNodes is a boolean (is 'ready' among the statuses?),
    %% not a list of nodes.
    ReadyNodes = lists:member(ready, Statuses),
    AllJoining = [{transitioner, join}] =:= Statuses,
    %% ReadyNodes are nodes that are in this cluster (well, they could
    %% be in any cluster, but seeing as we've checked everyone has the
    %% same cluster config as us, we're sure it really is *this*
    %% cluster) and are fully up and running.
    %%
    %% If ReadyNodes exists, we should just reset and join into that,
    %% and ignore anything about gospel: it's possible that gospel is
    %% {node, node()} but that, in combination with ReadyNodes,
    %% suggests that the cluster previously existed with an older
    %% version of 'us': we must have been cleaned out and restarted
    %% (aka host-name reuse). Here we don't care about BadNodes.
    %%
    %% If ReadyNodes doesn't exist we can only safely proceed if there
    %% are no BadNodes, and everyone is joining (rather than
    %% rejoining) i.e. transitioner kind for all is 'join'. In all
    %% other cases, we must wait:
    %%
    %% - If BadNodes =/= [] then there may be a node that was cleanly
    %% shutdown last with what we think is the current config and so
    %% if it was started up, it would rejoin (itself, sort of) and
    %% then become ready: we could then sync to it.
    %%
    %% - If the transitioner kind is not all 'join' then some other
    %% nodes must be rejoining. We should wait for them to succeed (or
    %% at least change state) because if they do succeed we should
    %% sync off them.
    case ReadyNodes of
        true ->
            ok = cluster_with_nodes(Config),
            {success, Config};
        false when AllJoining andalso BadNodes =:= [] ->
            case maybe_form_new_cluster(Config) of
                true -> {success, Config};
                false -> delayed_request_status(State)
            end;
        false ->
            delayed_request_status(State)
    end.

%% Makes mnesia a singleton (passing true) and then configures the
%% cluster with all node names of Config and our own node type.
cluster_with_nodes(Config) ->
    ok = rabbit_clusterer_utils:make_mnesia_singleton(true),
    ok = rabbit_clusterer_utils:configure_cluster(
           rabbit_clusterer_config:nodenames(Config),
           rabbit_clusterer_config:node_type(node(), Config)).

%% Elects a leader from Config. Returns true if we are the leader (in
%% which case the local cluster has been configured with just us) and
%% false otherwise.
maybe_form_new_cluster(Config) ->
    %% Is it necessary to limit the election of a leader to disc
    %% nodes? No: we're only here when we have everyone in the cluster
    %% joining, so we know we wouldn't be creating a RAM-node-only
    %% cluster. Given that we enforce that the cluster config must
    %% have at least one disc node in it anyway, it's safe to allow a
    %% RAM node to lead. However, I'm not 100% sure that the rest of
    %% rabbit/mnesia likes that, so we leave in the 'disc'
    %% filter. This might get reviewed in QA.
    MyNode = node(),
    %% With gospel {node, Node} that node leads; with gospel reset the
    %% smallest disc node name leads.
    {Wipe, Leader} =
        case rabbit_clusterer_config:gospel(Config) of
            {node, Node} -> {Node =/= MyNode, Node};
            reset -> {true, lists:min(rabbit_clusterer_config:disc_nodenames(Config))}
        end,
    case Leader of
        MyNode -> ok = rabbit_clusterer_utils:make_mnesia_singleton(Wipe),
                  Type = rabbit_clusterer_config:node_type(MyNode, Config),
                  ok = rabbit_clusterer_utils:configure_cluster([MyNode], Type),
                  true;
        _ -> false
    end.

%%----------------------------------------------------------------------------
%% 'rejoin' helpers
%%----------------------------------------------------------------------------

%% Asks every rejoining node what it is awaiting and moves to the
%% awaiting_awaiting status; the replies arrive as a comms event.
collect_dependency_graph(RejoiningNodes, State = #state { comms = Comms }) ->
    ok = rabbit_clusterer_comms:multi_call(
           RejoiningNodes, {{transitioner, rejoin}, request_awaiting}, Comms),
    {continue, State #state { status = awaiting_awaiting }}.

%% Decides what a rejoining node does once every reachable peer is
%% known to share our config. BadNodes is the list of nodes that did
%% not answer; StatusDict maps each status to the nodes reporting it.
maybe_rejoin(BadNodes, StatusDict, State = #state { config = Config }) ->
    %% Everyone who's here is on the same config as us. If anyone is
    %% running then we can just declare success and trust mnesia to
    %% join into them.
    MyNode = node(),
    SomeoneRunning = dict:is_key(ready, StatusDict),
    IsRam = ram =:= rabbit_clusterer_config:node_type(MyNode, Config),
    if
        SomeoneRunning ->
            %% Someone is running, so we should be able to cluster to
            %% them.
            {success, Config};
        IsRam ->
            %% We're ram; can't do anything but wait for someone else
            delayed_request_status(State);
        true ->
            {All, _Disc, Running} = rabbit_node_monitor:read_cluster_status(),
            DiscSet = ordsets:from_list(
                        rabbit_clusterer_config:disc_nodenames(Config)),
            %% Intersect with Running and remove MyNode
            DiscRunningSet =
                ordsets:del_element(
                  MyNode, ordsets:intersection(
                            DiscSet, ordsets:from_list(Running))),
            BadNodesSet = ordsets:from_list(BadNodes),
            Joining = case dict:find({transitioner, join}, StatusDict) of
                          {ok, List} -> List;
                          error -> []
                      end,
            JoiningSet = ordsets:from_list(Joining),
            %% What we still wait for: previously running disc nodes
            %% that are not currently joining.
            NotJoiningSet = ordsets:subtract(DiscRunningSet, JoiningSet),
            %% Nodes in the recorded cluster status that the config no
            %% longer mentions.
            DeletedSet =
                ordsets:subtract(
                  ordsets:from_list(All),
                  ordsets:from_list(rabbit_clusterer_config:nodenames(Config))),
            EliminableSet = ordsets:union(JoiningSet, DeletedSet),
            State1 = State #state { awaiting = ordsets:to_list(NotJoiningSet),
                                    eliminable = ordsets:to_list(EliminableSet) },
            case ordsets:is_disjoint(DiscRunningSet, BadNodesSet) of
                true ->
                    %% Everyone we depend on is alive in some form.
                    case {ordsets:size(NotJoiningSet),
                          dict:find({transitioner, rejoin}, StatusDict)} of
                        {0, _} ->
                            %% We win!
                            lock_nodes(State1);
                        {_, error} ->
                            %% No one else is rejoining, nothing we
                            %% can do but wait.
                            delayed_request_status(State1);
                        {_, {ok, Rejoining}} ->
                            collect_dependency_graph(Rejoining, State1)
                    end;
                false ->
                    %% We might depend on a node in BadNodes. We must
                    %% wait for it to appear.
                    delayed_request_status(State1)
            end
    end.
%% Asks the comms process to lock every node named in the config and
%% moves this transitioner to the awaiting_lock status.
lock_nodes(State = #state { comms = Comms, config = Config }) ->
    AllNodes = rabbit_clusterer_config:nodenames(Config),
    ok = rabbit_clusterer_comms:lock_nodes(AllNodes, Comms),
    {continue, State #state { status = awaiting_lock }}.

%%----------------------------------------------------------------------------
%% common helpers
%%----------------------------------------------------------------------------

%% Sends {request_status, node(), NodeID} to every node of the config
%% except the local one and moves to the awaiting_status status.
request_status(State = #state { node_id = NodeID,
                                config  = Config,
                                comms   = Comms }) ->
    Self = node(),
    Peers = lists:delete(Self, rabbit_clusterer_config:nodenames(Config)),
    Request = {request_status, Self, NodeID},
    ok = rabbit_clusterer_comms:multi_call(Peers, Request, Comms),
    {continue, State #state { status = awaiting_status }}.

%% Same as delayed_request_status/2 with the ?MINI_SLEEP delay.
delayed_request_status(State) ->
    delayed_request_status(?MINI_SLEEP, State).

%% Returns a {sleep, Sleep, Msg, State1} instruction. Msg is
%% {delayed_request_status, Ref} with a fresh reference, and the very
%% same term is stored as the status of State1 (presumably so the
%% wake-up can be matched against the current status; confirm against
%% the caller).
delayed_request_status(Sleep, State) ->
    %% TODO: work out some sensible timeout value
    Token = {delayed_request_status, make_ref()},
    {sleep, Sleep, Token, State #state { status = Token }}.

%% The input is a k/v list of nodes and their config+status tuples (or
%% the atom 'preboot' if the node is in the process of starting up),
%% plus the local node's id and config.
%%
%% Returns a tuple containing
%% 1) the youngest config of all, with an enriched node_ids map
%% 2) a list of nodes operating with configs older than the local node's
%% 3) a dict mapping status to lists of nodes
%% or the atom 'invalid' if any config comparison came back 'invalid'.
analyse_node_statuses(NodeConfigStatusList, NodeID, Config) ->
    Seed = {Config, [], [], dict:new()},
    Step = fun (Reply, Acc) -> analyse_node_status(Config, Reply, Acc) end,
    case lists:foldr(Step, Seed, NodeConfigStatusList) of
        invalid ->
            invalid;
        {Youngest, Older, IDs, Statuses} ->
            %% Anything we had in Config that does not exist in IDs
            %% must still be maintained, hence the transfer first.
            Carried = rabbit_clusterer_config:transfer_node_ids(
                        Config, Youngest),
            Enriched = rabbit_clusterer_config:add_node_ids(
                         IDs, NodeID, Carried),
            {Enriched, Older, Statuses}
    end.

%% Fold step for analyse_node_statuses/3: merges one node's reply into
%% the accumulator {Youngest, Older, IDs, Statuses}.
%%
%% - an 'invalid' accumulator is passed through untouched;
%% - a 'preboot' reply only records the node under the preboot status;
%% - a {Config, Status} reply is compared against both the youngest
%%   config seen so far and the local config; either comparison
%%   yielding 'invalid' makes the whole result 'invalid'.
analyse_node_status(_Config, _Reply, invalid) ->
    invalid;
analyse_node_status(_Config, {Node, preboot},
                    {Youngest, Older, IDs, Statuses}) ->
    {Youngest, Older, IDs, dict:append(preboot, Node, Statuses)};
analyse_node_status(Config, {Node, {NodeConfig, NodeStatus}},
                    {Youngest, Older, IDs, Statuses}) ->
    VsYoungest = rabbit_clusterer_config:compare(NodeConfig, Youngest),
    VsLocal    = rabbit_clusterer_config:compare(NodeConfig, Config),
    case (VsYoungest =:= invalid) orelse (VsLocal =:= invalid) of
        true ->
            invalid;
        false ->
            Youngest1 = case VsYoungest of
                            younger -> NodeConfig;
                            _       -> Youngest
                        end,
            Older1 = case VsLocal of
                         older -> [Node | Older];
                         _     -> Older
                     end,
            NodeID = rabbit_clusterer_config:node_id(Node, NodeConfig),
            {Youngest1, Older1, [{Node, NodeID} | IDs],
             dict:append(NodeStatus, Node, Statuses)}
    end.