├── .formatter.exs ├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── elixir.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── lib ├── logger.ex ├── strategy │ ├── dns_poll.ex │ ├── epmd.ex │ ├── erlang_hosts.ex │ ├── gossip.ex │ ├── kubernetes.ex │ ├── kubernetes_dns.ex │ ├── kubernetes_dns_srv.ex │ ├── local_epmd.ex │ ├── rancher.ex │ ├── state.ex │ └── strategy.ex └── supervisor.ex ├── mix.exs ├── mix.lock ├── priv └── endpoint-viewer.yaml └── test ├── app_test.exs ├── dns_poll_test.exs ├── epmd_test.exs ├── fixtures ├── kubernetes │ └── service_account │ │ └── .gitkeep └── vcr_cassettes │ ├── kubernetes.json │ └── kubernetes_pods.json ├── gossip_test.exs ├── kubernetes_dns_srv_test.exs ├── kubernetes_dns_test.exs ├── kubernetes_test.exs ├── logger_test.exs ├── strategy_test.exs ├── support ├── exvcr.ex ├── nodes.ex └── telemetry.ex └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Steps to reproduce 2 | 3 | - Configuration Used 4 | - Strategy Used 5 | - Errors/Incorrect Behaviour Encountered 6 | 7 | ### Description of issue 8 | 9 | - What are the expected results? 10 | - Is the documentation incorrect? 11 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Summary of changes 2 | 3 | I'll review the commits, so I mostly want to understand the "why" rather than the "what" 4 | 5 | ### Checklist 6 | 7 | - [ ] New functions have typespecs, changed functions were updated 8 | - [ ] Same for documentation, including moduledocs 9 | - [ ] Tests were added or updated to cover changes 10 | - [ ] Commits were squashed into a single coherent commit 11 | - [ ] Notes added to CHANGELOG file which describe changes at a high-level 12 | -------------------------------------------------------------------------------- /.github/workflows/elixir.yml: -------------------------------------------------------------------------------- 1 | name: elixir 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-20.04 12 | env: 13 | MIX_ENV: test 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | include: 18 | - pair: 19 | elixir: "1.13" 20 | otp: "22" 21 | - pair: 22 | elixir: "1.17" 23 | otp: "27" 24 | lint: lint 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: erlef/setup-beam@v1 29 | with: 30 | otp-version: ${{matrix.pair.otp}} 31 | elixir-version: ${{matrix.pair.elixir}} 32 | 33 | - uses: actions/cache@v4 34 | with: 35 | path: | 36 | deps 37 | _build 38 | key: ${{ runner.os }}-mix-${{matrix.pair.elixir}}-${{matrix.pair.otp}}-${{ hashFiles('**/mix.lock') }} 39 | restore-keys: | 40 | ${{ runner.os }}-mix-${{matrix.pair.elixir}}-${{matrix.pair.otp}}- 41 | 42 | - run: mix deps.get --only test 43 | 44 | - run: mix format --check-formatted 45 | if: ${{ matrix.lint }} 46 | 47 | - run: mix deps.get && mix deps.unlock --check-unused 48 | if: ${{ matrix.lint }} 49 | 50 | - run: mix deps.compile 51 | 52 | # TODO: disable for now due to upstream error with ExVCR 53 | # warning: 
redefining module ExVCR.Adapter.Httpc.Converter (current 54 | # version loaded from 55 | # _build/test/lib/exvcr/ebin/Elixir.ExVCR.Adapter.Httpc.Converter.beam) 56 | 57 | # - run: mix compile --warnings-as-errors 58 | # if: ${{ matrix.lint }} 59 | 60 | - run: mix test 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | libcluster-*.tar 24 | 25 | # Temporary files for e.g. tests. 26 | /tmp/ 27 | 28 | # Misc. 29 | /priv/test/service_account/* 30 | !/priv/test/service_account/.gitkeep 31 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Unreleased 4 | 5 | - Add `kubernetes_use_cached_resources` option to Kubernetes strategy 6 | 7 | ## 3.4.1 8 | 9 | - Use new cypher names 10 | - Allow Epmd strategy to reconnect after connection failures 11 | - Detect Self Signed Certificate Authority for Kubernetes Strategy 12 | - Remove calls to deprecated `Logger.warn/2` 13 | - Correct misspell of 'Empd' -> 'Epmd' in `Cluster.Strategy.LocalEpmd` moduledoc 14 | 15 | ## 3.4.0 16 | 17 | ### Added 18 | 19 | - Telemetry events added for tracking node connects and disconnects 20 | 21 | ### 3.3.0 22 | 23 | ### Changed 24 | 25 | - Default multicast address is now 233.252.1.32, was 230.1.1.251, [commit](https://github.com/bitwalker/libcluster/commit/449a65e14f152a83a0f8ee371f05743610cd292f) 26 | 27 | 28 | ### 2.3.0 29 | 30 | ### Added 31 | 32 | - Clustering strategy for the Rancher container platform (see: https://github.com/rancher/rancher) 33 | - LocalEpmd strategy that uses epmd to discover nodes on the local host 34 | - Gossip strategy multicast interface is used for adding multicast membership 35 | 36 | ## 2.0.0 37 | 38 | ### Added 39 | 40 | - Configurable `connect` and `disconnect` options for implementing strategies 41 | on top of custom topologies 42 | - The ability to start libcluster for more than a single topology 43 | - Added `polling_interval` option to Kubernetes strategy 44 | - Added ability to specify a list of hosts for the Epmd strategy to connect to on start 45 | 46 | ### Removed 47 | 48 | - Cluster.Events module, as it was redundant and unused 49 | 50 | ### Changed 51 | 52 | - Configuration format has changed significantly, please review the docs 53 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | ## Copyright (c) 2016 Paul Schoenfelder 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in 
the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libcluster 2 | 3 | [![Build Status](https://github.com/bitwalker/libcluster/workflows/elixir/badge.svg?branch=main)](https://github.com/bitwalker/libcluster/actions?query=workflow%3A%22elixir%22+branch%3Amain) 4 | [![Module Version](https://img.shields.io/hexpm/v/libcluster.svg)](https://hex.pm/packages/libcluster) 5 | [![Hex Docs](https://img.shields.io/badge/hex-docs-lightgreen.svg)](https://hexdocs.pm/libcluster/) 6 | [![Total Download](https://img.shields.io/hexpm/dt/libcluster.svg)](https://hex.pm/packages/libcluster) 7 | [![License](https://img.shields.io/hexpm/l/libcluster.svg)](https://github.com/bitwalker/libcluster/blob/main/LICENSE) 8 | [![Last Updated](https://img.shields.io/github/last-commit/bitwalker/libcluster.svg)](https://github.com/bitwalker/libcluster/commits/main) 9 | 10 | This library provides a mechanism for automatically forming clusters of Erlang nodes, with 11 | either static or dynamic node membership. It provides a pluggable "strategy" system, with a variety of strategies 12 | provided out of the box. 13 | 14 | You can find supporting documentation [here](https://hexdocs.pm/libcluster). 15 | 16 | ## Features 17 | 18 | - Automatic cluster formation/healing 19 | - Choice of multiple clustering strategies out of the box: 20 | - Standard Distributed Erlang facilities (e.g. `epmd`, `.hosts.erlang`), which supports IP-based or DNS-based names 21 | - Multicast UDP gossip, using a configurable port/multicast address, 22 | - Kubernetes via its metadata API using via a configurable label selector and 23 | node basename; or alternatively, using DNS. 24 | - Rancher, via its [metadata API][rancher-api] 25 | - Easy to provide your own custom clustering strategies for your specific environment. 26 | - Easy to use provide your own distribution plumbing (i.e. something other than 27 | Distributed Erlang), by implementing a small set of callbacks. This allows 28 | `libcluster` to support projects like 29 | [Partisan](https://github.com/lasp-lang/partisan). 30 | 31 | ## Installation 32 | 33 | ```elixir 34 | defp deps do 35 | [{:libcluster, "~> MAJ.MIN"}] 36 | end 37 | ``` 38 | 39 | You can determine the latest version by running `mix hex.info libcluster` in 40 | your shell, or by going to the `libcluster` [page on Hex.pm](https://hex.pm/packages/libcluster). 
41 | 42 | ## Usage 43 | 44 | It is easy to get started using `libcluster`, simply decide which strategy you 45 | want to use to form a cluster, define a topology, and then start the `Cluster.Supervisor` module in 46 | the supervision tree of an application in your Elixir system, as demonstrated below: 47 | 48 | ```elixir 49 | defmodule MyApp.App do 50 | use Application 51 | 52 | def start(_type, _args) do 53 | topologies = [ 54 | example: [ 55 | strategy: Cluster.Strategy.Epmd, 56 | config: [hosts: [:"a@127.0.0.1", :"b@127.0.0.1"]], 57 | ] 58 | ] 59 | children = [ 60 | {Cluster.Supervisor, [topologies, [name: MyApp.ClusterSupervisor]]}, 61 | # ..other children.. 62 | ] 63 | Supervisor.start_link(children, strategy: :one_for_one, name: MyApp.Supervisor) 64 | end 65 | end 66 | ``` 67 | 68 | The following section describes topology configuration in more detail. 69 | 70 | ## Example Configuration 71 | 72 | You can configure `libcluster` either in your Mix config file (`config.exs`) as 73 | shown below, or construct the keyword list structure manually, as shown in the 74 | previous section. Either way, you need to pass the configuration to the 75 | `Cluster.Supervisor` module in it's start arguments. If you prefer to use Mix 76 | config files, then simply use `Application.get_env(:libcluster, :topologies)` to 77 | get the config that `Cluster.Supervisor` expects. 78 | 79 | ```elixir 80 | config :libcluster, 81 | topologies: [ 82 | epmd_example: [ 83 | # The selected clustering strategy. Required. 84 | strategy: Cluster.Strategy.Epmd, 85 | # Configuration for the provided strategy. Optional. 86 | config: [hosts: [:"a@127.0.0.1", :"b@127.0.0.1"]], 87 | # The function to use for connecting nodes. The node 88 | # name will be appended to the argument list. Optional 89 | connect: {:net_kernel, :connect_node, []}, 90 | # The function to use for disconnecting nodes. The node 91 | # name will be appended to the argument list. Optional 92 | disconnect: {:erlang, :disconnect_node, []}, 93 | # The function to use for listing nodes. 94 | # This function must return a list of node names. Optional 95 | list_nodes: {:erlang, :nodes, [:connected]}, 96 | ], 97 | # more topologies can be added ... 98 | gossip_example: [ 99 | # ... 100 | ] 101 | ] 102 | ``` 103 | 104 | ## Strategy Configuration 105 | 106 | For instructions on configuring each strategy included with `libcluster`, please 107 | visit the docs on [HexDocs](https://hexdocs.pm/libcluster), and look at the 108 | module doc for the strategy you want to use. The authoritative documentation for 109 | each strategy is kept up to date with the module implementing it. 110 | 111 | ## Clustering 112 | 113 | You have a handful of choices with regards to cluster management out of the box: 114 | 115 | - `Cluster.Strategy.Epmd`, which relies on `epmd` to connect to a configured set 116 | of hosts. 117 | - `Cluster.Strategy.LocalEpmd`, which relies on `epmd` to connect to discovered 118 | nodes on the local host. 119 | - `Cluster.Strategy.ErlangHosts`, which uses the `.hosts.erlang` file to 120 | determine which hosts to connect to. 121 | - `Cluster.Strategy.Gossip`, which uses multicast UDP to form a cluster between 122 | nodes gossiping a heartbeat. 123 | - `Cluster.Strategy.Kubernetes`, which uses the Kubernetes Metadata API to query 124 | nodes based on a label selector and basename. 125 | - `Cluster.Strategy.Kubernetes.DNS`, which uses DNS to join nodes under a shared 126 | headless service in a given namespace. 
127 | - `Cluster.Strategy.Rancher`, which like the Kubernetes strategy, uses a 128 | metadata API to query nodes to cluster with. 129 | 130 | You can also define your own strategy implementation, by implementing the 131 | `Cluster.Strategy` behavior. This behavior expects you to implement a 132 | `start_link/1` callback, optionally overriding `child_spec/1` if needed. You don't necessarily have 133 | to start a process as part of your strategy, but since it's very likely you will need to maintain some state, designing your 134 | strategy as an OTP process (e.g. `GenServer`) is the ideal method, however any 135 | valid OTP process will work. See the `Cluster.Strategy` module for details on 136 | the callbacks you need to implement and the arguments they receive. 137 | 138 | If you do not wish to use the default Erlang distribution protocol, you may provide an alternative means of connecting/ 139 | disconnecting nodes via the `connect` and `disconnect` configuration options, if not using Erlang distribution you must provide a `list_nodes` implementation as well. 140 | They take a `{module, fun, args}` tuple, and append the node name being targeted to the `args` list. How to implement distribution in this way is left as an 141 | exercise for the reader, but I recommend taking a look at the [Firenest](https://github.com/phoenixframework/firenest) project 142 | currently under development. By default, `libcluster` uses Distributed Erlang. 143 | 144 | ### Third-Party Strategies 145 | 146 | The following list of third-party strategy implementations is not comprehensive, 147 | but are known to exist. 148 | 149 | - [libcluster_ec2](https://github.com/kyleaa/libcluster_ec2) - EC2 clustering strategy based on tags 150 | - [libcluster_droplet](https://github.com/jsonmaur/libcluster-droplet) - Digital Ocean Droplet clustering strategy 151 | - [libcluster_consul](https://github.com/team-telnyx/libcluster_consul) - Consul clustering strategy 152 | - [libcluster_postgres](https://github.com/supabase/libcluster_postgres) - Postgres clustering strategy 153 | 154 | ## Copyright and License 155 | 156 | Copyright (c) 2016 Paul Schoenfelder 157 | 158 | This library is MIT licensed. See the 159 | [LICENSE.md](https://github.com/bitwalker/libcluster/blob/master/LICENSE.md) for details. 
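As a supplement to the notes on custom strategies in the Clustering section above, here is a minimal sketch of what such a module can look like. It follows the same shape as the built-in strategies: a `GenServer` that calls `Cluster.Strategy.connect_nodes/4` with the `connect` and `list_nodes` functions carried in its `Cluster.Strategy.State`. The module name and the `:nodes` config key are illustrative only and are not part of `libcluster`.

```elixir
defmodule MyApp.Strategy.Static do
  # Minimal custom strategy sketch: connects to a fixed list of nodes taken
  # from this topology's `:config`, then re-checks on a fixed interval.
  use GenServer
  use Cluster.Strategy

  alias Cluster.Strategy.State

  def start_link(args), do: GenServer.start_link(__MODULE__, args)

  @impl true
  def init([%State{} = state]) do
    {:ok, do_connect(state)}
  end

  @impl true
  def handle_info(:connect, state), do: {:noreply, do_connect(state)}
  def handle_info(_, state), do: {:noreply, state}

  defp do_connect(%State{config: config} = state) do
    # `:nodes` is a hypothetical config key used only for this example
    nodes = Keyword.get(config, :nodes, [])
    Cluster.Strategy.connect_nodes(state.topology, state.connect, state.list_nodes, nodes)
    Process.send_after(self(), :connect, 5_000)
    state
  end
end
```

Such a strategy is then selected like any other, e.g. `strategy: MyApp.Strategy.Static, config: [nodes: [:"a@127.0.0.1"]]` in a topology definition.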
160 | 161 | [rancher-api]: http://rancher.com/docs/rancher/latest/en/rancher-services/metadata-service/ 162 | -------------------------------------------------------------------------------- /lib/logger.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Logger do 2 | @moduledoc false 3 | require Logger 4 | 5 | def debug(t, msg) do 6 | case Application.get_env(:libcluster, :debug, false) do 7 | dbg when dbg in [nil, false, "false"] -> 8 | :ok 9 | 10 | _ -> 11 | Logger.debug(log_message(t, msg)) 12 | end 13 | end 14 | 15 | def info(t, msg), do: Logger.info(log_message(t, msg)) 16 | 17 | if Version.match?(System.version(), ">= 1.11.0") do 18 | def warn(t, msg), do: Logger.warning(log_message(t, msg)) 19 | else 20 | def warn(t, msg), do: Logger.warn(log_message(t, msg)) 21 | end 22 | 23 | def error(t, msg), do: Logger.error(log_message(t, msg)) 24 | 25 | @compile {:inline, log_message: 2} 26 | defp log_message(t, msg) do 27 | "[libcluster:#{t}] #{msg}" 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/strategy/dns_poll.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.DNSPoll do 2 | @moduledoc """ 3 | Assumes you have nodes that respond to the specified DNS query (A record), and which follow the node name pattern of 4 | `@`. If your setup matches those assumptions, this strategy will periodically poll DNS and connect 5 | all nodes it finds. 6 | 7 | ## Options 8 | 9 | * `poll_interval` - How often to poll in milliseconds (optional; default: 5_000) 10 | * `query` - DNS query to use (required; e.g. "my-app.example.com") 11 | * `node_basename` - The short name of the nodes you wish to connect to (required; e.g. 
"my-app") 12 | 13 | ## Usage 14 | 15 | config :libcluster, 16 | topologies: [ 17 | dns_poll_example: [ 18 | strategy: #{__MODULE__}, 19 | config: [ 20 | polling_interval: 5_000, 21 | query: "my-app.example.com", 22 | node_basename: "my-app"]]] 23 | """ 24 | 25 | use GenServer 26 | import Cluster.Logger 27 | 28 | alias Cluster.Strategy.State 29 | alias Cluster.Strategy 30 | 31 | @default_polling_interval 5_000 32 | 33 | def start_link(args), do: GenServer.start_link(__MODULE__, args) 34 | 35 | @impl true 36 | def init([%State{meta: nil} = state]) do 37 | init([%State{state | :meta => MapSet.new()}]) 38 | end 39 | 40 | def init([%State{} = state]) do 41 | {:ok, do_poll(state)} 42 | end 43 | 44 | @impl true 45 | def handle_info(:timeout, state), do: handle_info(:poll, state) 46 | def handle_info(:poll, state), do: {:noreply, do_poll(state)} 47 | def handle_info(_, state), do: {:noreply, state} 48 | 49 | defp do_poll( 50 | %State{ 51 | topology: topology, 52 | connect: connect, 53 | disconnect: disconnect, 54 | list_nodes: list_nodes 55 | } = state 56 | ) do 57 | new_nodelist = state |> get_nodes() |> MapSet.new() 58 | removed = MapSet.difference(state.meta, new_nodelist) 59 | 60 | new_nodelist = 61 | case Strategy.disconnect_nodes( 62 | topology, 63 | disconnect, 64 | list_nodes, 65 | MapSet.to_list(removed) 66 | ) do 67 | :ok -> 68 | new_nodelist 69 | 70 | {:error, bad_nodes} -> 71 | # Add back the nodes which should have been removed, but which couldn't be for some reason 72 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 73 | MapSet.put(acc, n) 74 | end) 75 | end 76 | 77 | new_nodelist = 78 | case Strategy.connect_nodes( 79 | topology, 80 | connect, 81 | list_nodes, 82 | MapSet.to_list(new_nodelist) 83 | ) do 84 | :ok -> 85 | new_nodelist 86 | 87 | {:error, bad_nodes} -> 88 | # Remove the nodes which should have been added, but couldn't be for some reason 89 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 90 | MapSet.delete(acc, n) 91 | end) 92 | end 93 | 94 | Process.send_after(self(), :poll, polling_interval(state)) 95 | 96 | %{state | :meta => new_nodelist} 97 | end 98 | 99 | defp polling_interval(%{config: config}) do 100 | Keyword.get(config, :polling_interval, @default_polling_interval) 101 | end 102 | 103 | defp get_nodes(%State{config: config} = state) do 104 | query = Keyword.fetch(config, :query) 105 | node_basename = Keyword.fetch(config, :node_basename) 106 | 107 | resolver = 108 | Keyword.get(config, :resolver, fn query -> 109 | query 110 | |> String.to_charlist() 111 | |> lookup_all_ips 112 | end) 113 | 114 | resolve(query, node_basename, resolver, state) 115 | end 116 | 117 | # query for all ips responding to a given dns query 118 | # format ips as node names 119 | # filter out me 120 | defp resolve({:ok, query}, {:ok, node_basename}, resolver, %State{topology: topology}) 121 | when is_binary(query) and is_binary(node_basename) and query != "" and node_basename != "" do 122 | debug(topology, "polling dns for '#{query}'") 123 | me = node() 124 | 125 | query 126 | |> resolver.() 127 | |> Enum.map(&format_node(&1, node_basename)) 128 | |> Enum.reject(fn n -> n == me end) 129 | end 130 | 131 | defp resolve({:ok, invalid_query}, {:ok, invalid_basename}, _resolver, %State{ 132 | topology: topology 133 | }) do 134 | warn( 135 | topology, 136 | "dns polling strategy is selected, but query or basename param is invalid: #{inspect(%{query: invalid_query, node_basename: invalid_basename})}" 137 | ) 138 | 139 | [] 140 | end 141 | 142 | defp resolve(:error, :error, 
_resolver, %State{topology: topology}) do 143 | warn( 144 | topology, 145 | "dns polling strategy is selected, but query and basename params missed" 146 | ) 147 | 148 | [] 149 | end 150 | 151 | def lookup_all_ips(q) do 152 | Enum.flat_map([:a, :aaaa], fn t -> :inet_res.lookup(q, :in, t) end) 153 | end 154 | 155 | # turn an ip into a node name atom, assuming that all other node names looks similar to our own name 156 | defp format_node(ip, base_name), do: :"#{base_name}@#{:inet_parse.ntoa(ip)}" 157 | end 158 | -------------------------------------------------------------------------------- /lib/strategy/epmd.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Epmd do 2 | @moduledoc """ 3 | This clustering strategy relies on Erlang's built-in distribution protocol. 4 | 5 | You can have libcluster automatically connect nodes on startup for you by configuring 6 | the strategy like below: 7 | 8 | config :libcluster, 9 | topologies: [ 10 | epmd_example: [ 11 | strategy: #{__MODULE__}, 12 | config: [ 13 | timeout: 30_000, 14 | hosts: [:"a@127.0.0.1", :"b@127.0.0.1"]]]] 15 | 16 | An optional timeout can be specified in the config. This is the timeout that 17 | will be used in the GenServer to connect the nodes. This defaults to 18 | `:infinity` meaning that the connection process will only happen when the 19 | worker is started. Any integer timeout will result in the connection process 20 | being triggered. In the example above, it has been configured for 30 seconds. 21 | """ 22 | use GenServer 23 | use Cluster.Strategy 24 | 25 | alias Cluster.Strategy.State 26 | 27 | @impl true 28 | def start_link([%State{config: config} = state]) do 29 | case Keyword.get(config, :hosts, []) do 30 | [] -> 31 | :ignore 32 | 33 | nodes when is_list(nodes) -> 34 | GenServer.start_link(__MODULE__, [state]) 35 | end 36 | end 37 | 38 | @impl true 39 | def init([state]) do 40 | connect_hosts(state) 41 | {:ok, state, configured_timeout(state)} 42 | end 43 | 44 | @impl true 45 | def handle_info(:timeout, state) do 46 | handle_info(:connect, state) 47 | end 48 | 49 | def handle_info(:connect, state) do 50 | connect_hosts(state) 51 | {:noreply, state, configured_timeout(state)} 52 | end 53 | 54 | @spec configured_timeout(State.t()) :: integer() | :infinity 55 | defp configured_timeout(%State{config: config}) do 56 | Keyword.get(config, :timeout, :infinity) 57 | end 58 | 59 | @spec connect_hosts(State.t()) :: State.t() 60 | defp connect_hosts(%State{config: config} = state) do 61 | nodes = Keyword.get(config, :hosts, []) 62 | Cluster.Strategy.connect_nodes(state.topology, state.connect, state.list_nodes, nodes) 63 | state 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /lib/strategy/erlang_hosts.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.ErlangHosts do 2 | @moduledoc """ 3 | This clustering strategy relies on Erlang's built-in distribution protocol by 4 | using a `.hosts.erlang` file (as used by the `:net_adm` module). 5 | 6 | Please see [the net_adm docs](http://erlang.org/doc/man/net_adm.html) for more details. 7 | 8 | In short, the following is the gist of how it works: 9 | 10 | > File `.hosts.erlang` consists of a number of host names written as Erlang terms. It is looked for in the current work 11 | > directory, the user's home directory, and $OTP_ROOT (the root directory of Erlang/OTP), in that order. 
12 | 13 | This looks a bit like the following in practice: 14 | 15 | ```erlang 16 | 'super.eua.ericsson.se'. 17 | 'renat.eua.ericsson.se'. 18 | 'grouse.eua.ericsson.se'. 19 | 'gauffin1.eua.ericsson.se'. 20 | 21 | ``` 22 | 23 | You can have `libcluster` automatically connect nodes on startup for you by configuring 24 | the strategy like below: 25 | 26 | config :libcluster, 27 | topologies: [ 28 | erlang_hosts_example: [ 29 | strategy: #{__MODULE__}, 30 | config: [timeout: 30_000] 31 | ] 32 | ] 33 | 34 | An optional timeout can be specified in the config. This is the timeout that 35 | will be used in the GenServer to connect the nodes. This defaults to 36 | `:infinity` meaning that the connection process will only happen when the 37 | worker is started. Any integer timeout will result in the connection process 38 | being triggered. In the example above, it has been configured for 30 seconds. 39 | """ 40 | use GenServer 41 | use Cluster.Strategy 42 | 43 | alias Cluster.Strategy.State 44 | 45 | def start_link([%State{topology: topology} = state]) do 46 | case :net_adm.host_file() do 47 | {:error, _} -> 48 | Cluster.Logger.warn(topology, "couldn't find .hosts.erlang file - not joining cluster") 49 | :ignore 50 | 51 | file -> 52 | new_state = %State{state | :meta => file} 53 | GenServer.start_link(__MODULE__, [new_state]) 54 | end 55 | end 56 | 57 | @impl true 58 | def init([state]) do 59 | new_state = connect_hosts(state) 60 | {:ok, new_state, configured_timeout(new_state)} 61 | end 62 | 63 | @impl true 64 | def handle_info(:timeout, state) do 65 | handle_info(:connect, state) 66 | end 67 | 68 | def handle_info(:connect, state) do 69 | new_state = connect_hosts(state) 70 | {:noreply, new_state, configured_timeout(new_state)} 71 | end 72 | 73 | defp configured_timeout(%State{config: config}) do 74 | Keyword.get(config, :timeout, :infinity) 75 | end 76 | 77 | defp connect_hosts(%State{meta: hosts_file} = state) do 78 | nodes = 79 | hosts_file 80 | |> Enum.map(&{:net_adm.names(&1), &1}) 81 | |> gather_node_names([]) 82 | |> List.delete(node()) 83 | 84 | Cluster.Strategy.connect_nodes(state.topology, state.connect, state.list_nodes, nodes) 85 | state 86 | end 87 | 88 | defp gather_node_names([], acc), do: acc 89 | 90 | defp gather_node_names([{{:ok, names}, host} | rest], acc) do 91 | names = Enum.map(names, fn {name, _} -> String.to_atom("#{name}@#{host}") end) 92 | gather_node_names(rest, names ++ acc) 93 | end 94 | 95 | defp gather_node_names([_ | rest], acc), do: gather_node_names(rest, acc) 96 | end 97 | -------------------------------------------------------------------------------- /lib/strategy/gossip.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Gossip do 2 | @moduledoc """ 3 | This clustering strategy uses multicast UDP to gossip node names 4 | to other nodes on the network. These packets are listened for on 5 | each node as well, and a connection will be established between the 6 | two nodes if they are reachable on the network, and share the same 7 | magic cookie. In this way, a cluster of nodes may be formed dynamically. 8 | 9 | The gossip protocol is extremely simple, with a prelude followed by the node 10 | name which sent the packet. The node name is parsed from the packet, and a 11 | connection attempt is made. It will fail if the two nodes do not share a cookie. 
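For illustration, an unencrypted heartbeat packet is simply the `"heartbeat::"` prelude followed by the sending node's name encoded as an Erlang term, mirroring the `heartbeat/2` helper defined below:

    "heartbeat::" <> :erlang.term_to_binary(%{node: node()})

When a secret is configured, the packet is instead a random 16-byte IV followed by the AES-256-CBC-encrypted payload.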
12 | 13 | By default, the gossip occurs on port 45892, using the multicast address 233.252.1.32 14 | 15 | The gossip protocol is not encrypted by default, but can be by providing a secret 16 | in the configuration of the strategy (as shown below). 17 | This can also be used to run multiple clusters with the same multicast configuration, 18 | as nodes not sharing the same encryption key will not be connected. 19 | 20 | You may configure the multicast interface, multicast address, the interface address to bind to, the port, 21 | the TTL of the packets and the optional secret using the following settings: 22 | 23 | config :libcluster, 24 | topologies: [ 25 | gossip_example: [ 26 | strategy: #{__MODULE__}, 27 | config: [ 28 | port: 45892, 29 | if_addr: "0.0.0.0", 30 | multicast_if: "192.168.1.1", 31 | multicast_addr: "233.252.1.32", 32 | multicast_ttl: 1, 33 | secret: "somepassword"]]] 34 | 35 | A TTL of 1 will limit packets to the local network, and is the default TTL. 36 | 37 | Optionally, `broadcast_only: true` option can be set which disables multicast and 38 | only uses broadcasting. This limits connectivity to local network but works on in 39 | scenarios where multicast is not enabled. Use `multicast_addr` as the broadcast address. 40 | 41 | Example for broadcast only: 42 | 43 | config :libcluster, 44 | topologies: [ 45 | gossip_example: [ 46 | strategy: #{__MODULE__}, 47 | config: [ 48 | port: 45892, 49 | if_addr: "0.0.0.0", 50 | multicast_addr: "255.255.255.255", 51 | broadcast_only: true]]] 52 | 53 | Debug logging is deactivated by default for this clustering strategy, but it can be easily activated by configuring the application: 54 | 55 | use Mix.Config 56 | 57 | config :libcluster, 58 | debug: true 59 | 60 | All the checks are done at runtime, so you can flip the debug level without being forced to shutdown your node. 61 | """ 62 | use GenServer 63 | use Cluster.Strategy 64 | import Cluster.Logger 65 | 66 | alias Cluster.Strategy.State 67 | 68 | @default_port 45892 69 | @default_addr {0, 0, 0, 0} 70 | @default_multicast_addr {233, 252, 1, 32} 71 | @sol_socket 0xFFFF 72 | @so_reuseport 0x0200 73 | 74 | def start_link(args) do 75 | GenServer.start_link(__MODULE__, args) 76 | end 77 | 78 | @impl true 79 | def init([%State{config: config} = state]) do 80 | port = Keyword.get(config, :port, @default_port) 81 | 82 | ip = 83 | config 84 | |> Keyword.get(:if_addr, @default_addr) 85 | |> sanitize_ip() 86 | 87 | broadcast_only? = Keyword.get(config, :broadcast_only, false) 88 | ttl = Keyword.get(config, :multicast_ttl, 1) 89 | 90 | multicast_if = Keyword.get(config, :multicast_if) 91 | 92 | multicast_addr = 93 | config 94 | |> Keyword.get(:multicast_addr, @default_multicast_addr) 95 | |> sanitize_ip() 96 | 97 | multicast_opts = 98 | cond do 99 | broadcast_only? 
-> 100 | [] 101 | 102 | multicast_if != nil -> 103 | [ 104 | multicast_if: sanitize_ip(multicast_if), 105 | multicast_ttl: ttl, 106 | multicast_loop: true, 107 | add_membership: {multicast_addr, sanitize_ip(multicast_if)} 108 | ] 109 | 110 | :else -> 111 | [ 112 | multicast_ttl: ttl, 113 | multicast_loop: true, 114 | add_membership: {multicast_addr, {0, 0, 0, 0}} 115 | ] 116 | end 117 | 118 | options = 119 | [ 120 | :binary, 121 | active: true, 122 | ip: ip, 123 | reuseaddr: true, 124 | broadcast: true 125 | ] ++ multicast_opts ++ reuse_port() 126 | 127 | {:ok, socket} = :gen_udp.open(port, options) 128 | 129 | secret = Keyword.get(config, :secret, nil) 130 | state = %State{state | :meta => {multicast_addr, port, socket, secret}} 131 | 132 | # TODO: Remove this version check when we deprecate OTP < 21 support 133 | if :erlang.system_info(:otp_release) >= ~c"21" do 134 | {:ok, state, {:continue, nil}} 135 | else 136 | {:ok, state, 0} 137 | end 138 | end 139 | 140 | defp reuse_port() do 141 | case :os.type() do 142 | {:unix, os_name} -> 143 | cond do 144 | os_name in [:darwin, :freebsd, :openbsd, :linux, :netbsd] -> 145 | [{:raw, @sol_socket, @so_reuseport, <<1::native-32>>}] 146 | 147 | true -> 148 | [] 149 | end 150 | 151 | _ -> 152 | [] 153 | end 154 | end 155 | 156 | defp sanitize_ip(input) do 157 | case input do 158 | {_a, _b, _c, _d} = ip -> 159 | ip 160 | 161 | ip when is_binary(ip) -> 162 | {:ok, addr} = :inet.parse_ipv4_address(~c"#{ip}") 163 | addr 164 | end 165 | end 166 | 167 | # Send stuttered heartbeats 168 | # TODO: Remove this version check when we deprecate OTP < 21 support 169 | if :erlang.system_info(:otp_release) >= ~c"21" do 170 | @impl true 171 | def handle_continue(_, state), do: handle_info(:heartbeat, state) 172 | else 173 | @impl true 174 | def handle_info(:timeout, state), do: handle_info(:heartbeat, state) 175 | end 176 | 177 | @impl true 178 | def handle_info(:heartbeat, %State{meta: {multicast_addr, port, socket, _}} = state) do 179 | debug(state.topology, "heartbeat") 180 | :gen_udp.send(socket, multicast_addr, port, heartbeat(node(), state)) 181 | Process.send_after(self(), :heartbeat, :rand.uniform(5_000)) 182 | {:noreply, state} 183 | end 184 | 185 | # Handle received heartbeats 186 | def handle_info( 187 | {:udp, _socket, _ip, _port, <<"heartbeat::", _::binary>> = packet}, 188 | %State{meta: {_, _, _, secret}} = state 189 | ) 190 | when is_nil(secret) do 191 | handle_heartbeat(state, packet) 192 | {:noreply, state} 193 | end 194 | 195 | def handle_info( 196 | {:udp, _socket, _ip, _port, <> <> ciphertext}, 197 | %State{meta: {_, _, _, secret}} = state 198 | ) 199 | when is_binary(secret) do 200 | case decrypt(state, ciphertext, secret, iv) do 201 | {:ok, plaintext} -> 202 | handle_heartbeat(state, plaintext) 203 | {:noreply, state} 204 | 205 | _ -> 206 | {:noreply, state} 207 | end 208 | end 209 | 210 | def handle_info({:udp, _socket, _ip, _port, _}, state) do 211 | {:noreply, state} 212 | end 213 | 214 | @impl true 215 | def terminate(_reason, %State{meta: {_, _, socket, _}}) do 216 | :gen_udp.close(socket) 217 | :ok 218 | end 219 | 220 | # Construct iodata representing packet to send 221 | defp heartbeat(node_name, %State{meta: {_, _, _, secret}}) 222 | when is_nil(secret) do 223 | ["heartbeat::", :erlang.term_to_binary(%{node: node_name})] 224 | end 225 | 226 | defp heartbeat(node_name, %State{meta: {_, _, _, secret}} = state) when is_binary(secret) do 227 | message = "heartbeat::" <> :erlang.term_to_binary(%{node: node_name}) 228 | {:ok, iv, msg} = 
encrypt(state, message, secret) 229 | 230 | [iv, msg] 231 | end 232 | 233 | # Upon receipt of a heartbeat, we check to see if the node 234 | # is connected to us, and if not, we connect to it. 235 | # If the connection fails, it's likely because the cookie 236 | # is different, and thus a node we can ignore 237 | @spec handle_heartbeat(State.t(), binary) :: :ok 238 | defp handle_heartbeat(%State{} = state, <<"heartbeat::", rest::binary>>) do 239 | self = node() 240 | connect = state.connect 241 | list_nodes = state.list_nodes 242 | topology = state.topology 243 | 244 | case :erlang.binary_to_term(rest) do 245 | %{node: ^self} -> 246 | :ok 247 | 248 | %{node: n} when is_atom(n) -> 249 | debug(state.topology, "received heartbeat from #{n}") 250 | Cluster.Strategy.connect_nodes(topology, connect, list_nodes, [n]) 251 | :ok 252 | 253 | _ -> 254 | :ok 255 | end 256 | end 257 | 258 | defp handle_heartbeat(_state, _packet) do 259 | :ok 260 | end 261 | 262 | defp encrypt(_state, plaintext, password) do 263 | iv = :crypto.strong_rand_bytes(16) 264 | key = :crypto.hash(:sha256, password) 265 | ciphertext = :crypto.crypto_one_time(:aes_256_cbc, key, iv, pkcs7_pad(plaintext), true) 266 | 267 | {:ok, iv, ciphertext} 268 | end 269 | 270 | defp decrypt(state, ciphertext, password, iv) do 271 | key = :crypto.hash(:sha256, password) 272 | 273 | with {:unpadding, {:ok, padded}} <- {:unpadding, safe_decrypt(state, key, iv, ciphertext)}, 274 | {:decrypt, {:ok, _plaintext} = res} <- {:decrypt, pkcs7_unpad(padded)} do 275 | res 276 | else 277 | {:unpadding, :error} -> {:error, :decrypt} 278 | {:decrypt, :error} -> {:error, :unpadding} 279 | end 280 | end 281 | 282 | defp safe_decrypt(state, key, iv, ciphertext) do 283 | try do 284 | {:ok, :crypto.crypto_one_time(:aes_256_cbc, key, iv, ciphertext, false)} 285 | catch 286 | :error, {tag, {file, line}, desc} -> 287 | warn(state.topology, "decryption failed: #{inspect(tag)} (#{file}:#{line}): #{desc}") 288 | :error 289 | end 290 | end 291 | 292 | # 293 | # Pads a message using the PKCS #7 cryptographic message syntax. 294 | # 295 | # from: https://github.com/izelnakri/aes256/blob/master/lib/aes256.ex 296 | # 297 | # See: https://tools.ietf.org/html/rfc2315 298 | # See: `pkcs7_unpad/1` 299 | defp pkcs7_pad(message) do 300 | bytes_remaining = rem(byte_size(message), 16) 301 | padding_size = 16 - bytes_remaining 302 | message <> :binary.copy(<>, padding_size) 303 | end 304 | 305 | # 306 | # Unpads a message using the PKCS #7 cryptographic message syntax. 
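# The final byte of the padded message encodes the padding length; the
# padding bytes are checked against it before being stripped.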
307 | # 308 | # from: https://github.com/izelnakri/aes256/blob/master/lib/aes256.ex 309 | # 310 | # See: https://tools.ietf.org/html/rfc2315 311 | # See: `pkcs7_pad/1` 312 | defp pkcs7_unpad(<<>>), do: :error 313 | 314 | defp pkcs7_unpad(message) do 315 | padding_size = :binary.last(message) 316 | 317 | if padding_size <= 16 do 318 | message_size = byte_size(message) 319 | 320 | if binary_part(message, message_size, -padding_size) === 321 | :binary.copy(<>, padding_size) do 322 | {:ok, binary_part(message, 0, message_size - padding_size)} 323 | else 324 | :error 325 | end 326 | else 327 | :error 328 | end 329 | end 330 | end 331 | -------------------------------------------------------------------------------- /lib/strategy/kubernetes.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Kubernetes do 2 | @default_polling_interval 5_000 3 | @kubernetes_master "kubernetes.default.svc" 4 | @service_account_path "/var/run/secrets/kubernetes.io/serviceaccount" 5 | 6 | @moduledoc """ 7 | This clustering strategy works by fetching information of endpoints or pods, which are filtered by 8 | given Kubernetes namespace and label. 9 | 10 | > This strategy requires a service account with the ability to list endpoints or pods. If you want 11 | > to avoid that, you could use one of the DNS-based strategies instead. 12 | > 13 | > See `Cluster.Strategy.Kubernetes.DNS` and `Cluster.Strategy.Kubernetes.DNSSRV`. 14 | 15 | It assumes that all Erlang nodes are using longnames - `@`: 16 | 17 | + all nodes are using the same `` 18 | + all nodes are using unique `` 19 | 20 | In `@`: 21 | 22 | + `` would be the value configured by `:kubernetes_node_basename` option. 23 | + `` would be the value which is controlled by following options: 24 | - `:kubernetes_namespace` 25 | - `:kubernetes_selector` 26 | - `:kubernetes_service_name` 27 | - `:kubernetes_ip_lookup_mode` 28 | - `:kubernetes_use_cached_resources` 29 | - `:mode` 30 | 31 | ## Getting `` 32 | 33 | As said above, the basename is configured by `:kubernetes_node_basename` option. 34 | 35 | Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP 36 | release matches the name configured by `:kubernetes_node_basename`. 37 | 38 | ## Getting `` 39 | 40 | ### `:kubernetes_namespace` and `:kubernetes_selector` option 41 | 42 | These two options configure how to filter required endpoints or pods. 43 | 44 | ### `:kubernetes_ip_lookup_mode` option 45 | 46 | These option configures where to lookup the required IP. 47 | 48 | Available values: 49 | 50 | + `:endpoints` (default) 51 | + `:pods` 52 | 53 | #### :endpoints 54 | 55 | When setting this value, this strategy will lookup IP from endpoints. 56 | 57 | In order for your endpoints to be found they should be returned when you run: 58 | 59 | kubectl get endpoints -l app=myapp 60 | 61 | Then, this strategy will fetch the addresses of all endpoints with that label and attempt to 62 | connect. 63 | 64 | #### :pods 65 | 66 | When setting this value, this strategy will lookup IP from pods directly. 67 | 68 | In order for your pods to be found they should be returned when you run: 69 | 70 | kubectl get pods -l app=myapp 71 | 72 | Then, this strategy will fetch the IP of all pods with that label and attempt to connect. 73 | 74 | ### `kubernetes_use_cached_resources` option 75 | 76 | When setting this value, this strategy will use cached resource version value to fetch k8s resources. 
77 | In k8s resources are incremented by 1 on every change, this version will set requested resourceVersion 78 | to 0, that will use cached versions of resources, take in mind that this may be outdated or unavailable. 79 | 80 | ### `:mode` option 81 | 82 | These option configures how to build the longname. 83 | 84 | Available values: 85 | 86 | + `:ip` (default) 87 | + `:dns` 88 | + `:hostname` 89 | 90 | #### :ip 91 | 92 | In this mode, the IP address is used directly. The longname will be something like: 93 | 94 | myapp@ 95 | 96 | Getting this mode to work requires: 97 | 98 | 1. exposing pod IP from Kubernetes to the Erlang node. 99 | 2. setting the name of Erlang node according to the exposed information 100 | 101 | First, expose required information from Kubernetes as environment variables of Erlang node: 102 | 103 | # deployment.yaml 104 | env: 105 | - name: POD_IP 106 | valueFrom: 107 | fieldRef: 108 | fieldPath: status.podIP 109 | 110 | Then, set the name of Erlang node by using the exposed environment variables. If you use mix releases, you 111 | can configure the required options in `rel/env.sh.eex`: 112 | 113 | # rel/env.sh.eex 114 | export RELEASE_DISTRIBUTION=name 115 | export RELEASE_NODE=<%= @release.name %>@${POD_IP} 116 | 117 | > `export RELEASE_DISTRIBUTION=name` will append a `-name` option to the `start` command directly 118 | > and requires no further changes to the `vm.args`. 119 | 120 | #### :hostname 121 | 122 | In this mode, the hostname is used directly. The longname will be something like: 123 | 124 | myapp@...svc. 125 | 126 | Getting `:hostname` mode to work requires: 127 | 128 | 1. deploying pods as a StatefulSet (otherwise, hostname is not set for pods) 129 | 2. setting `:kubernetes_service_name` to the name of the Kubernetes service that is being lookup 130 | 3. setting the name of Erlang node according to hostname of pods 131 | 132 | Then, set the name of Erlang node by using the hostname of pod. If you use mix releases, you can 133 | configure the required options in `rel/env.sh.eex`: 134 | 135 | # rel/env.sh.eex 136 | export RELEASE_DISTRIBUTION=name 137 | export RELEASE_NODE=<%= @release.name %>@$(hostname -f) 138 | 139 | > `hostname -f` returns the whole FQDN, which is something like: 140 | > `$(hostname).${SERVICE_NAME}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"`. 141 | 142 | #### :dns 143 | 144 | In this mode, an IP-based pod A record is used. The longname will be something like: 145 | 146 | myapp@..pod. 147 | 148 | Getting `:dns` mode to work requires: 149 | 150 | 1. exposing pod IP from Kubernetes to the Erlang node 151 | 2. setting the name of Erlang node according to the exposed information 152 | 153 | First, expose required information from Kubernetes as environment variables of Erlang node: 154 | 155 | # deployment.yaml 156 | env: 157 | - name: NAMESPACE 158 | valueFrom: 159 | fieldRef: 160 | fieldPath: metadata.namespace 161 | - name: POD_IP 162 | valueFrom: 163 | fieldRef: 164 | fieldPath: status.podIP 165 | 166 | Then, set the name of Erlang node by using the exposed environment variables. If you use mix 167 | releases, you can configure the required options in `rel/env.sh.eex`: 168 | 169 | # rel/env.sh.eex 170 | export POD_A_RECORD=$(echo $POD_IP | sed 's/\./-/g') 171 | export CLUSTER_DOMAIN=cluster.local # modify this value according to your actual situation 172 | export RELEASE_DISTRIBUTION=name 173 | export RELEASE_NODE=<%= @release.name %>@${POD_A_RECORD}.${NAMESPACE}.pod.${CLUSTER_DOMAIN} 174 | 175 | ### Which mode is the best one? 
176 | 177 | There is no best, only the best for you: 178 | 179 | + If you're not using a StatefulSet, use `:ip` or `:dns`. 180 | + If you're using a StatefulSet, use `:hostname`. 181 | 182 | And, there is one thing that can be taken into consideration. When using `:ip` or `:dns`, you 183 | can establish a remote shell (as well as run observer) by using `kubectl port-forward` in combination 184 | with some entries in `/etc/hosts`. 185 | 186 | ## Polling Interval 187 | 188 | The default interval to sync topologies is `#{@default_polling_interval}` 189 | (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. 190 | 191 | ## Getting cluster information 192 | 193 | > In general, you don't need to read this, the default values will work. 194 | 195 | This strategy fetchs information of endpoints or pods by accessing the REST API provided by 196 | Kubernetes. 197 | 198 | The base URL of the REST API has two parts: 199 | 200 | . 201 | 202 | `` is configured by following options: 203 | 204 | + `:kubernetes_master` - the default value is `#{@kubernetes_master}` 205 | 206 | `` is configured by following options and environment variables: 207 | 208 | + `:kubernetes_cluster_name` - the default value is `cluster`, and the final cluster domain will be `.local` 209 | + `CLUSTER_DOMAIN` - when this environment variable is provided, `:kubernetes_cluster_name` will be ignored 210 | 211 | > `` and `` also affect each other, checkout the source code for more 212 | > details. 213 | 214 | Besides the base URL of the REST API, a service account must be provided. The service account is 215 | configured by following options: 216 | 217 | + `:kubernetes_service_account_path` - the default value is `#{@service_account_path}` 218 | 219 | ## An example configuration 220 | 221 | config :libcluster, 222 | topologies: [ 223 | erlang_nodes_in_k8s: [ 224 | strategy: #{__MODULE__}, 225 | config: [ 226 | mode: :ip, 227 | kubernetes_node_basename: "myapp", 228 | kubernetes_selector: "app=myapp", 229 | kubernetes_namespace: "my_namespace", 230 | polling_interval: 10_000 231 | ] 232 | ] 233 | ] 234 | 235 | """ 236 | use GenServer 237 | use Cluster.Strategy 238 | import Cluster.Logger 239 | 240 | alias Cluster.Strategy.State 241 | 242 | def start_link(args), do: GenServer.start_link(__MODULE__, args) 243 | 244 | @impl true 245 | def init([%State{meta: nil} = state]) do 246 | init([%State{state | :meta => MapSet.new()}]) 247 | end 248 | 249 | def init([%State{} = state]) do 250 | {:ok, load(state)} 251 | end 252 | 253 | @impl true 254 | def handle_info(:timeout, state) do 255 | handle_info(:load, state) 256 | end 257 | 258 | def handle_info(:load, %State{} = state) do 259 | {:noreply, load(state)} 260 | end 261 | 262 | def handle_info(_, state) do 263 | {:noreply, state} 264 | end 265 | 266 | defp load(%State{topology: topology} = state) do 267 | new_nodelist = MapSet.new(get_nodes(state)) 268 | removed = MapSet.difference(state.meta, new_nodelist) 269 | 270 | new_nodelist = 271 | case Cluster.Strategy.disconnect_nodes( 272 | topology, 273 | state.disconnect, 274 | state.list_nodes, 275 | MapSet.to_list(removed) 276 | ) do 277 | :ok -> 278 | new_nodelist 279 | 280 | {:error, bad_nodes} -> 281 | # Add back the nodes which should have been removed, but which couldn't be for some reason 282 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 283 | MapSet.put(acc, n) 284 | end) 285 | end 286 | 287 | new_nodelist = 288 | case Cluster.Strategy.connect_nodes( 289 | topology, 290 | 
state.connect, 291 | state.list_nodes, 292 | MapSet.to_list(new_nodelist) 293 | ) do 294 | :ok -> 295 | new_nodelist 296 | 297 | {:error, bad_nodes} -> 298 | # Remove the nodes which should have been added, but couldn't be for some reason 299 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 300 | MapSet.delete(acc, n) 301 | end) 302 | end 303 | 304 | Process.send_after(self(), :load, polling_interval(state)) 305 | 306 | %State{state | meta: new_nodelist} 307 | end 308 | 309 | defp polling_interval(%State{config: config}) do 310 | Keyword.get(config, :polling_interval, @default_polling_interval) 311 | end 312 | 313 | @spec get_token(String.t()) :: String.t() 314 | defp get_token(service_account_path) do 315 | path = Path.join(service_account_path, "token") 316 | 317 | case File.exists?(path) do 318 | true -> path |> File.read!() |> String.trim() 319 | false -> "" 320 | end 321 | end 322 | 323 | @spec get_ssl_opts(Path.t()) :: Keyword.t() 324 | defp get_ssl_opts(service_account_path) do 325 | path = Path.join(service_account_path, "ca.crt") 326 | 327 | case File.exists?(path) do 328 | true -> 329 | [ 330 | verify: :verify_peer, 331 | cacertfile: String.to_charlist(path) 332 | ] 333 | 334 | false -> 335 | [verify: :verify_none] 336 | end 337 | end 338 | 339 | @spec get_namespace(String.t(), String.t()) :: String.t() 340 | if Mix.env() == :test do 341 | defp get_namespace(_service_account_path, nil), do: "__libcluster_test" 342 | else 343 | defp get_namespace(service_account_path, nil) do 344 | path = Path.join(service_account_path, "namespace") 345 | 346 | if File.exists?(path) do 347 | path |> File.read!() |> String.trim() 348 | else 349 | "" 350 | end 351 | end 352 | end 353 | 354 | defp get_namespace(_, namespace), do: namespace 355 | 356 | @spec get_nodes(State.t()) :: [atom()] 357 | defp get_nodes(%State{topology: topology, config: config, meta: meta}) do 358 | service_account_path = 359 | Keyword.get(config, :kubernetes_service_account_path, @service_account_path) 360 | 361 | token = get_token(service_account_path) 362 | ssl_opts = get_ssl_opts(service_account_path) 363 | 364 | namespace = get_namespace(service_account_path, Keyword.get(config, :kubernetes_namespace)) 365 | app_name = Keyword.fetch!(config, :kubernetes_node_basename) 366 | cluster_name = Keyword.get(config, :kubernetes_cluster_name, "cluster") 367 | service_name = Keyword.get(config, :kubernetes_service_name) 368 | selector = Keyword.fetch!(config, :kubernetes_selector) 369 | ip_lookup_mode = Keyword.get(config, :kubernetes_ip_lookup_mode, :endpoints) 370 | 371 | use_cache = Keyword.get(config, :kubernetes_use_cached_resources, false) 372 | resource_version = if use_cache, do: 0, else: nil 373 | 374 | master_name = Keyword.get(config, :kubernetes_master, @kubernetes_master) 375 | cluster_domain = System.get_env("CLUSTER_DOMAIN", "#{cluster_name}.local") 376 | 377 | master = 378 | cond do 379 | String.ends_with?(master_name, cluster_domain) -> 380 | master_name 381 | 382 | String.ends_with?(master_name, ".") -> 383 | # The dot at the end is used to determine that the name is "final" 384 | master_name 385 | 386 | :else -> 387 | master_name <> "." 
<> cluster_domain 388 | end 389 | 390 | cond do 391 | app_name != nil and selector != nil -> 392 | query_params = 393 | [] 394 | |> apply_param(:labelSelector, selector) 395 | |> apply_param(:resourceVersion, resource_version) 396 | |> URI.encode_query(:rfc3986) 397 | 398 | path = 399 | case ip_lookup_mode do 400 | :endpoints -> 401 | "api/v1/namespaces/#{namespace}/endpoints?#{query_params}" 402 | 403 | :pods -> 404 | "api/v1/namespaces/#{namespace}/pods?#{query_params}" 405 | end 406 | 407 | headers = [{~c"authorization", ~c"Bearer #{token}"}] 408 | http_options = [ssl: ssl_opts, timeout: 15000] 409 | 410 | case :httpc.request(:get, {~c"https://#{master}/#{path}", headers}, http_options, []) do 411 | {:ok, {{_version, 200, _status}, _headers, body}} -> 412 | parse_response(ip_lookup_mode, Jason.decode!(body)) 413 | |> Enum.map(fn node_info -> 414 | format_node( 415 | Keyword.get(config, :mode, :ip), 416 | node_info, 417 | app_name, 418 | cluster_name, 419 | service_name 420 | ) 421 | end) 422 | 423 | {:ok, {{_version, 403, _status}, _headers, body}} -> 424 | %{"message" => msg} = Jason.decode!(body) 425 | warn(topology, "cannot query kubernetes (unauthorized): #{msg}") 426 | [] 427 | 428 | {:ok, {{_version, code, status}, _headers, body}} -> 429 | warn(topology, "cannot query kubernetes (#{code} #{status}): #{inspect(body)}") 430 | meta 431 | 432 | {:error, reason} -> 433 | error(topology, "request to kubernetes failed!: #{inspect(reason)}") 434 | meta 435 | end 436 | 437 | app_name == nil -> 438 | warn( 439 | topology, 440 | "kubernetes strategy is selected, but :kubernetes_node_basename is not configured!" 441 | ) 442 | 443 | [] 444 | 445 | selector == nil -> 446 | warn( 447 | topology, 448 | "kubernetes strategy is selected, but :kubernetes_selector is not configured!" 
449 | ) 450 | 451 | [] 452 | 453 | :else -> 454 | warn(topology, "kubernetes strategy is selected, but is not configured!") 455 | [] 456 | end 457 | end 458 | 459 | defp apply_param(params, key, value) when value != nil do 460 | [{key, value} | params] 461 | end 462 | 463 | defp apply_param(params, _key, _value), do: params 464 | 465 | defp parse_response(:endpoints, resp) do 466 | case resp do 467 | %{"items" => items} when is_list(items) -> 468 | Enum.reduce(items, [], fn 469 | %{"subsets" => subsets}, acc when is_list(subsets) -> 470 | addrs = 471 | Enum.flat_map(subsets, fn 472 | %{"addresses" => addresses} when is_list(addresses) -> 473 | Enum.map(addresses, fn %{"ip" => ip, "targetRef" => %{"namespace" => namespace}} = 474 | address -> 475 | %{ip: ip, namespace: namespace, hostname: address["hostname"]} 476 | end) 477 | 478 | _ -> 479 | [] 480 | end) 481 | 482 | acc ++ addrs 483 | 484 | _, acc -> 485 | acc 486 | end) 487 | 488 | _ -> 489 | [] 490 | end 491 | end 492 | 493 | defp parse_response(:pods, resp) do 494 | case resp do 495 | %{"items" => items} when is_list(items) -> 496 | Enum.map(items, fn 497 | %{ 498 | "status" => %{"podIP" => ip}, 499 | "metadata" => %{"namespace" => ns}, 500 | "spec" => pod_spec 501 | } -> 502 | %{ip: ip, namespace: ns, hostname: pod_spec["hostname"]} 503 | 504 | _ -> 505 | nil 506 | end) 507 | |> Enum.filter(&(&1 != nil)) 508 | 509 | _ -> 510 | [] 511 | end 512 | end 513 | 514 | defp format_node(:ip, %{ip: ip}, app_name, _cluster_name, _service_name), 515 | do: :"#{app_name}@#{ip}" 516 | 517 | defp format_node( 518 | :hostname, 519 | %{hostname: hostname, namespace: namespace}, 520 | app_name, 521 | cluster_name, 522 | service_name 523 | ) do 524 | :"#{app_name}@#{hostname}.#{service_name}.#{namespace}.svc.#{cluster_name}.local" 525 | end 526 | 527 | defp format_node(:dns, %{ip: ip, namespace: namespace}, app_name, cluster_name, _service_name) do 528 | ip = String.replace(ip, ".", "-") 529 | :"#{app_name}@#{ip}.#{namespace}.pod.#{cluster_name}.local" 530 | end 531 | end 532 | -------------------------------------------------------------------------------- /lib/strategy/kubernetes_dns.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Kubernetes.DNS do 2 | @default_polling_interval 5_000 3 | 4 | @moduledoc """ 5 | This clustering strategy works by fetching IP addresses with the help of a headless service in 6 | current Kubernetes namespace. 7 | 8 | > This strategy requires exposing pods by a headless service. 9 | > If you want to avoid that, you could use `Cluster.Strategy.Kubernetes`. 10 | 11 | It assumes that all Erlang nodes are using longnames - `@`: 12 | 13 | + all nodes are using the same `` 14 | + all nodes are using unique `` 15 | 16 | In `@`: 17 | 18 | + `` would be the value configured by `:application_name` option. 19 | + `` would be the value which is controlled by following options: 20 | - `:service` 21 | - `:resolver` 22 | 23 | ## Getting `` 24 | 25 | As said above, the basename is configured by `:application_name` option. 26 | 27 | Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP 28 | release matches the name configured by `:application_name`. 29 | 30 | ## Getting `` 31 | 32 | It will fetch IP addresses of all pods under a headless service and attempt to connect. 33 | 34 | ## Setup 35 | 36 | Getting this strategy to work requires: 37 | 38 | 1. exposing pod IP from Kubernetes to the Erlang node. 39 | 2. 
setting a headless service for the pods 40 | 3. setting the name of Erlang node according to the exposed information 41 | 42 | First, expose required information from Kubernetes as environment variables of Erlang node: 43 | 44 | # deployment.yaml 45 | env: 46 | - name: POD_IP 47 | valueFrom: 48 | fieldRef: 49 | fieldPath: status.podIP 50 | 51 | Second, set a headless service for the pods: 52 | 53 | # deployment.yaml 54 | apiVersion: v1 55 | kind: Service 56 | metadata: 57 | name: myapp-headless 58 | spec: 59 | selector: 60 | app: myapp 61 | type: ClusterIP 62 | clusterIP: None 63 | 64 | Then, set the name of Erlang node by using the exposed environment variables. If you use mix releases, you 65 | can configure the required options in `rel/env.sh.eex`: 66 | 67 | # rel/env.sh.eex 68 | export RELEASE_DISTRIBUTION=name 69 | export RELEASE_NODE=<%= @release.name %>@${POD_IP} 70 | 71 | ## Polling Interval 72 | 73 | The default interval to sync topologies is `#{@default_polling_interval}` 74 | (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. 75 | 76 | ## An example configuration 77 | 78 | config :libcluster, 79 | topologies: [ 80 | erlang_nodes_in_k8s: [ 81 | strategy: #{__MODULE__}, 82 | config: [ 83 | service: "myapp-headless", 84 | application_name: "myapp", 85 | polling_interval: 10_000 86 | ] 87 | ] 88 | ] 89 | 90 | """ 91 | use GenServer 92 | use Cluster.Strategy 93 | import Cluster.Logger 94 | 95 | alias Cluster.Strategy.State 96 | 97 | @impl true 98 | def start_link(args), do: GenServer.start_link(__MODULE__, args) 99 | 100 | @impl true 101 | def init([%State{meta: nil} = state]) do 102 | init([%State{state | :meta => MapSet.new()}]) 103 | end 104 | 105 | def init([%State{} = state]) do 106 | {:ok, load(state), 0} 107 | end 108 | 109 | @impl true 110 | def handle_info(:timeout, state) do 111 | handle_info(:load, state) 112 | end 113 | 114 | def handle_info(:load, state) do 115 | {:noreply, load(state)} 116 | end 117 | 118 | def handle_info(_, state) do 119 | {:noreply, state} 120 | end 121 | 122 | defp load(%State{topology: topology, meta: meta} = state) do 123 | new_nodelist = MapSet.new(get_nodes(state)) 124 | removed = MapSet.difference(meta, new_nodelist) 125 | 126 | new_nodelist = 127 | case Cluster.Strategy.disconnect_nodes( 128 | topology, 129 | state.disconnect, 130 | state.list_nodes, 131 | MapSet.to_list(removed) 132 | ) do 133 | :ok -> 134 | new_nodelist 135 | 136 | {:error, bad_nodes} -> 137 | # Add back the nodes which should have been removed, but which couldn't be for some reason 138 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 139 | MapSet.put(acc, n) 140 | end) 141 | end 142 | 143 | new_nodelist = 144 | case Cluster.Strategy.connect_nodes( 145 | topology, 146 | state.connect, 147 | state.list_nodes, 148 | MapSet.to_list(new_nodelist) 149 | ) do 150 | :ok -> 151 | new_nodelist 152 | 153 | {:error, bad_nodes} -> 154 | # Remove the nodes which should have been added, but couldn't be for some reason 155 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 156 | MapSet.delete(acc, n) 157 | end) 158 | end 159 | 160 | Process.send_after( 161 | self(), 162 | :load, 163 | polling_interval(state) 164 | ) 165 | 166 | %State{state | meta: new_nodelist} 167 | end 168 | 169 | @spec get_nodes(State.t()) :: [atom()] 170 | defp get_nodes(%State{topology: topology, config: config}) do 171 | app_name = Keyword.fetch!(config, :application_name) 172 | service = Keyword.fetch!(config, :service) 173 | resolver = 
Keyword.get(config, :resolver, &:inet_res.getbyname(&1, :a)) 174 | 175 | cond do 176 | app_name != nil and service != nil -> 177 | headless_service = to_charlist(service) 178 | 179 | case resolver.(headless_service) do 180 | {:ok, {:hostent, _fqdn, [], :inet, _value, addresses}} -> 181 | parse_response(addresses, app_name) 182 | 183 | {:error, reason} -> 184 | error(topology, "lookup against #{service} failed: #{inspect(reason)}") 185 | [] 186 | end 187 | 188 | app_name == nil -> 189 | warn( 190 | topology, 191 | "kubernetes.DNS strategy is selected, but :application_name is not configured!" 192 | ) 193 | 194 | [] 195 | 196 | service == nil -> 197 | warn(topology, "kubernetes strategy is selected, but :service is not configured!") 198 | [] 199 | 200 | :else -> 201 | warn(topology, "kubernetes strategy is selected, but is not configured!") 202 | [] 203 | end 204 | end 205 | 206 | defp polling_interval(%State{config: config}) do 207 | Keyword.get(config, :polling_interval, @default_polling_interval) 208 | end 209 | 210 | defp parse_response(addresses, app_name) do 211 | addresses 212 | |> Enum.map(&:inet_parse.ntoa(&1)) 213 | |> Enum.map(&"#{app_name}@#{&1}") 214 | |> Enum.map(&String.to_atom(&1)) 215 | end 216 | end 217 | -------------------------------------------------------------------------------- /lib/strategy/kubernetes_dns_srv.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Kubernetes.DNSSRV do 2 | @default_polling_interval 5_000 3 | 4 | @moduledoc """ 5 | This clustering strategy works by issuing a SRV query for the headless service where the StatefulSet 6 | containing your nodes is running. 7 | 8 | > This strategy requires deploying pods as a StatefulSet which is exposed by a headless service. 9 | > If you want to avoid that, you could use `Cluster.Strategy.Kubernetes.DNS`. 10 | 11 | It assumes that all Erlang nodes are using longnames - `@`: 12 | 13 | + all nodes are using the same `` 14 | + all nodes are using unique `` 15 | 16 | In `@`: 17 | 18 | + `` would be the value configured by `:application_name` option. 19 | + `` would be the value which is controlled by following options: 20 | - `:service` 21 | - `:namespace` 22 | - `:resolver` 23 | 24 | ## Getting `` 25 | 26 | As said above, the basename is configured by `:application_name` option. 27 | 28 | Just one thing to keep in mind - when building an OTP release, make sure that the name of the OTP 29 | release matches the name configured by `:application_name`. 30 | 31 | ## Getting `` 32 | 33 | > For more information, see the kubernetes stateful-application [documentation](https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#using-stable-network-identities) 34 | 35 | ## Setup 36 | 37 | Getting this strategy to work requires: 38 | 39 | 1. deploying pods as a StatefulSet (otherwise, hostname won't set for pods) 40 | 2. exposing above StatefulSet by a headless service (otherwise, the SRV query won't work as expected) 41 | 3. setting the name of Erlang node according to hostname of pods 42 | 43 | First, deploying pods as a StatefulSet which is exposed by a headless service. 
And here is an 44 | example of a corresponding Kubernetes definition: 45 | 46 | ```yaml 47 | apiVersion: v1 48 | kind: Service 49 | metadata: 50 | name: "myapp-headless" 51 | labels: 52 | app: myapp 53 | spec: 54 | ports: 55 | - port: 4000 56 | name: web 57 | clusterIP: None 58 | selector: 59 | app: myapp 60 | --- 61 | apiVersion: apps/v1 62 | kind: StatefulSet 63 | metadata: 64 | name: myapp 65 | spec: 66 | serviceName: "myapp-headless" 67 | replicas: 2 68 | selector: 69 | matchLabels: 70 | app: myapp 71 | template: 72 | metadata: 73 | labels: 74 | app: myapp 75 | spec: 76 | containers: 77 | - name: myapp 78 | image: myapp:v1.0.0 79 | imagePullPolicy: Always 80 | ports: 81 | - containerPort: 4000 82 | name: http 83 | protocol: TCP 84 | ``` 85 | 86 | Then, set the name of Erlang node by using the hostname of pod. If you use mix releases, you 87 | can configure the required options in `rel/env.sh.eex`: 88 | 89 | # rel/env.sh.eex 90 | export RELEASE_DISTRIBUTION=name 91 | export RELEASE_NODE=<%= @release.name %>@$(hostname -f) 92 | 93 | ## Polling Interval 94 | 95 | The default interval to sync topologies is `#{@default_polling_interval}` 96 | (#{div(@default_polling_interval, 1000)} seconds). You can configure it with `:polling_interval` option. 97 | 98 | ## An example configuration 99 | 100 | config :libcluster, 101 | topologies: [ 102 | erlang_nodes_in_k8s: [ 103 | strategy: #{__MODULE__}, 104 | config: [ 105 | service: "myapp-headless", 106 | application_name: "myapp", 107 | namespace: "default", 108 | polling_interval: 10_000 109 | ] 110 | ] 111 | ] 112 | 113 | ## An example of how this strategy extracts topology information from DNS 114 | 115 | ```sh 116 | $ hostname -f 117 | myapp-1.myapp-headless.default.svc.cluster.local 118 | 119 | # An SRV query for a headless service returns multiple entries 120 | $ dig SRV myapp-headless.default.svc.cluster.local 121 | 122 | ; <<>> DiG 9.14.3 <<>> SRV myapp-headless.default.svc.cluster.local 123 | ;; global options: +cmd 124 | ;; Got answer: 125 | ;; WARNING: .local is reserved for Multicast DNS 126 | ;; You are currently testing what happens when an mDNS query is leaked to DNS 127 | ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 7169 128 | ;; flags: qr aa rd ra; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 2 129 | 130 | ;; QUESTION SECTION: 131 | ;myapp-headless.default.svc.cluster.local. IN SRV 132 | 133 | ;; ANSWER SECTION: 134 | myapp-headless.default.svc.cluster.local. 30 IN SRV 10 50 0 myapp-0.myapp-headless.default.svc.cluster.local. 135 | myapp-headless.default.svc.cluster.local. 30 IN SRV 10 50 0 myapp-1.myapp-headless.default.svc.cluster.local. 136 | 137 | ;; ADDITIONAL SECTION: 138 | myapp-0.myapp-headless.default.svc.cluster.local. 30 IN A 10.1.0.95 139 | myapp--1.myapp-headless.default.svc.cluster.local. 
30 IN A 10.1.0.96 140 | 141 | ;; Query time: 0 msec 142 | ;; SERVER: 10.96.0.10#53(10.96.0.10) 143 | ;; WHEN: Wed Jul 03 11:55:27 UTC 2019 144 | ;; MSG SIZE rcvd: 167 145 | ``` 146 | 147 | """ 148 | use GenServer 149 | use Cluster.Strategy 150 | import Cluster.Logger 151 | 152 | alias Cluster.Strategy.State 153 | 154 | @impl true 155 | def start_link(args), do: GenServer.start_link(__MODULE__, args) 156 | 157 | @impl true 158 | def init([%State{meta: nil} = state]) do 159 | init([%State{state | :meta => MapSet.new()}]) 160 | end 161 | 162 | def init([%State{} = state]) do 163 | {:ok, load(state), 0} 164 | end 165 | 166 | @impl true 167 | def handle_info(:timeout, state) do 168 | handle_info(:load, state) 169 | end 170 | 171 | def handle_info(:load, state) do 172 | {:noreply, load(state)} 173 | end 174 | 175 | def handle_info(_, state) do 176 | {:noreply, state} 177 | end 178 | 179 | defp load(%State{topology: topology, meta: meta} = state) do 180 | new_nodelist = MapSet.new(get_nodes(state)) 181 | removed = MapSet.difference(meta, new_nodelist) 182 | 183 | new_nodelist = 184 | case Cluster.Strategy.disconnect_nodes( 185 | topology, 186 | state.disconnect, 187 | state.list_nodes, 188 | MapSet.to_list(removed) 189 | ) do 190 | :ok -> 191 | new_nodelist 192 | 193 | {:error, bad_nodes} -> 194 | # Add back the nodes which should have been removed, but which couldn't be for some reason 195 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 196 | MapSet.put(acc, n) 197 | end) 198 | end 199 | 200 | new_nodelist = 201 | case Cluster.Strategy.connect_nodes( 202 | topology, 203 | state.connect, 204 | state.list_nodes, 205 | MapSet.to_list(new_nodelist) 206 | ) do 207 | :ok -> 208 | new_nodelist 209 | 210 | {:error, bad_nodes} -> 211 | # Remove the nodes which should have been added, but couldn't be for some reason 212 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 213 | MapSet.delete(acc, n) 214 | end) 215 | end 216 | 217 | Process.send_after( 218 | self(), 219 | :load, 220 | polling_interval(state) 221 | ) 222 | 223 | %State{state | :meta => new_nodelist} 224 | end 225 | 226 | @spec get_nodes(State.t()) :: [atom()] 227 | defp get_nodes(%State{topology: topology, config: config}) do 228 | app_name = Keyword.fetch!(config, :application_name) 229 | service = Keyword.fetch!(config, :service) 230 | namespace = Keyword.fetch!(config, :namespace) 231 | 232 | service_k8s_path = 233 | "#{service}.#{namespace}.svc.#{System.get_env("CLUSTER_DOMAIN", "cluster.local.")}" 234 | 235 | resolver = Keyword.get(config, :resolver, &:inet_res.getbyname(&1, :srv)) 236 | 237 | cond do 238 | app_name != nil and service != nil -> 239 | headless_service = to_charlist(service_k8s_path) 240 | 241 | case resolver.(headless_service) do 242 | {:ok, {:hostent, _, _, :srv, _count, addresses}} -> 243 | parse_response(addresses, app_name) 244 | 245 | {:error, reason} -> 246 | error( 247 | topology, 248 | "#{inspect(headless_service)} : lookup against #{service} failed: #{inspect(reason)}" 249 | ) 250 | 251 | [] 252 | end 253 | 254 | app_name == nil -> 255 | warn( 256 | topology, 257 | "kubernetes.DNS strategy is selected, but :application_name is not configured!" 
258 | ) 259 | 260 | [] 261 | 262 | service == nil -> 263 | warn(topology, "kubernetes strategy is selected, but :service is not configured!") 264 | [] 265 | 266 | :else -> 267 | warn(topology, "kubernetes strategy is selected, but is not configured!") 268 | [] 269 | end 270 | end 271 | 272 | defp polling_interval(%State{config: config}) do 273 | Keyword.get(config, :polling_interval, @default_polling_interval) 274 | end 275 | 276 | defp parse_response(addresses, app_name) do 277 | addresses 278 | |> Enum.map(&:erlang.list_to_binary(elem(&1, 3))) 279 | |> Enum.map(&"#{app_name}@#{&1}") 280 | |> Enum.map(&String.to_atom(&1)) 281 | end 282 | end 283 | -------------------------------------------------------------------------------- /lib/strategy/local_epmd.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.LocalEpmd do 2 | @moduledoc """ 3 | This clustering strategy relies on Erlang's built-in distribution protocol. 4 | 5 | Unlike Cluster.Strategy.Epmd, this strategy assumes that all nodes are on 6 | the local host and can be discovered by epmd. 7 | 8 | Make sure `epmd` is started before you start your application, or startup 9 | will fail. When running with `mix`, you can do this automatically by passing 10 | the `--name` or `--sname` flag to start distribution. 11 | 12 | It should be configured as follows: 13 | 14 | config :libcluster, 15 | topologies: [ 16 | local_epmd_example: [ 17 | strategy: #{__MODULE__}]] 18 | 19 | """ 20 | use Cluster.Strategy 21 | 22 | alias Cluster.Strategy.State 23 | 24 | def start_link([%State{} = state]) do 25 | nodes = discover_nodes() 26 | 27 | Cluster.Strategy.connect_nodes(state.topology, state.connect, state.list_nodes, nodes) 28 | :ignore 29 | end 30 | 31 | defp discover_nodes do 32 | suffix = get_host_suffix(Node.self()) 33 | 34 | {:ok, names} = :erl_epmd.names() 35 | for {n, _} <- names, do: List.to_atom(n ++ suffix) 36 | end 37 | 38 | defp get_host_suffix(self) do 39 | self = Atom.to_charlist(self) 40 | [_, suffix] = :string.split(self, ~c"@") 41 | ~c"@" ++ suffix 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/strategy/rancher.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.Rancher do 2 | @moduledoc """ 3 | This clustering strategy is specific to the Rancher container platform. 4 | It works by querying the platform's metadata API for containers belonging to 5 | the same service as the node and attempts to connect them. 6 | (see: http://rancher.com/docs/rancher/latest/en/rancher-services/metadata-service/) 7 | 8 | It assumes that all nodes share a base name and are using longnames of the form 9 | `` where the `` is unique for each node. 10 | 11 | A way to assign a name to a node on boot in an app running as a Distillery release is: 12 | 13 | Create a wrapper script which will interpolate the current ip of the container. 
14 | 15 | ```sh 16 | #!/bin/sh 17 | 18 | export CONTAINER_IP="$(hostname -I | cut -f1 -d' ')" 19 | export REPLACE_OS_VARS=true 20 | 21 | /app/bin/app "$@" 22 | ``` 23 | 24 | ``` 25 | # vm.args 26 | -name app@${CONTAINER_IP} 27 | ``` 28 | 29 | An example configuration is below: 30 | 31 | 32 | config :libcluster, 33 | topologies: [ 34 | rancher_example: [ 35 | strategy: #{__MODULE__}, 36 | config: [ 37 | node_basename: "myapp", 38 | polling_interval: 10_000]]] 39 | """ 40 | 41 | use GenServer 42 | use Cluster.Strategy 43 | import Cluster.Logger 44 | 45 | alias Cluster.Strategy.State 46 | 47 | @default_polling_interval 5_000 48 | @rancher_metadata_base_url "http://rancher-metadata" 49 | 50 | def start_link(args), do: GenServer.start_link(__MODULE__, args) 51 | 52 | @impl true 53 | def init([%State{meta: nil} = state]) do 54 | init([%State{state | :meta => MapSet.new()}]) 55 | end 56 | 57 | def init([%State{} = state]) do 58 | {:ok, load(state)} 59 | end 60 | 61 | @impl true 62 | def handle_info(:timeout, state) do 63 | handle_info(:load, state) 64 | end 65 | 66 | def handle_info(:load, %State{} = state) do 67 | {:noreply, load(state)} 68 | end 69 | 70 | def handle_info(_, state) do 71 | {:noreply, state} 72 | end 73 | 74 | defp load( 75 | %State{ 76 | topology: topology, 77 | connect: connect, 78 | disconnect: disconnect, 79 | list_nodes: list_nodes 80 | } = state 81 | ) do 82 | new_nodelist = MapSet.new(get_nodes(state)) 83 | removed = MapSet.difference(state.meta, new_nodelist) 84 | 85 | new_nodelist = 86 | case Cluster.Strategy.disconnect_nodes( 87 | topology, 88 | disconnect, 89 | list_nodes, 90 | MapSet.to_list(removed) 91 | ) do 92 | :ok -> 93 | new_nodelist 94 | 95 | {:error, bad_nodes} -> 96 | # Add back the nodes which should have been removed, but which couldn't be for some reason 97 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 98 | MapSet.put(acc, n) 99 | end) 100 | end 101 | 102 | new_nodelist = 103 | case Cluster.Strategy.connect_nodes( 104 | topology, 105 | connect, 106 | list_nodes, 107 | MapSet.to_list(new_nodelist) 108 | ) do 109 | :ok -> 110 | new_nodelist 111 | 112 | {:error, bad_nodes} -> 113 | # Remove the nodes which should have been added, but couldn't be for some reason 114 | Enum.reduce(bad_nodes, new_nodelist, fn {n, _}, acc -> 115 | MapSet.delete(acc, n) 116 | end) 117 | end 118 | 119 | Process.send_after(self(), :load, polling_interval(state)) 120 | 121 | %{state | :meta => new_nodelist} 122 | end 123 | 124 | defp polling_interval(%{config: config}) do 125 | Keyword.get(config, :polling_interval, @default_polling_interval) 126 | end 127 | 128 | @spec get_nodes(State.t()) :: [atom()] 129 | defp get_nodes(%State{topology: topology, config: config}) do 130 | case Keyword.fetch!(config, :node_basename) do 131 | app_name when is_binary(app_name) and app_name != "" -> 132 | endpoints_path = "latest/self/service" 133 | headers = [{~c"accept", ~c"application/json"}] 134 | 135 | case :httpc.request( 136 | :get, 137 | {~c"#{@rancher_metadata_base_url}/#{endpoints_path}", headers}, 138 | [], 139 | [] 140 | ) do 141 | {:ok, {{_version, 200, _status}, _headers, body}} -> 142 | parse_response(app_name, Jason.decode!(body)) 143 | 144 | {:ok, {{_version, code, status}, _headers, body}} -> 145 | warn( 146 | topology, 147 | "cannot query Rancher Metadata API (#{code} #{status}): #{inspect(body)}" 148 | ) 149 | 150 | [] 151 | 152 | {:error, reason} -> 153 | error(topology, "request to Rancher Metadata API failed!: #{inspect(reason)}") 154 | [] 155 | end 156 | 157 | 
app_name -> 158 | warn( 159 | topology, 160 | "rancher strategy is selected, but :node_basename is invalid, got: #{inspect(app_name)}" 161 | ) 162 | 163 | [] 164 | end 165 | end 166 | 167 | defp parse_response(app_name, resp) do 168 | case resp do 169 | %{"containers" => containers} -> 170 | Enum.map(containers, fn %{"ips" => [ip | _]} -> :"#{app_name}@#{ip}" end) 171 | 172 | _ -> 173 | [] 174 | end 175 | end 176 | end 177 | -------------------------------------------------------------------------------- /lib/strategy/state.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.State do 2 | @moduledoc """ 3 | The state of one strategy. 4 | """ 5 | 6 | @type t :: %__MODULE__{ 7 | topology: atom, 8 | connect: {module, atom, [term]}, 9 | disconnect: {module, atom, [term]}, 10 | list_nodes: {module, atom, [:connected] | [:connected | [any]]}, 11 | meta: term, 12 | config: [{atom, term}] 13 | } 14 | 15 | defstruct topology: nil, 16 | connect: nil, 17 | disconnect: nil, 18 | list_nodes: nil, 19 | meta: nil, 20 | config: [] 21 | end 22 | -------------------------------------------------------------------------------- /lib/strategy/strategy.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy do 2 | @moduledoc """ 3 | This module defines the behaviour for implementing clustering strategies. 4 | """ 5 | defmacro __using__(_) do 6 | quote do 7 | @behaviour Cluster.Strategy 8 | 9 | @impl true 10 | def child_spec(args) do 11 | %{id: __MODULE__, type: :worker, start: {__MODULE__, :start_link, [args]}} 12 | end 13 | 14 | defoverridable child_spec: 1 15 | end 16 | end 17 | 18 | @type topology :: atom 19 | @type bad_nodes :: [{node, reason :: term}] 20 | @type mfa_tuple :: {module, atom, [term]} 21 | @type strategy_args :: [Cluster.Strategy.State.t()] 22 | 23 | # Required for supervision of the strategy 24 | @callback child_spec(strategy_args) :: Supervisor.child_spec() 25 | # Starts the strategy 26 | @callback start_link(strategy_args) :: {:ok, pid} | :ignore | {:error, reason :: term} 27 | 28 | @doc """ 29 | Given a list of node names, attempts to connect to all of them. 30 | Returns `:ok` if all nodes connected, or `{:error, [{node, reason}, ..]}` 31 | if we failed to connect to some nodes. 32 | 33 | All failures are logged. 
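  As a purely illustrative example - the MFAs below are simply the defaults that
  `Cluster.Supervisor` configures when a topology does not override them - a strategy
  could invoke this function as:

      topology = :example
      connect = {:net_kernel, :connect_node, []}
      list_nodes = {:erlang, :nodes, [:connected]}

      case Cluster.Strategy.connect_nodes(topology, connect, list_nodes, [:"app@10.0.0.1"]) do
        :ok ->
          :ok

        {:error, bad_nodes} ->
          # Each entry is {node, reason}; here reason is `false` or `:ignored`
          Enum.each(bad_nodes, fn {node, reason} ->
            IO.inspect({node, reason}, label: "connect failed")
          end)
      end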
34 | """ 35 | @spec connect_nodes(topology, mfa_tuple, mfa_tuple, [atom()]) :: :ok | {:error, bad_nodes} 36 | def connect_nodes(topology, {_, _, _} = connect, {_, _, _} = list_nodes, nodes) 37 | when is_list(nodes) do 38 | {connect_mod, connect_fun, connect_args} = connect 39 | {list_mod, list_fun, list_args} = list_nodes 40 | ensure_exported!(list_mod, list_fun, length(list_args)) 41 | current_node = Node.self() 42 | 43 | need_connect = 44 | nodes 45 | |> difference(apply(list_mod, list_fun, list_args)) 46 | |> Enum.reject(fn n -> current_node == n end) 47 | 48 | bad_nodes = 49 | Enum.reduce(need_connect, [], fn n, acc -> 50 | fargs = connect_args ++ [n] 51 | ensure_exported!(connect_mod, connect_fun, length(fargs)) 52 | 53 | start = System.monotonic_time() 54 | 55 | case apply(connect_mod, connect_fun, fargs) do 56 | true -> 57 | :telemetry.execute( 58 | [:libcluster, :connect_node, :ok], 59 | %{duration: System.monotonic_time() - start}, 60 | %{node: n, topology: topology} 61 | ) 62 | 63 | Cluster.Logger.info(topology, "connected to #{inspect(n)}") 64 | acc 65 | 66 | false -> 67 | :telemetry.execute( 68 | [:libcluster, :connect_node, :error], 69 | %{}, 70 | %{node: n, topology: topology, reason: :unreachable} 71 | ) 72 | 73 | Cluster.Logger.warn(topology, "unable to connect to #{inspect(n)}") 74 | [{n, false} | acc] 75 | 76 | :ignored -> 77 | :telemetry.execute( 78 | [:libcluster, :connect_node, :error], 79 | %{}, 80 | %{node: n, topology: topology, reason: :not_part_of_network} 81 | ) 82 | 83 | Cluster.Logger.warn( 84 | topology, 85 | "unable to connect to #{inspect(n)}: not part of network" 86 | ) 87 | 88 | [{n, :ignored} | acc] 89 | end 90 | end) 91 | 92 | case bad_nodes do 93 | [] -> :ok 94 | _ -> {:error, bad_nodes} 95 | end 96 | end 97 | 98 | @doc """ 99 | Given a list of node names, attempts to disconnect from all of them. 100 | Returns `:ok` if all nodes disconnected, or `{:error, [{node, reason}, ..]}` 101 | if we failed to disconnect from some nodes. 102 | 103 | All failures are logged. 
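  In addition to these log messages, a `:telemetry` event is emitted for every connect and
  disconnect attempt (see the `:telemetry.execute/3` calls in this module). A rough sketch of
  a handler for those events - the handler id and the use of `IO.inspect/2` are arbitrary
  choices for illustration, not part of libcluster:

      :telemetry.attach_many(
        "my-libcluster-handler",
        [
          [:libcluster, :connect_node, :ok],
          [:libcluster, :connect_node, :error],
          [:libcluster, :disconnect_node, :ok],
          [:libcluster, :disconnect_node, :error]
        ],
        fn event, measurements, metadata, _config ->
          IO.inspect({event, measurements, metadata}, label: "libcluster telemetry")
        end,
        nil
      )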
104 | """ 105 | @spec disconnect_nodes(topology, mfa_tuple, mfa_tuple, [atom()]) :: :ok | {:error, bad_nodes} 106 | def disconnect_nodes(topology, {_, _, _} = disconnect, {_, _, _} = list_nodes, nodes) 107 | when is_list(nodes) do 108 | {disconnect_mod, disconnect_fun, disconnect_args} = disconnect 109 | {list_mod, list_fun, list_args} = list_nodes 110 | ensure_exported!(list_mod, list_fun, length(list_args)) 111 | current_node = Node.self() 112 | 113 | need_disconnect = 114 | nodes 115 | |> intersection(apply(list_mod, list_fun, list_args)) 116 | |> Enum.reject(fn n -> current_node == n end) 117 | 118 | bad_nodes = 119 | Enum.reduce(need_disconnect, [], fn n, acc -> 120 | fargs = disconnect_args ++ [n] 121 | ensure_exported!(disconnect_mod, disconnect_fun, length(fargs)) 122 | 123 | start = System.monotonic_time() 124 | 125 | case apply(disconnect_mod, disconnect_fun, fargs) do 126 | true -> 127 | :telemetry.execute( 128 | [:libcluster, :disconnect_node, :ok], 129 | %{duration: System.monotonic_time() - start}, 130 | %{node: n, topology: topology} 131 | ) 132 | 133 | Cluster.Logger.info(topology, "disconnected from #{inspect(n)}") 134 | acc 135 | 136 | false -> 137 | :telemetry.execute( 138 | [:libcluster, :disconnect_node, :error], 139 | %{}, 140 | %{node: n, topology: topology, reason: :already_disconnected} 141 | ) 142 | 143 | Cluster.Logger.warn( 144 | topology, 145 | "disconnect from #{inspect(n)} failed because we're already disconnected" 146 | ) 147 | 148 | acc 149 | 150 | :ignored -> 151 | :telemetry.execute( 152 | [:libcluster, :disconnect_node, :error], 153 | %{}, 154 | %{node: n, topology: topology, reason: :not_part_of_network} 155 | ) 156 | 157 | Cluster.Logger.warn( 158 | topology, 159 | "disconnect from #{inspect(n)} failed because it is not part of the network" 160 | ) 161 | 162 | acc 163 | 164 | reason -> 165 | :telemetry.execute( 166 | [:libcluster, :disconnect_node, :error], 167 | %{}, 168 | %{node: n, topology: topology, reason: inspect(reason)} 169 | ) 170 | 171 | Cluster.Logger.warn( 172 | topology, 173 | "disconnect from #{inspect(n)} failed with: #{inspect(reason)}" 174 | ) 175 | 176 | [{n, reason} | acc] 177 | end 178 | end) 179 | 180 | case bad_nodes do 181 | [] -> :ok 182 | _ -> {:error, bad_nodes} 183 | end 184 | end 185 | 186 | def intersection(_a, []), do: [] 187 | def intersection([], _b), do: [] 188 | 189 | def intersection(a, b) when is_list(a) and is_list(b) do 190 | a |> MapSet.new() |> MapSet.intersection(MapSet.new(b)) 191 | end 192 | 193 | def difference(a, []), do: a 194 | def difference([], _b), do: [] 195 | 196 | def difference(a, b) when is_list(a) and is_list(b) do 197 | a |> MapSet.new() |> MapSet.difference(MapSet.new(b)) 198 | end 199 | 200 | defp ensure_exported!(mod, fun, arity) do 201 | unless function_exported?(mod, fun, arity) do 202 | raise "#{mod}.#{fun}/#{arity} is undefined!" 
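        # This raise surfaces a misconfigured :connect / :disconnect / :list_nodes MFA
        # loudly as soon as the strategy runs, rather than letting the topology
        # silently fail to form.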
203 | end 204 | end 205 | end 206 | -------------------------------------------------------------------------------- /lib/supervisor.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Supervisor do 2 | @moduledoc """ 3 | This module handles supervising the configured topologies, and is designed 4 | to support being started within your own supervision tree, as shown below: 5 | 6 | defmodule MyApp.App do 7 | use Application 8 | 9 | def start(_type, _args) do 10 | topologies = [ 11 | example: [ 12 | strategy: Cluster.Strategy.Epmd, 13 | config: [hosts: [:"a@127.0.0.1", :"b@127.0.0.1"]], 14 | ] 15 | ] 16 | children = [ 17 | {Cluster.Supervisor, [topologies, [name: MyApp.ClusterSupervisor]]}, 18 | ..other children.. 19 | ] 20 | Supervisor.start_link(children, strategy: :one_for_one, name: MyApp.Supervisor) 21 | end 22 | end 23 | 24 | The `topologies` configuration structure shown above can be built manually, 25 | like shown, so that you can load config at runtime in a way that best 26 | suits your application; or if you don't need to do any special config 27 | handling, you can use the Mix config file, and just use 28 | `Application.get_env(:libcluster, :topologies)`. That config would look like so: 29 | 30 | config :libcluster, 31 | topologies: [ 32 | example: [...] 33 | ] 34 | 35 | Use the method most convenient for you. 36 | """ 37 | use Supervisor 38 | 39 | @doc """ 40 | Start a new instance of this supervisor. This is the callback indicated in 41 | the child specification returned by `child_spec/1`. It expects a list of 42 | the form `[config, supervisor_opts]`, or `[config]`. The former allows you 43 | to provide options for the supervisor like with `Supervisor.start_link/3`. 44 | """ 45 | def start_link([_config, opts] = args) do 46 | Supervisor.start_link(__MODULE__, args, opts) 47 | end 48 | 49 | def start_link([config]) do 50 | start_link([config, []]) 51 | end 52 | 53 | @doc false 54 | @impl Supervisor 55 | def init([config, opts]) do 56 | opts = Keyword.put(opts, :strategy, :one_for_one) 57 | children = get_configured_topologies(config) 58 | Supervisor.init(children, opts) 59 | end 60 | 61 | defp get_configured_topologies(config) do 62 | for {topology, spec} <- config do 63 | strategy = Keyword.fetch!(spec, :strategy) 64 | state = build_initial_state([{:topology, topology} | spec]) 65 | 66 | %{ 67 | id: state.topology, 68 | start: {strategy, :start_link, [[state]]} 69 | } 70 | end 71 | end 72 | 73 | defp build_initial_state(spec) do 74 | topology = Keyword.fetch!(spec, :topology) 75 | config = Keyword.get(spec, :config, []) 76 | connect_mfa = Keyword.get(spec, :connect, {:net_kernel, :connect_node, []}) 77 | disconnect_mfa = Keyword.get(spec, :disconnect, {:erlang, :disconnect_node, []}) 78 | list_nodes_mfa = Keyword.get(spec, :list_nodes, {:erlang, :nodes, [:connected]}) 79 | 80 | %Cluster.Strategy.State{ 81 | topology: topology, 82 | connect: connect_mfa, 83 | disconnect: disconnect_mfa, 84 | list_nodes: list_nodes_mfa, 85 | config: config, 86 | meta: nil 87 | } 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Mixfile do 2 | use Mix.Project 3 | 4 | @version "3.5.0" 5 | @source_url "https://github.com/bitwalker/libcluster" 6 | 7 | def project do 8 | [ 9 | app: :libcluster, 10 | version: @version, 11 | elixir: "~> 1.13", 12 | build_embedded: Mix.env() == :prod, 
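      # start_permanent starts the application as :permanent in prod, so the node
      # shuts down if the application terminates unexpectedly.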
13 | start_permanent: Mix.env() == :prod, 14 | description: """ 15 | Automatic Erlang cluster formation and management for Elixir/Erlang 16 | applications 17 | """, 18 | package: package(), 19 | docs: docs(), 20 | deps: deps(), 21 | elixirc_paths: elixirc_paths(Mix.env()), 22 | dialyzer: [ 23 | flags: ~w(-Wunmatched_returns -Werror_handling -Wrace_conditions -Wno_opaque -Wunderspecs) 24 | ], 25 | preferred_cli_env: [ 26 | vcr: :test, 27 | "vcr.delete": :test, 28 | "vcr.check": :test, 29 | "vcr.show": :test 30 | ] 31 | ] 32 | end 33 | 34 | def application do 35 | [extra_applications: [:logger, :inets, :jason, :crypto, :ssl]] 36 | end 37 | 38 | defp deps do 39 | [ 40 | {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}, 41 | {:dialyxir, "~> 1.0", only: :dev, runtime: false}, 42 | {:exvcr, "~> 0.11", only: :test, runtime: false}, 43 | {:jason, "~> 1.1"}, 44 | {:telemetry, "~> 1.3"} 45 | ] 46 | end 47 | 48 | defp package do 49 | [ 50 | files: ["lib", "mix.exs", "README.md", "LICENSE.md", "CHANGELOG.md"], 51 | maintainers: ["Paul Schoenfelder"], 52 | licenses: ["MIT"], 53 | links: %{ 54 | "Changelog" => "https://hexdocs.pm/libcluster/changelog.html", 55 | GitHub: @source_url 56 | } 57 | ] 58 | end 59 | 60 | defp docs do 61 | [ 62 | extras: ["CHANGELOG.md", "README.md"], 63 | main: "readme", 64 | source_url: @source_url, 65 | source_ref: @version, 66 | formatter_opts: [gfm: true] 67 | ] 68 | end 69 | 70 | defp elixirc_paths(:test), do: ["lib", "test/support"] 71 | defp elixirc_paths(_), do: ["lib"] 72 | end 73 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 3 | "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, 4 | "erlex": {:hex, :erlex, "0.2.7", "810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, 5 | "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, 6 | "exactor": {:hex, :exactor, "2.2.4", "5efb4ddeb2c48d9a1d7c9b465a6fffdd82300eb9618ece5d34c3334d5d7245b1", [:mix], [], "hexpm", "1222419f706e01bfa1095aec9acf6421367dcfab798a6f67c54cf784733cd6b5"}, 7 | "exjsx": {:hex, :exjsx, "4.0.0", "60548841e0212df401e38e63c0078ec57b33e7ea49b032c796ccad8cde794b5c", [:mix], [{:jsx, "~> 2.8.0", [hex: :jsx, repo: "hexpm", optional: false]}], "hexpm", "32e95820a97cffea67830e91514a2ad53b888850442d6d395f53a1ac60c82e07"}, 8 | "exvcr": {:hex, :exvcr, "0.15.1", 
"772db4d065f5136c6a984c302799a79e4ade3e52701c95425fa2229dd6426886", [:mix], [{:exactor, "~> 2.2", [hex: :exactor, repo: "hexpm", optional: false]}, {:exjsx, "~> 4.0", [hex: :exjsx, repo: "hexpm", optional: false]}, {:finch, "~> 0.16", [hex: :finch, repo: "hexpm", optional: true]}, {:httpoison, "~> 1.0 or ~> 2.0", [hex: :httpoison, repo: "hexpm", optional: true]}, {:httpotion, "~> 3.1", [hex: :httpotion, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:meck, "~> 0.8", [hex: :meck, repo: "hexpm", optional: false]}], "hexpm", "de4fc18b1d672d9b72bc7468735e19779aa50ea963a1f859ef82cd9e294b13e3"}, 9 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 10 | "jsx": {:hex, :jsx, "2.8.3", "a05252d381885240744d955fbe3cf810504eb2567164824e19303ea59eef62cf", [:mix, :rebar3], [], "hexpm", "fc3499fed7a726995aa659143a248534adc754ebd16ccd437cd93b649a95091f"}, 11 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 12 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 13 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, 14 | "meck": {:hex, :meck, "0.9.2", "85ccbab053f1db86c7ca240e9fc718170ee5bda03810a6292b5306bf31bae5f5", [:rebar3], [], "hexpm", "81344f561357dc40a8344afa53767c32669153355b626ea9fcbc8da6b3045826"}, 15 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 16 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 17 | } 18 | -------------------------------------------------------------------------------- /priv/endpoint-viewer.yaml: -------------------------------------------------------------------------------- 1 | # Please edit the object below. Lines beginning with a '#' will be ignored, 2 | # and an empty file will abort the edit. If an error occurs while saving this file will be 3 | # reopened with the relevant failures. 
4 | # 5 | apiVersion: v1 6 | kind: Role 7 | metadata: 8 | name: endpoint-viewer 9 | rules: 10 | - apiGroups: null 11 | attributeRestrictions: null 12 | resources: 13 | - endpoints 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | -------------------------------------------------------------------------------- /test/app_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.AppTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | defmodule TestStrategy do 7 | @moduledoc false 8 | use Cluster.Strategy 9 | 10 | def start_link([%Cluster.Strategy.State{config: config} = state]) do 11 | config 12 | |> Keyword.fetch!(:caller) 13 | |> send({:opts, state}) 14 | 15 | :ignore 16 | end 17 | end 18 | 19 | describe "start/2" do 20 | test "calls strategy with right arguments" do 21 | Cluster.Supervisor.start_link([ 22 | [ 23 | test: [ 24 | strategy: TestStrategy, 25 | config: [ 26 | caller: self() 27 | ] 28 | ] 29 | ] 30 | ]) 31 | 32 | assert_receive {:opts, state} 33 | assert :test == state.topology 34 | assert {:net_kernel, :connect_node, []} = state.connect 35 | assert {:erlang, :disconnect_node, []} = state.disconnect 36 | assert {:erlang, :nodes, [:connected]} = state.list_nodes 37 | assert [caller: _] = state.config 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/dns_poll_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.DNSPollTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case, async: true 5 | import ExUnit.CaptureLog 6 | 7 | alias Cluster.Nodes 8 | alias Cluster.Strategy.DNSPoll 9 | 10 | describe "start_link/1" do 11 | test "adds new nodes" do 12 | capture_log(fn -> 13 | [ 14 | %Cluster.Strategy.State{ 15 | topology: :dns_poll, 16 | config: [ 17 | polling_interval: 100, 18 | query: "app", 19 | node_basename: "node", 20 | resolver: fn _query -> 21 | [{10, 0, 0, 1}, {10, 0, 0, 2}, {10761, 33408, 1, 41584, 47349, 47607, 34961, 243}] 22 | end 23 | ], 24 | connect: {Nodes, :connect, [self()]}, 25 | disconnect: {Nodes, :disconnect, [self()]}, 26 | list_nodes: {Nodes, :list_nodes, [[]]} 27 | } 28 | ] 29 | |> DNSPoll.start_link() 30 | 31 | assert_receive {:connect, :"node@10.0.0.1"}, 100 32 | assert_receive {:connect, :"node@10.0.0.2"}, 100 33 | assert_receive {:connect, :"node@2a09:8280:1:a270:b8f5:b9f7:8891:f3"}, 100 34 | end) 35 | end 36 | end 37 | 38 | test "removes nodes" do 39 | capture_log(fn -> 40 | [ 41 | %Cluster.Strategy.State{ 42 | topology: :dns_poll, 43 | config: [ 44 | polling_interval: 100, 45 | query: "app", 46 | node_basename: "node", 47 | resolver: fn _query -> [{10, 0, 0, 1}] end 48 | ], 49 | connect: {Nodes, :connect, [self()]}, 50 | disconnect: {Nodes, :disconnect, [self()]}, 51 | list_nodes: {Nodes, :list_nodes, [[:"node@10.0.0.1", :"node@10.0.0.2"]]}, 52 | meta: MapSet.new([:"node@10.0.0.1", :"node@10.0.0.2"]) 53 | } 54 | ] 55 | |> DNSPoll.start_link() 56 | 57 | assert_receive {:disconnect, :"node@10.0.0.2"}, 100 58 | end) 59 | end 60 | 61 | test "keeps state" do 62 | capture_log(fn -> 63 | [ 64 | %Cluster.Strategy.State{ 65 | topology: :dns_poll, 66 | config: [ 67 | polling_interval: 100, 68 | query: "app", 69 | node_basename: "node", 70 | resolver: fn _query -> [{10, 0, 0, 1}] end 71 | ], 72 | connect: {Nodes, :connect, [self()]}, 73 | disconnect: {Nodes, :disconnect, [self()]}, 74 | list_nodes: {Nodes, :list_nodes, [[:"node@10.0.0.1"]]}, 75 | meta: 
MapSet.new([:"node@10.0.0.1"]) 76 | } 77 | ] 78 | |> DNSPoll.start_link() 79 | 80 | refute_receive {:disconnect, _}, 100 81 | refute_receive {:connect, _}, 100 82 | end) 83 | end 84 | 85 | test "does not connect to anything with missing config params" do 86 | capture_log(fn -> 87 | [ 88 | %Cluster.Strategy.State{ 89 | topology: :dns_poll, 90 | config: [ 91 | polling_interval: 100, 92 | resolver: fn _query -> [{10, 0, 0, 1}] end 93 | ], 94 | connect: {Nodes, :connect, [self()]}, 95 | disconnect: {Nodes, :disconnect, [self()]}, 96 | list_nodes: {Nodes, :list_nodes, [[]]} 97 | } 98 | ] 99 | |> DNSPoll.start_link() 100 | 101 | refute_receive {:disconnect, _}, 100 102 | refute_receive {:connect, _}, 100 103 | end) 104 | end 105 | 106 | test "does not connect to anything with invalid config params" do 107 | capture_log(fn -> 108 | [ 109 | %Cluster.Strategy.State{ 110 | topology: :dns_poll, 111 | config: [ 112 | query: :app, 113 | node_basename: "", 114 | polling_interval: 100, 115 | resolver: fn _query -> [{10, 0, 0, 1}] end 116 | ], 117 | connect: {Nodes, :connect, [self()]}, 118 | disconnect: {Nodes, :disconnect, [self()]}, 119 | list_nodes: {Nodes, :list_nodes, [[]]} 120 | } 121 | ] 122 | |> DNSPoll.start_link() 123 | 124 | refute_receive {:disconnect, _}, 100 125 | refute_receive {:connect, _}, 100 126 | end) 127 | end 128 | 129 | test "looks up both A and AAAA records" do 130 | result = DNSPoll.lookup_all_ips("example.org" |> String.to_charlist()) 131 | sizes = result |> Enum.map(fn ip -> tuple_size(ip) end) |> Enum.uniq() |> Enum.sort() 132 | assert(sizes == [4, 8]) 133 | end 134 | end 135 | -------------------------------------------------------------------------------- /test/epmd_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.EpmdTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | alias Cluster.Strategy.Epmd 7 | 8 | require Cluster.Nodes 9 | 10 | describe "start_link/1" do 11 | @tag capture_log: true 12 | test "starts GenServer and connects nodes" do 13 | {:ok, pid} = 14 | Epmd.start_link([ 15 | %Cluster.Strategy.State{ 16 | topology: :name, 17 | config: [hosts: [:foo@bar]], 18 | connect: {Cluster.Nodes, :connect, [self()]}, 19 | list_nodes: {Cluster.Nodes, :list_nodes, [[]]} 20 | } 21 | ]) 22 | 23 | assert is_pid(pid) 24 | 25 | assert_receive {:connect, :foo@bar}, 5_000 26 | end 27 | 28 | @tag capture_log: true 29 | test "reconnects every time the configured timeout was reached" do 30 | timeout = 500 31 | start_timestamp = NaiveDateTime.utc_now() 32 | 33 | {:ok, _pid} = 34 | Epmd.start_link([ 35 | %Cluster.Strategy.State{ 36 | topology: :name, 37 | config: [hosts: [:foo@bar], timeout: timeout], 38 | connect: {Cluster.Nodes, :connect, [self()]}, 39 | list_nodes: {Cluster.Nodes, :list_nodes, [[]]} 40 | } 41 | ]) 42 | 43 | # Initial connect 44 | assert_receive {:connect, :foo@bar}, 5_000 45 | 46 | # First reconnect should not have happened right away, 47 | # but it should happen after a timeout 48 | refute_received {:connect, _} 49 | assert_receive {:connect, :foo@bar}, 2 * timeout 50 | 51 | # A consecutive reconnect should not have happened right away, 52 | # but it should happen after a timeout 53 | refute_received {:connect, _} 54 | assert_receive {:connect, :foo@bar}, 2 * timeout 55 | 56 | duration = NaiveDateTime.diff(NaiveDateTime.utc_now(), start_timestamp, :millisecond) 57 | assert duration > 2 * timeout 58 | end 59 | end 60 | end 61 | 
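The strategy tests in this suite drive the strategies with `{Cluster.Nodes, :connect, [self()]}`-style MFAs from `test/support/nodes.ex`, whose source is not reproduced in this listing. A minimal helper consistent with how the tests use it - the upstream implementation may differ in detail - could look like the sketch below:

```elixir
defmodule Cluster.Nodes do
  @moduledoc false

  # Stands in for :net_kernel.connect_node/1: records the attempt by messaging
  # the test process, then returns the canned result (true by default).
  def connect(caller, result \\ true, node) do
    send(caller, {:connect, node})
    result
  end

  # Stands in for :erlang.disconnect_node/1 in the same way.
  def disconnect(caller, result \\ true, node) do
    send(caller, {:disconnect, node})
    result
  end

  # Stands in for :erlang.nodes(:connected), returning a preconfigured list.
  def list_nodes(nodes), do: nodes
end
```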
-------------------------------------------------------------------------------- /test/fixtures/kubernetes/service_account/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitwalker/libcluster/6d8c11f57b9bde9a047e2ea9276f19b58c371466/test/fixtures/kubernetes/service_account/.gitkeep -------------------------------------------------------------------------------- /test/fixtures/vcr_cassettes/kubernetes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "request": { 4 | "body": "", 5 | "headers": { 6 | "authorization": "***" 7 | }, 8 | "method": "get", 9 | "options": { 10 | "httpc_options": [], 11 | "http_options": { 12 | "ssl": "[verify: :verify_none]" 13 | } 14 | }, 15 | "request_body": "", 16 | "url": "https://cluster.localhost./api/v1/namespaces/__libcluster_test/endpoints?labelSelector=app=test_selector" 17 | }, 18 | "response": { 19 | "binary": false, 20 | "body": "{\"kind\":\"EndpointsList\",\"apiVersion\":\"v1\",\"metadata\":{\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"resourceVersion\":\"17042410\"},\"items\":[{\"metadata\":{\"name\":\"development-development\",\"namespace\":\"airatel-service-localization\",\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"uid\":\"7e3faf1e-0294-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037787\",\"creationTimestamp\":\"2018-01-26T12:29:03Z\",\"labels\":{\"app\":\"development\",\"chart\":\"CHART_PLACEHOLDER\"}},\"subsets\":[{\"addresses\":[{\"hostname\":\"my-hostname-0\",\"ip\":\"10.48.33.136\",\"nodeName\":\"gke-jshmrtn-cluster-default-pool-a61da41f-db9x\",\"targetRef\":{\"kind\":\"Pod\",\"namespace\":\"airatel-service-localization\",\"name\":\"development-4292695165-mgq9f\",\"uid\":\"eb0f3e80-0295-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037783\"}}],\"ports\":[{\"name\":\"web\",\"port\":8443,\"protocol\":\"TCP\"}]}]}]}\n", 21 | "headers": { 22 | "date": "Fri, 26 Jan 2018 13:18:46 GMT", 23 | "content-length": "877", 24 | "content-type": "application/json" 25 | }, 26 | "status_code": [ 27 | "HTTP/1.1", 28 | 200, 29 | "OK" 30 | ], 31 | "type": "ok" 32 | } 33 | }, 34 | { 35 | "request": { 36 | "body": "", 37 | "headers": { 38 | "authorization": "***" 39 | }, 40 | "method": "get", 41 | "options": { 42 | "httpc_options": [], 43 | "http_options": { 44 | "ssl": "[verify: :verify_none]" 45 | } 46 | }, 47 | "request_body": "", 48 | "url": "https://cluster.localhost./api/v1/namespaces/__libcluster_test/endpoints?labelSelector=app=test_selector&resourceVersion=0" 49 | }, 50 | "response": { 51 | "binary": false, 52 | "body": "{\"kind\":\"EndpointsList\",\"apiVersion\":\"v1\",\"metadata\":{\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"resourceVersion\":\"17042410\"},\"items\":[{\"metadata\":{\"name\":\"development-development\",\"namespace\":\"airatel-service-localization\",\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"uid\":\"7e3faf1e-0294-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037787\",\"creationTimestamp\":\"2018-01-26T12:29:03Z\",\"labels\":{\"app\":\"development\",\"chart\":\"CHART_PLACEHOLDER\"}},\"subsets\":[{\"addresses\":[{\"hostname\":\"my-hostname-0\",\"ip\":\"10.48.33.136\",\"nodeName\":\"gke-jshmrtn-cluster-default-pool-a61da41f-db9x\",\"targetRef\":{\"kind\":\"Pod\",\"namespace\":\"airatel-service-localization\",\"name\":\"development-4292695165-mgq9f\",\"uid\":\"eb0f3e80-0295-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037783\"}}],\"ports\":[{\"name\":\"web\",\"port\":8443,\"protocol\":\"TCP\"}]}]}]}\n", 53 | 
"headers": { 54 | "date": "Fri, 26 Jan 2018 13:18:46 GMT", 55 | "content-length": "877", 56 | "content-type": "application/json" 57 | }, 58 | "status_code": [ 59 | "HTTP/1.1", 60 | 200, 61 | "OK" 62 | ], 63 | "type": "ok" 64 | } 65 | }, 66 | { 67 | "request": { 68 | "body": "", 69 | "headers": { 70 | "authorization": "***" 71 | }, 72 | "method": "get", 73 | "options": { 74 | "httpc_options": [], 75 | "http_options": { 76 | "ssl": "[verify: :verify_none]" 77 | } 78 | }, 79 | "request_body": "", 80 | "url": "https://cluster.localhost./api/v1/namespaces/airatel-service-test/endpoints?labelSelector=app=test_selector" 81 | }, 82 | "response": { 83 | "binary": false, 84 | "body": "{\"apiVersion\":\"v1\",\"items\":[{\"metadata\":{\"creationTimestamp\":\"2018-01-26T12:29:03Z\",\"labels\":{\"app\":\"development\",\"chart\":\"CHART_PLACEHOLDER\"},\"name\":\"development-development\",\"namespace\":\"airatel-service-test\",\"resourceVersion\":\"17037787\",\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"uid\":\"7e3faf1e-0294-11e8-bcad-42010a9c01cc\"},\"subsets\":[{\"addresses\":[{\"ip\":\"10.48.33.134\",\"nodeName\":\"gke-jshmrtn-cluster-default-pool-a61da41f-db9x\",\"targetRef\":{\"kind\":\"Pod\",\"name\":\"development-4292695165-mgq9a\",\"namespace\":\"airatel-service-test\",\"resourceVersion\":\"17037783\",\"uid\":\"eb0f3e80-0295-11e8-bcad-42010a9c01cd\"}}],\"ports\":[{\"name\":\"web\",\"port\":8443,\"protocol\":\"TCP\"}]}]}],\"kind\":\"EndpointsList\",\"metadata\":{\"resourceVersion\":\"17042410\",\"selfLink\":\"SELFLINK_PLACEHOLDER\"}}", 85 | "headers": { 86 | "date": "Fri, 26 Jan 2018 13:18:46 GMT", 87 | "content-length": "877", 88 | "content-type": "application/json" 89 | }, 90 | "status_code": [ 91 | "HTTP/1.1", 92 | 200, 93 | "OK" 94 | ], 95 | "type": "ok" 96 | } 97 | } 98 | ] 99 | -------------------------------------------------------------------------------- /test/fixtures/vcr_cassettes/kubernetes_pods.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "request": { 4 | "body": "", 5 | "headers": { 6 | "authorization": "***" 7 | }, 8 | "method": "get", 9 | "options": { 10 | "httpc_options": [], 11 | "http_options": { 12 | "ssl": "[verify: :verify_none]" 13 | } 14 | }, 15 | "request_body": "", 16 | "url": "https://cluster.localhost./api/v1/namespaces/__libcluster_test/pods?labelSelector=app=test_selector" 17 | }, 18 | "response": { 19 | "binary": false, 20 | "body": "{\"kind\":\"PodList\",\"apiVersion\":\"v1\",\"metadata\":{\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"resourceVersion\":\"17042410\"},\"items\":[{\"metadata\":{\"name\":\"development-development\",\"namespace\":\"airatel-service-localization\",\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"uid\":\"7e3faf1e-0294-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037787\",\"creationTimestamp\":\"2018-01-26T12:29:03Z\",\"labels\":{\"app\":\"development\",\"chart\":\"CHART_PLACEHOLDER\"}},\"spec\": { \"hostname\": \"my-hostname-0\" },\"status\":{\"podIP\": \"10.48.33.136\"}}]}\n", 21 | "headers": { 22 | "date": "Fri, 26 Jan 2018 13:18:46 GMT", 23 | "content-length": "877", 24 | "content-type": "application/json" 25 | }, 26 | "status_code": [ 27 | "HTTP/1.1", 28 | 200, 29 | "OK" 30 | ], 31 | "type": "ok" 32 | } 33 | }, 34 | { 35 | "request": { 36 | "body": "", 37 | "headers": { 38 | "authorization": "***" 39 | }, 40 | "method": "get", 41 | "options": { 42 | "httpc_options": [], 43 | "http_options": { 44 | "ssl": "[verify: :verify_none]" 45 | } 46 | }, 47 | "request_body": "", 48 | "url": 
"https://cluster.localhost./api/v1/namespaces/__libcluster_test/pods?labelSelector=app=test_selector&resourceVersion=0" 49 | }, 50 | "response": { 51 | "binary": false, 52 | "body": "{\"kind\":\"PodList\",\"apiVersion\":\"v1\",\"metadata\":{\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"resourceVersion\":\"17042410\"},\"items\":[{\"metadata\":{\"name\":\"development-development\",\"namespace\":\"airatel-service-localization\",\"selfLink\":\"SELFLINK_PLACEHOLDER\",\"uid\":\"7e3faf1e-0294-11e8-bcad-42010a9c01cc\",\"resourceVersion\":\"17037787\",\"creationTimestamp\":\"2018-01-26T12:29:03Z\",\"labels\":{\"app\":\"development\",\"chart\":\"CHART_PLACEHOLDER\"}},\"spec\": { \"hostname\": \"my-hostname-0\" },\"status\":{\"podIP\": \"10.48.33.136\"}}]}\n", 53 | "headers": { 54 | "date": "Fri, 26 Jan 2018 13:18:46 GMT", 55 | "content-length": "877", 56 | "content-type": "application/json" 57 | }, 58 | "status_code": [ 59 | "HTTP/1.1", 60 | 200, 61 | "OK" 62 | ], 63 | "type": "ok" 64 | } 65 | } 66 | ] 67 | -------------------------------------------------------------------------------- /test/gossip_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.GossipTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | alias Cluster.Strategy.Gossip 7 | 8 | require Cluster.Nodes 9 | 10 | describe "start_link/1" do 11 | @tag capture_log: true 12 | test "starts GenServer and connects nodes" do 13 | {:ok, pid} = 14 | Gossip.start_link([ 15 | %Cluster.Strategy.State{ 16 | topology: :gossip, 17 | config: [ 18 | port: 45892, 19 | if_addr: "127.0.0.1", 20 | multicast_if: "192.168.1.1", 21 | multicast_addr: "233.252.1.32", 22 | secret: "password" 23 | ], 24 | connect: {Cluster.Nodes, :connect, [self()]}, 25 | list_nodes: {Cluster.Nodes, :list_nodes, [[]]} 26 | } 27 | ]) 28 | 29 | Process.sleep(1_000) 30 | 31 | assert is_pid(pid) 32 | assert Process.alive?(pid) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /test/kubernetes_dns_srv_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.KubernetesSRVDNSTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case, async: true 5 | import ExUnit.CaptureLog 6 | 7 | alias Cluster.Strategy.Kubernetes.DNSSRV 8 | alias Cluster.Strategy.State 9 | alias Cluster.Nodes 10 | 11 | require Cluster.Nodes 12 | 13 | describe "start_link/1" do 14 | test "adds new nodes" do 15 | capture_log(fn -> 16 | [ 17 | %State{ 18 | topology: :k8s_dns_example, 19 | config: [ 20 | polling_interval: 100, 21 | service: "elixir-plug-poc", 22 | namespace: "default", 23 | application_name: "node", 24 | resolver: fn _query -> 25 | {:ok, 26 | {:hostent, ~c"elixir-plug-poc.default.svc.cluster.local", [], :srv, 2, 27 | [ 28 | {10, 50, 0, ~c"elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local"}, 29 | {10, 50, 0, ~c"elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"} 30 | ]}} 31 | end 32 | ], 33 | connect: {Nodes, :connect, [self()]}, 34 | disconnect: {Nodes, :disconnect, [self()]}, 35 | list_nodes: {Nodes, :list_nodes, [[]]} 36 | } 37 | ] 38 | |> DNSSRV.start_link() 39 | 40 | assert_receive {:connect, 41 | :"node@elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local"}, 42 | 100 43 | 44 | assert_receive {:connect, 45 | :"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"}, 46 | 100 47 | end) 48 | end 49 | 50 | test "removes nodes" do 51 | capture_log(fn -> 52 | [ 53 | %State{ 54 | 
topology: :k8s_dns_example, 55 | config: [ 56 | polling_interval: 100, 57 | service: "elixir-plug-poc", 58 | namespace: "default", 59 | application_name: "node", 60 | resolver: fn _query -> 61 | {:ok, 62 | {:hostent, ~c"elixir-plug-poc.default.svc.cluster.local", [], :srv, 1, 63 | [ 64 | {10, 50, 0, ~c"elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local"} 65 | ]}} 66 | end 67 | ], 68 | connect: {Nodes, :connect, [self()]}, 69 | disconnect: {Nodes, :disconnect, [self()]}, 70 | list_nodes: 71 | {Nodes, :list_nodes, 72 | [ 73 | [ 74 | :"node@elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local", 75 | :"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local" 76 | ] 77 | ]}, 78 | meta: 79 | MapSet.new([ 80 | :"node@elixir-plug-poc-0.elixir-plug-poc.default.svc.cluster.local", 81 | :"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local" 82 | ]) 83 | } 84 | ] 85 | |> DNSSRV.start_link() 86 | 87 | assert_receive {:disconnect, 88 | :"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"}, 89 | 100 90 | end) 91 | end 92 | 93 | test "keeps state" do 94 | capture_log(fn -> 95 | [ 96 | %State{ 97 | topology: :k8s_dns_example, 98 | config: [ 99 | polling_interval: 100, 100 | service: "app", 101 | namespace: "default", 102 | application_name: "node", 103 | resolver: fn _query -> 104 | {:ok, 105 | {:hostent, ~c"elixir-plug-poc.default.svc.cluster.local", [], :srv, 2, 106 | [ 107 | {10, 50, 0, ~c"elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"} 108 | ]}} 109 | end 110 | ], 111 | connect: {Nodes, :connect, [self()]}, 112 | disconnect: {Nodes, :disconnect, [self()]}, 113 | list_nodes: 114 | {Nodes, :list_nodes, 115 | [[:"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"]]}, 116 | meta: 117 | MapSet.new([:"node@elixir-plug-poc-1.elixir-plug-poc.default.svc.cluster.local"]) 118 | } 119 | ] 120 | |> DNSSRV.start_link() 121 | 122 | refute_receive {:disconnect, _}, 100 123 | refute_receive {:connect, _}, 100 124 | end) 125 | end 126 | 127 | test "does not connect to anything if name is not resolved" do 128 | capture_log(fn -> 129 | [ 130 | %State{ 131 | topology: :k8s_dns_example, 132 | config: [ 133 | polling_interval: 100, 134 | service: "app", 135 | namespace: "default", 136 | application_name: "node", 137 | resolver: fn _query -> {:error, :nxdomain} end 138 | ], 139 | connect: {Nodes, :connect, [self()]}, 140 | disconnect: {Nodes, :disconnect, [self()]}, 141 | list_nodes: {Nodes, :list_nodes, [[]]} 142 | } 143 | ] 144 | |> DNSSRV.start_link() 145 | 146 | refute_receive {:connect, _}, 100 147 | end) 148 | end 149 | end 150 | end 151 | -------------------------------------------------------------------------------- /test/kubernetes_dns_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.KubernetesDNSTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case, async: true 5 | import ExUnit.CaptureLog 6 | 7 | alias Cluster.Strategy.Kubernetes.DNS 8 | alias Cluster.Strategy.State 9 | alias Cluster.Nodes 10 | 11 | require Cluster.Nodes 12 | 13 | describe "start_link/1" do 14 | test "adds new nodes" do 15 | capture_log(fn -> 16 | [ 17 | %State{ 18 | topology: :k8s_dns_example, 19 | config: [ 20 | polling_interval: 100, 21 | service: "app", 22 | application_name: "node", 23 | resolver: fn _query -> 24 | {:ok, {:hostent, ~c"app", [], :inet, 4, [{10, 0, 0, 1}, {10, 0, 0, 2}]}} 25 | end 26 | ], 27 | connect: {Nodes, :connect, [self()]}, 28 | disconnect: {Nodes, :disconnect, 
[self()]}, 29 | list_nodes: {Nodes, :list_nodes, [[]]} 30 | } 31 | ] 32 | |> DNS.start_link() 33 | 34 | assert_receive {:connect, :"node@10.0.0.1"}, 100 35 | assert_receive {:connect, :"node@10.0.0.2"}, 100 36 | end) 37 | end 38 | 39 | test "removes nodes" do 40 | capture_log(fn -> 41 | [ 42 | %State{ 43 | topology: :k8s_dns_example, 44 | config: [ 45 | polling_interval: 100, 46 | service: "app", 47 | application_name: "node", 48 | resolver: fn _query -> {:ok, {:hostent, ~c"app", [], :inet, 4, [{10, 0, 0, 1}]}} end 49 | ], 50 | connect: {Nodes, :connect, [self()]}, 51 | disconnect: {Nodes, :disconnect, [self()]}, 52 | list_nodes: {Nodes, :list_nodes, [[:"node@10.0.0.1", :"node@10.0.0.2"]]}, 53 | meta: MapSet.new([:"node@10.0.0.1", :"node@10.0.0.2"]) 54 | } 55 | ] 56 | |> DNS.start_link() 57 | 58 | assert_receive {:disconnect, :"node@10.0.0.2"}, 100 59 | end) 60 | end 61 | 62 | test "keeps state" do 63 | capture_log(fn -> 64 | [ 65 | %State{ 66 | topology: :k8s_dns_example, 67 | config: [ 68 | polling_interval: 100, 69 | service: "app", 70 | application_name: "node", 71 | resolver: fn _query -> {:ok, {:hostent, ~c"app", [], :inet, 4, [{10, 0, 0, 1}]}} end 72 | ], 73 | connect: {Nodes, :connect, [self()]}, 74 | disconnect: {Nodes, :disconnect, [self()]}, 75 | list_nodes: {Nodes, :list_nodes, [[:"node@10.0.0.1"]]}, 76 | meta: MapSet.new([:"node@10.0.0.1"]) 77 | } 78 | ] 79 | |> DNS.start_link() 80 | 81 | refute_receive {:disconnect, _}, 100 82 | refute_receive {:connect, _}, 100 83 | end) 84 | end 85 | 86 | test "does not connect to anything if name is not resolved" do 87 | capture_log(fn -> 88 | [ 89 | %State{ 90 | topology: :k8s_dns_example, 91 | config: [ 92 | polling_interval: 100, 93 | service: "app", 94 | application_name: "node", 95 | resolver: fn _query -> {:error, :nxdomain} end 96 | ], 97 | connect: {Nodes, :connect, [self()]}, 98 | disconnect: {Nodes, :disconnect, [self()]}, 99 | list_nodes: {Nodes, :list_nodes, [[]]} 100 | } 101 | ] 102 | |> DNS.start_link() 103 | 104 | refute_receive {:connect, _}, 100 105 | end) 106 | end 107 | end 108 | end 109 | -------------------------------------------------------------------------------- /test/kubernetes_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Strategy.KubernetesTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case, async: false 5 | use ExVCR.Mock, adapter: ExVCR.Adapter.Httpc 6 | 7 | alias Cluster.Strategy.Kubernetes 8 | alias Cluster.Nodes 9 | 10 | require Cluster.Nodes 11 | 12 | import ExUnit.CaptureLog 13 | 14 | setup do 15 | cassettes_path = Path.join([__DIR__, "fixtures", "vcr_cassettes"]) 16 | ExVCR.Config.cassette_library_dir(cassettes_path, cassettes_path) 17 | ExVCR.Config.filter_request_headers("authorization") 18 | ExVCR.Config.filter_url_params(true) 19 | 20 | ExVCR.Config.filter_sensitive_data( 21 | "\"selfLink\":\"[^\"]+\"", 22 | "\"selfLink\":\"SELFLINK_PLACEHOLDER\"" 23 | ) 24 | 25 | ExVCR.Config.filter_sensitive_data("\"chart\":\"[^\"]+\"", "\"chart\":\"CHART_PLACEHOLDER\"") 26 | 27 | :ok 28 | end 29 | 30 | describe "start_link/1" do 31 | test "calls right functions" do 32 | use_cassette "kubernetes", custom: true do 33 | capture_log(fn -> 34 | start_supervised!({Kubernetes, 35 | [ 36 | %Cluster.Strategy.State{ 37 | topology: :name, 38 | config: [ 39 | kubernetes_node_basename: "test_basename", 40 | kubernetes_selector: "app=test_selector", 41 | # If you want to run the test freshly, you'll need to create a DNS Entry 42 | kubernetes_master: 
"cluster.localhost.", 43 | kubernetes_service_account_path: 44 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 45 | ], 46 | connect: {Nodes, :connect, [self()]}, 47 | disconnect: {Nodes, :disconnect, [self()]}, 48 | list_nodes: {Nodes, :list_nodes, [[]]} 49 | } 50 | ]}) 51 | 52 | assert_receive {:connect, _}, 5_000 53 | end) 54 | end 55 | end 56 | 57 | test "connect nodes from different namespaces" do 58 | use_cassette "kubernetes", custom: true do 59 | capture_log(fn -> 60 | start_supervised!({Kubernetes, 61 | [ 62 | %Cluster.Strategy.State{ 63 | topology: :name, 64 | config: [ 65 | kubernetes_node_basename: "test_basename", 66 | kubernetes_selector: "app=test_selector", 67 | kubernetes_namespace: "airatel-service-test", 68 | # If you want to run the test freshly, you'll need to create a DNS Entry 69 | kubernetes_master: "cluster.localhost.", 70 | kubernetes_service_account_path: 71 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 72 | ], 73 | connect: {Nodes, :connect, [self()]}, 74 | disconnect: {Nodes, :disconnect, [self()]}, 75 | list_nodes: {Nodes, :list_nodes, [[]]} 76 | } 77 | ]}) 78 | 79 | assert_receive {:connect, _}, 5_000 80 | end) 81 | end 82 | end 83 | 84 | test "works with cached resources" do 85 | use_cassette "kubernetes", custom: true do 86 | capture_log(fn -> 87 | start_supervised!({Kubernetes, 88 | [ 89 | %Cluster.Strategy.State{ 90 | topology: :name, 91 | config: [ 92 | kubernetes_node_basename: "test_basename", 93 | kubernetes_selector: "app=test_selector", 94 | kubernetes_use_cached_resources: true, 95 | # If you want to run the test freshly, you'll need to create a DNS Entry 96 | kubernetes_master: "cluster.localhost.", 97 | kubernetes_service_account_path: 98 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 99 | ], 100 | connect: {Nodes, :connect, [self()]}, 101 | disconnect: {Nodes, :disconnect, [self()]}, 102 | list_nodes: {Nodes, :list_nodes, [[]]} 103 | } 104 | ]}) 105 | 106 | assert_receive {:connect, _}, 5_000 107 | end) 108 | end 109 | end 110 | 111 | test "works with no cached resources" do 112 | use_cassette "kubernetes", custom: true do 113 | capture_log(fn -> 114 | start_supervised!({Kubernetes, 115 | [ 116 | %Cluster.Strategy.State{ 117 | topology: :name, 118 | config: [ 119 | kubernetes_node_basename: "test_basename", 120 | kubernetes_selector: "app=test_selector", 121 | kubernetes_use_cached_resources: false, 122 | # If you want to run the test freshly, you'll need to create a DNS Entry 123 | kubernetes_master: "cluster.localhost.", 124 | kubernetes_service_account_path: 125 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 126 | ], 127 | connect: {Nodes, :connect, [self()]}, 128 | disconnect: {Nodes, :disconnect, [self()]}, 129 | list_nodes: {Nodes, :list_nodes, [[]]} 130 | } 131 | ]}) 132 | 133 | assert_receive {:connect, _}, 5_000 134 | end) 135 | end 136 | end 137 | 138 | test "works with dns and cluster_name" do 139 | use_cassette "kubernetes", custom: true do 140 | capture_log(fn -> 141 | start_supervised!({Kubernetes, 142 | [ 143 | %Cluster.Strategy.State{ 144 | topology: :name, 145 | config: [ 146 | kubernetes_node_basename: "test_basename", 147 | kubernetes_cluster_name: "my_cluster", 148 | mode: :dns, 149 | kubernetes_selector: "app=test_selector", 150 | # If you want to run the test freshly, you'll need to create a DNS Entry 151 | kubernetes_master: "cluster.localhost.", 152 | kubernetes_service_account_path: 153 | Path.join([__DIR__, "fixtures", "kubernetes", 
"service_account"]) 154 | ], 155 | connect: {Nodes, :connect, [self()]}, 156 | disconnect: {Nodes, :disconnect, [self()]}, 157 | list_nodes: {Nodes, :list_nodes, [[]]} 158 | } 159 | ]}) 160 | 161 | assert_receive {:connect, 162 | :"test_basename@10-48-33-136.airatel-service-localization.pod.my_cluster.local"}, 163 | 5_000 164 | end) 165 | end 166 | end 167 | 168 | test "works with hostname and cluster_name" do 169 | use_cassette "kubernetes", custom: true do 170 | capture_log(fn -> 171 | start_supervised!({Kubernetes, 172 | [ 173 | %Cluster.Strategy.State{ 174 | topology: :name, 175 | config: [ 176 | kubernetes_node_basename: "test_basename", 177 | kubernetes_cluster_name: "my_cluster", 178 | mode: :hostname, 179 | kubernetes_selector: "app=test_selector", 180 | kubernetes_service_name: "my_service", 181 | # If you want to run the test freshly, you'll need to create a DNS Entry 182 | kubernetes_master: "cluster.localhost.", 183 | kubernetes_service_account_path: 184 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 185 | ], 186 | connect: {Nodes, :connect, [self()]}, 187 | disconnect: {Nodes, :disconnect, [self()]}, 188 | list_nodes: {Nodes, :list_nodes, [[]]} 189 | } 190 | ]}) 191 | 192 | assert_receive {:connect, 193 | :"test_basename@my-hostname-0.my_service.airatel-service-localization.svc.my_cluster.local"}, 194 | 5_000 195 | end) 196 | end 197 | end 198 | 199 | test "works with hostname and cluster_name in ip_lookup_mode: :pods" do 200 | use_cassette "kubernetes_pods", custom: true do 201 | capture_log(fn -> 202 | start_supervised!({Kubernetes, 203 | [ 204 | %Cluster.Strategy.State{ 205 | topology: :name, 206 | config: [ 207 | kubernetes_ip_lookup_mode: :pods, 208 | kubernetes_node_basename: "test_basename", 209 | kubernetes_cluster_name: "my_cluster", 210 | mode: :hostname, 211 | kubernetes_selector: "app=test_selector", 212 | kubernetes_service_name: "my_service", 213 | # If you want to run the test freshly, you'll need to create a DNS Entry 214 | kubernetes_master: "cluster.localhost.", 215 | kubernetes_service_account_path: 216 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 217 | ], 218 | connect: {Nodes, :connect, [self()]}, 219 | disconnect: {Nodes, :disconnect, [self()]}, 220 | list_nodes: {Nodes, :list_nodes, [[]]} 221 | } 222 | ]}) 223 | 224 | assert_receive {:connect, 225 | :"test_basename@my-hostname-0.my_service.airatel-service-localization.svc.my_cluster.local"}, 226 | 5_000 227 | end) 228 | end 229 | end 230 | 231 | test "works with pods" do 232 | use_cassette "kubernetes_pods", custom: true do 233 | capture_log(fn -> 234 | start_supervised!({Kubernetes, 235 | [ 236 | %Cluster.Strategy.State{ 237 | topology: :name, 238 | config: [ 239 | kubernetes_node_basename: "test_basename", 240 | kubernetes_selector: "app=test_selector", 241 | # If you want to run the test freshly, you'll need to create a DNS Entry 242 | kubernetes_master: "cluster.localhost.", 243 | kubernetes_ip_lookup_mode: :pods, 244 | kubernetes_service_account_path: 245 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 246 | ], 247 | connect: {Nodes, :connect, [self()]}, 248 | disconnect: {Nodes, :disconnect, [self()]}, 249 | list_nodes: {Nodes, :list_nodes, [[]]} 250 | } 251 | ]}) 252 | 253 | assert_receive {:connect, :"test_basename@10.48.33.136"}, 5_000 254 | end) 255 | end 256 | end 257 | 258 | test "works with pods and cached resources" do 259 | use_cassette "kubernetes_pods", custom: true do 260 | capture_log(fn -> 261 | 
start_supervised!({Kubernetes, 262 | [ 263 | %Cluster.Strategy.State{ 264 | topology: :name, 265 | config: [ 266 | kubernetes_node_basename: "test_basename", 267 | kubernetes_selector: "app=test_selector", 268 | # If you want to run the test freshly, you'll need to create a DNS Entry 269 | kubernetes_master: "cluster.localhost.", 270 | kubernetes_ip_lookup_mode: :pods, 271 | kubernetes_use_cached_resources: true, 272 | kubernetes_service_account_path: 273 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 274 | ], 275 | connect: {Nodes, :connect, [self()]}, 276 | disconnect: {Nodes, :disconnect, [self()]}, 277 | list_nodes: {Nodes, :list_nodes, [[]]} 278 | } 279 | ]}) 280 | 281 | assert_receive {:connect, :"test_basename@10.48.33.136"}, 5_000 282 | end) 283 | end 284 | end 285 | 286 | test "works with pods and dns" do 287 | use_cassette "kubernetes_pods", custom: true do 288 | capture_log(fn -> 289 | start_supervised!({Kubernetes, 290 | [ 291 | %Cluster.Strategy.State{ 292 | topology: :name, 293 | config: [ 294 | kubernetes_node_basename: "test_basename", 295 | kubernetes_selector: "app=test_selector", 296 | # If you want to run the test freshly, you'll need to create a DNS Entry 297 | kubernetes_master: "cluster.localhost.", 298 | kubernetes_ip_lookup_mode: :pods, 299 | mode: :dns, 300 | kubernetes_service_account_path: 301 | Path.join([__DIR__, "fixtures", "kubernetes", "service_account"]) 302 | ], 303 | connect: {Nodes, :connect, [self()]}, 304 | disconnect: {Nodes, :disconnect, [self()]}, 305 | list_nodes: {Nodes, :list_nodes, [[]]} 306 | } 307 | ]}) 308 | 309 | assert_receive {:connect, 310 | :"test_basename@10-48-33-136.airatel-service-localization.pod.cluster.local"}, 311 | 5_000 312 | end) 313 | end 314 | end 315 | end 316 | end 317 | -------------------------------------------------------------------------------- /test/logger_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.LoggerTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | import ExUnit.CaptureLog 7 | 8 | alias Cluster.Logger 9 | Application.put_env(:libcluster, :debug, true) 10 | 11 | for level <- [:debug, :info, :warn, :error] do 12 | describe "#{level}/2" do 13 | test "logs correctly" do 14 | output = 15 | capture_log(fn -> 16 | apply(Logger, unquote(level), [ 17 | __MODULE__, 18 | "some message" 19 | ]) 20 | end) 21 | 22 | assert output =~ ~r/\[#{unquote(level)}(ing)?\]/ 23 | assert output =~ "[libcluster:Elixir.Cluster.LoggerTest] some message" 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /test/strategy_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Cluster.StrategyTest do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | alias Cluster.Strategy 7 | alias Cluster.Nodes 8 | alias Cluster.Telemetry 9 | 10 | require Cluster.Nodes 11 | 12 | import ExUnit.CaptureLog 13 | 14 | describe "connect_nodes/4" do 15 | test "does not connect existing nodes again" do 16 | connect = {Nodes, :connect, [self()]} 17 | list_nodes = {Nodes, :list_nodes, [[Node.self()]]} 18 | 19 | assert :ok = Strategy.connect_nodes(__MODULE__, connect, list_nodes, [Node.self()]) 20 | 21 | refute_receive {:connect, _} 22 | end 23 | 24 | test "does connect new nodes" do 25 | connect = {Nodes, :connect, [self()]} 26 | list_nodes = {Nodes, :list_nodes, [[Node.self()]]} 27 | 28 | Telemetry.setup_telemetry([:libcluster, :connect_node, 
:ok]) 29 | 30 | assert capture_log(fn -> 31 | assert :ok = 32 | Strategy.connect_nodes(__MODULE__, connect, list_nodes, [:"foo@some.host"]) 33 | end) =~ "connected to :\"foo@some.host\"" 34 | 35 | assert_receive {:connect, :"foo@some.host"} 36 | 37 | assert_receive {:telemetry_event, 38 | {[:libcluster, :connect_node, :ok], %{duration: _}, 39 | %{node: :"foo@some.host", topology: _}, _}} 40 | end 41 | 42 | test "handles connect failure" do 43 | connect = {Nodes, :connect, [self(), false]} 44 | list_nodes = {Nodes, :list_nodes, [[Node.self()]]} 45 | 46 | Telemetry.setup_telemetry([:libcluster, :connect_node, :error]) 47 | 48 | assert capture_log(fn -> 49 | assert {:error, ["foo@some.host": false]} = 50 | Strategy.connect_nodes(__MODULE__, connect, list_nodes, [:"foo@some.host"]) 51 | end) =~ "unable to connect to :\"foo@some.host\"" 52 | 53 | assert_receive {:connect, :"foo@some.host"} 54 | 55 | assert_receive {:telemetry_event, 56 | {[:libcluster, :connect_node, :error], %{}, 57 | %{node: :"foo@some.host", topology: _, reason: :unreachable}, _}} 58 | end 59 | 60 | test "handles connect ignore" do 61 | connect = {Nodes, :connect, [self(), :ignored]} 62 | list_nodes = {Nodes, :list_nodes, [[Node.self()]]} 63 | 64 | Telemetry.setup_telemetry([:libcluster, :connect_node, :error]) 65 | 66 | assert capture_log(fn -> 67 | assert {:error, ["foo@some.host": :ignored]} = 68 | Strategy.connect_nodes(__MODULE__, connect, list_nodes, [:"foo@some.host"]) 69 | end) =~ "unable to connect to :\"foo@some.host\"" 70 | 71 | assert_receive {:connect, :"foo@some.host"} 72 | 73 | assert_receive {:telemetry_event, 74 | {[:libcluster, :connect_node, :error], %{}, 75 | %{node: :"foo@some.host", topology: _, reason: :not_part_of_network}, _}} 76 | end 77 | end 78 | 79 | describe "disconnect_nodes/4" do 80 | test "does not disconnect missing nodes" do 81 | disconnect = {Nodes, :disconnect, [self()]} 82 | list_nodes = {Nodes, :list_nodes, [[]]} 83 | 84 | assert :ok = Strategy.disconnect_nodes(__MODULE__, disconnect, list_nodes, [Node.self()]) 85 | 86 | refute_receive {:disconnect, _} 87 | end 88 | 89 | test "does disconnect new nodes" do 90 | disconnect = {Nodes, :disconnect, [self()]} 91 | list_nodes = {Nodes, :list_nodes, [[:"foo@some.host"]]} 92 | 93 | Telemetry.setup_telemetry([:libcluster, :disconnect_node, :ok]) 94 | 95 | assert capture_log(fn -> 96 | assert :ok = 97 | Strategy.disconnect_nodes(__MODULE__, disconnect, list_nodes, [ 98 | :"foo@some.host" 99 | ]) 100 | end) =~ "disconnected from :\"foo@some.host\"" 101 | 102 | assert_receive {:disconnect, :"foo@some.host"} 103 | 104 | assert_receive {:telemetry_event, 105 | {[:libcluster, :disconnect_node, :ok], %{duration: _}, 106 | %{node: :"foo@some.host", topology: _}, _}} 107 | end 108 | 109 | test "handles disconnect error" do 110 | disconnect = {Nodes, :disconnect, [self(), :failed]} 111 | list_nodes = {Nodes, :list_nodes, [[:"foo@some.host"]]} 112 | 113 | Telemetry.setup_telemetry([:libcluster, :disconnect_node, :error]) 114 | 115 | assert capture_log(fn -> 116 | assert {:error, ["foo@some.host": :failed]} = 117 | Strategy.disconnect_nodes(__MODULE__, disconnect, list_nodes, [ 118 | :"foo@some.host" 119 | ]) 120 | end) =~ 121 | "disconnect from :\"foo@some.host\" failed with: :failed" 122 | 123 | assert_receive {:disconnect, :"foo@some.host"} 124 | 125 | assert_receive {:telemetry_event, 126 | {[:libcluster, :disconnect_node, :error], %{}, 127 | %{node: :"foo@some.host", topology: _, reason: ":failed"}, _}} 128 | end 129 | end 130 | end 131 |
-------------------------------------------------------------------------------- /test/support/exvcr.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVCR.Adapter.Httpc.Converter do 2 | @moduledoc """ 3 | Provides helpers to mock :httpc methods. 4 | """ 5 | 6 | use ExVCR.Converter 7 | 8 | defp string_to_response(string) do 9 | response = Enum.map(string, fn {x, y} -> {String.to_atom(x), y} end) 10 | response = struct(ExVCR.Response, response) 11 | 12 | response = 13 | if response.status_code do 14 | status_code = 15 | response.status_code 16 | |> Enum.map(&convert_string_to_charlist/1) 17 | |> List.to_tuple() 18 | 19 | %{response | status_code: status_code} 20 | else 21 | response 22 | end 23 | 24 | response = 25 | if response.type == "error" do 26 | %{response | body: {String.to_atom(response.body), []}} 27 | else 28 | response 29 | end 30 | 31 | response = 32 | if is_map(response.headers) do 33 | headers = 34 | response.headers 35 | |> Map.to_list() 36 | |> Enum.map(fn {k, v} -> {to_charlist(k), to_charlist(v)} end) 37 | 38 | %{response | headers: headers} 39 | else 40 | response 41 | end 42 | 43 | response 44 | end 45 | 46 | defp convert_string_to_charlist(elem) do 47 | if is_binary(elem) do 48 | to_charlist(elem) 49 | else 50 | elem 51 | end 52 | end 53 | 54 | defp request_to_string([url]) do 55 | request_to_string([:get, {url, [], [], []}, [], []]) 56 | end 57 | 58 | defp request_to_string([method, {url, headers}, http_options, options]) do 59 | request_to_string([method, {url, headers, [], []}, http_options, options]) 60 | end 61 | 62 | # TODO: need to handle content_type 63 | defp request_to_string([method, {url, headers, _content_type, body}, http_options, options]) do 64 | %ExVCR.Request{ 65 | url: parse_url(url), 66 | headers: parse_headers(headers), 67 | method: to_string(method), 68 | body: parse_request_body(body), 69 | options: [ 70 | httpc_options: parse_keyword_list(options), 71 | http_options: parse_keyword_list(http_options) 72 | ] 73 | } 74 | end 75 | 76 | def parse_keyword_list(params) do 77 | Enum.map(params, fn {k, v} -> {k, inspect(v)} end) 78 | end 79 | 80 | defp response_to_string({:ok, {{http_version, status_code, reason_phrase}, headers, body}}) do 81 | %ExVCR.Response{ 82 | type: "ok", 83 | status_code: [to_string(http_version), status_code, to_string(reason_phrase)], 84 | headers: parse_headers(headers), 85 | body: to_string(body) 86 | } 87 | end 88 | 89 | defp response_to_string({:error, {reason, _detail}}) do 90 | %ExVCR.Response{ 91 | type: "error", 92 | body: Atom.to_string(reason) 93 | } 94 | end 95 | end 96 | -------------------------------------------------------------------------------- /test/support/nodes.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Nodes do 2 | @moduledoc false 3 | 4 | def connect(caller, result \\ true, node) do 5 | send(caller, {:connect, node}) 6 | result 7 | end 8 | 9 | def disconnect(caller, result \\ true, node) do 10 | send(caller, {:disconnect, node}) 11 | result 12 | end 13 | 14 | def list_nodes(nodes) do 15 | nodes 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /test/support/telemetry.ex: -------------------------------------------------------------------------------- 1 | defmodule Cluster.Telemetry do 2 | @moduledoc false 3 | 4 | use ExUnit.Case 5 | 6 | def setup_telemetry(event) do 7 | telemetry_handle_id = "test-telemetry-handler-#{inspect(self())}" 8 | 9 
| :ok = 10 | :telemetry.attach_many( 11 | telemetry_handle_id, 12 | [ 13 | event 14 | ], 15 | &send_to_pid/4, 16 | nil 17 | ) 18 | 19 | :ok = on_exit(fn -> :telemetry.detach(telemetry_handle_id) end) 20 | end 21 | 22 | defp send_to_pid(event, measurements, metadata, config) do 23 | pid = config[:pid] || self() 24 | 25 | send(pid, {:telemetry_event, {event, measurements, metadata, config}}) 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | --------------------------------------------------------------------------------