├── .formatter.exs ├── .gitignore ├── README.md ├── lib ├── libcluster │ ├── ecs_cluster_info.ex │ ├── ecs_strategy.ex │ └── epmd.ex └── libcluster_ecs.ex ├── mix.exs └── test ├── libcluster_ecs_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | libcluster_ecs-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ClusterEcs 2 | 3 | Use this library to set up clustering within AWS ECS. 4 | 5 | This library, unlike others, does not rely on configuring your nodes with `awsvpc` networking mode. Instead it queries ECS's port mappings to accomplish the goal. 6 | 7 | ## Getting started 8 | 9 | Create a container port mapping (eg, container port 7777 to host port 0, this will assign a random port). 
10 | 11 | Configure the libcluster topology: 12 | 13 | ``` 14 | config :libcluster, 15 | topologies: [ 16 | mycluster: [ 17 | strategy: Cluster.EcsStrategy, 18 | config: [ 19 | cluster_name: "mycluster", 20 | service_name: "myservice", 21 | app_prefix: "myapp_prefix", 22 | region: "eu-west-1", 23 | container_port: 7777 24 | ] 25 | ] 26 | ] 27 | ``` 28 | 29 | Add `Cluster.EcsClusterInfo` to your supervision tree before the cluster supervisor and provide it with your config: 30 | 31 | ``` 32 | children = [ 33 | ... 34 | {Cluster.EcsClusterInfo, Application.get_env(:libcluster, :topologies)[:mycluster][:config]}, 35 | {Cluster.Supervisor, [Application.get_env(:libcluster, :topologies), [name: MyApp.ClusterSupervisor]]} 36 | ... 37 | ] 38 | ``` 39 | 40 | Configure libcluster EPMD by setting `DISTRIBUTION_PORT` in `rel/env.sh.eex`. This needs to be an env var because this EPMD module is used during startup and application configuration is not available yet: 41 | 42 | ``` 43 | export DISTRIBUTION_PORT=7777 44 | ``` 45 | 46 | Add the following line to `rel/vm.args.eex`: 47 | 48 | ``` 49 | -epmd_module Elixir.Cluster.EPMD 50 | ``` 51 | 52 | Configure (if you haven't already) `ex_aws`. The IAM user that you configure needs the following permissions: 53 | 54 | ``` 55 | ecs:ListClusters 56 | ecs:ListServices 57 | ecs:ListTasks 58 | ecs:DescribeTasks 59 | ecs:DescribeContainerInstances 60 | ec2:DescribeInstances 61 | ``` 62 | 63 | ## Installation 64 | 65 | If [available in Hex](https://hex.pm/docs/publish), the package can be installed 66 | by adding `libcluster_ecs` to your list of dependencies in `mix.exs`: 67 | 68 | ```elixir 69 | def deps do 70 | [ 71 | {:libcluster_ecs, "~> 0.2.0"} 72 | ] 73 | end 74 | ``` 75 | 76 | Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc) 77 | and published on [HexDocs](https://hexdocs.pm). Once published, the docs can 78 | be found at <https://hexdocs.pm/libcluster_ecs>. 
# --------------------------------------------------------------------------------
# /lib/libcluster/ecs_cluster_info.ex:
# --------------------------------------------------------------------------------
defmodule Cluster.EcsClusterInfo do
  @moduledoc """
  Keeps a periodically refreshed view of the nodes in our ECS cluster:

      %{node_name => {ip, port}}

  for example `%{:"myapp@0123456789ab" => {{10, 0, 1, 23}, 32768}}`.

  `node_name` is the atom `:"<app_prefix>@<first 12 characters of the container
  runtime id>"`, `ip` is the private IPv4 address of the EC2 instance hosting
  the container and `port` is the host port that ECS mapped to the configured
  `:container_port`.

  The map is rebuilt every #{10_000} ms. When a refresh fails (AWS error,
  unexpected response, ...) a warning is logged and the previously known map is
  kept, so a transient AWS outage does not take this process down.
  """

  use GenServer
  require Logger

  @refresh_timeout 10_000

  @doc """
  Starts the registry under the name `#{inspect(__MODULE__)}`.

  `config` is a keyword list that must contain `:region`, `:cluster_name`,
  `:service_name` (a single pattern or a list of patterns), `:app_prefix` and
  `:container_port`. A missing key raises `KeyError` during `init/1`.
  """
  def start_link(config) do
    GenServer.start_link(__MODULE__, config, name: __MODULE__)
  end

  @doc """
  Returns the last successfully discovered `%{node_name => {ip, port}}` map.

  Exits (like any `GenServer.call/2`) when the server is not running or does
  not answer in time.
  """
  @spec get_nodes() :: %{optional(node()) => {:inet.ip4_address(), integer()}}
  def get_nodes() do
    GenServer.call(__MODULE__, :get_nodes)
  end

  @impl true
  def init(config) do
    set_refresh()

    state =
      config
      |> set_config(%{})
      |> Map.put(:nodes, %{})

    # A failed first discovery leaves us with an empty map instead of failing
    # the whole supervision tree; the periodic refresh will catch up.
    {:ok, refresh_nodes(state)}
  end

  @impl true
  def handle_call(:get_nodes, _from, state) do
    {:reply, Map.get(state, :nodes, %{}), state}
  end

  @impl true
  def handle_info(:refresh, state) do
    new_state = refresh_nodes(state)

    # Re-arm only after the (slow) AWS round-trips so refreshes never overlap.
    set_refresh()
    {:noreply, new_state}
  end

  # Ignore stray messages rather than crashing on a FunctionClauseError.
  def handle_info(_message, state) do
    {:noreply, state}
  end

  # Replaces `:nodes` on success; keeps the last known value on failure.
  defp refresh_nodes(state) do
    case my_get_nodes(state) do
      {:ok, nodes} -> Map.put(state, :nodes, nodes)
      {:error, _reason} -> state
    end
  end

  defp set_refresh() do
    Process.send_after(self(), :refresh, @refresh_timeout)
  end

  # Copies the required options into the state map; raises KeyError on a
  # missing option. `:service_name` is normalised to a list.
  defp set_config(config, state) do
    Map.merge(state, %{
      region: Keyword.fetch!(config, :region),
      cluster_name: Keyword.fetch!(config, :cluster_name),
      service_name: config |> Keyword.fetch!(:service_name) |> List.wrap(),
      app_prefix: Keyword.fetch!(config, :app_prefix),
      container_port: Keyword.fetch!(config, :container_port)
    })
  end

  # Runs the whole discovery pipeline. Returns `{:ok, nodes_map}` or, after
  # logging a warning, `{:error, reason}`.
  defp my_get_nodes(state) do
    %{
      region: region,
      cluster_name: cluster_name,
      service_name: service_names,
      app_prefix: app_prefix,
      container_port: container_port
    } = state

    with {:ok, list_service_body} <- list_services(cluster_name, region),
         {:ok, service_arns} <- extract_service_arns(list_service_body),
         {:ok, task_arns} <-
           get_tasks_for_services(cluster_name, region, service_arns, service_names),
         {:ok, desc_task_body} <- describe_tasks(cluster_name, task_arns, region),
         {:ok, arns_ports} <- extract_arns_ports(desc_task_body, container_port),
         {:ok, ips_ports} <- extract_ips_ports(cluster_name, arns_ports, region) do
      {:ok,
       Map.new(ips_ports, fn {runtime_id, ip, port} ->
         {runtime_id_to_nodename(runtime_id, app_prefix), {ip, port}}
       end)}
    else
      err ->
        Logger.warning(fn ->
          "Error #{inspect(err)} while determining nodes in cluster via ECS"
        end)

        {:error, err}
    end
  end

  # Collects the task ARNs of every configured service, stopping at the first
  # service that cannot be resolved or listed.
  defp get_tasks_for_services(cluster_name, region, service_arns, service_names) do
    Enum.reduce_while(service_names, {:ok, []}, fn service_name, {:ok, acc_tasks} ->
      with {:ok, service_arn} <- find_service_arn(service_arns, service_name),
           {:ok, list_task_body} <- list_tasks(cluster_name, service_arn, region),
           {:ok, task_arns} <- extract_task_arns(list_task_body) do
        {:cont, {:ok, acc_tasks ++ task_arns}}
      else
        error -> {:halt, error}
      end
    end)
  end

  defp log_aws(response, request_type) do
    Logger.debug("ExAws #{request_type} response: #{inspect(response)}")
    response
  end

  # Lists all service ARNs of the cluster, following `nextToken` pagination.
  defp list_services(cluster_name, region) do
    %{"cluster" => cluster_name}
    |> request_services(region)
    |> list_services(cluster_name, region, [])
  end

  defp list_services(
         {:ok, %{"nextToken" => next_token, "serviceArns" => service_arns}},
         cluster_name,
         region,
         accum
       )
       when not is_nil(next_token) do
    %{"cluster" => cluster_name, "nextToken" => next_token}
    |> request_services(region)
    |> list_services(cluster_name, region, accum ++ service_arns)
  end

  defp list_services({:ok, %{"serviceArns" => service_arns}}, _cluster_name, _region, accum) do
    {:ok, %{"serviceArns" => accum ++ service_arns}}
  end

  defp list_services({:error, message}, _cluster_name, _region, _accum) do
    {:error, message}
  end

  # A successful HTTP call with an unexpected body is an error, not a crash.
  defp list_services({:ok, other}, _cluster_name, _region, _accum) do
    {:error, {"unknown list services response", other}}
  end

  defp request_services(params, region) do
    query("ListServices", params)
    |> ExAws.request(region: region)
    |> log_aws("ListServices")
  end

  # NOTE(review): only the first page of ListTasks is read (no `nextToken`
  # handling), so very large services may be truncated - confirm whether
  # pagination is needed for your deployments.
  defp list_tasks(cluster_name, service_arn, region) do
    params = %{
      "cluster" => cluster_name,
      "serviceName" => service_arn,
      "desiredStatus" => "RUNNING"
    }

    query("ListTasks", params)
    |> ExAws.request(region: region)
    |> log_aws("ListTasks")
  end

  # ECS rejects DescribeTasks with an empty task list, so answer locally.
  defp describe_tasks(_cluster_name, [], _region) do
    {:ok, %{"tasks" => []}}
  end

  defp describe_tasks(cluster_name, task_arns, region) do
    params = %{
      "cluster" => cluster_name,
      "tasks" => task_arns
    }

    query("DescribeTasks", params)
    |> ExAws.request(region: region)
    |> log_aws("DescribeTasks")
  end

  defp describe_container_instances(cluster_name, container_arns, region) do
    params = %{
      "cluster" => cluster_name,
      "containerInstances" => container_arns
    }

    query("DescribeContainerInstances", params)
    |> ExAws.request(region: region)
    |> log_aws("DescribeContainerInstances")
  end

  defp describe_ec2_instances(instance_ids, region) do
    ExAws.EC2.describe_instances(instance_ids: instance_ids)
    |> ExAws.request(region: region)
    |> log_aws("EC2:DescribeInstances")
  end

  @namespace "AmazonEC2ContainerServiceV20141113"

  # Builds an ECS JSON-protocol operation for `action`.
  defp query(action, params) do
    ExAws.Operation.JSON.new(
      :ecs,
      %{
        data: params,
        headers: [
          {"accept-encoding", "identity"},
          {"x-amz-target", "#{@namespace}.#{action}"},
          {"content-type", "application/x-amz-json-1.1"}
        ]
      }
    )
  end

  defp extract_task_arns(%{"taskArns" => arns}), do: {:ok, arns}
  defp extract_task_arns(_), do: {:error, "unknown task arns response"}

  defp extract_service_arns(%{"serviceArns" => arns}), do: {:ok, arns}
  defp extract_service_arns(_), do: {:error, "unknown service arns response"}

  defp extract_container_instances(%{"containerInstances" => instances})
       when is_list(instances),
       do: {:ok, instances}

  defp extract_container_instances(_), do: {:error, "unknown container instances response"}

  # `service_name` is compiled as a regular expression and matched against the
  # service ARNs; the first matching ARN wins.
  defp find_service_arn(service_arns, service_name) when is_list(service_arns) do
    with {:ok, regex} <- Regex.compile(service_name) do
      case Enum.find(service_arns, &Regex.match?(regex, &1)) do
        nil ->
          Logger.error("no service matching #{service_name} found")
          {:error, "no service matching #{service_name} found"}

        arn ->
          {:ok, arn}
      end
    end
  end

  defp find_service_arn(_, _), do: {:error, "no service arns returned"}

  # Produces `{container_instance_arn, runtime_id, host_port}` for every
  # container that has all three; containers lacking an instance ARN, a
  # runtime id or a binding for `container_port` are skipped.
  defp extract_arns_ports(%{"tasks" => tasks}, container_port) do
    arns_ports =
      for task <- tasks,
          container <- Map.get(task, "containers") || [],
          container_instance_arn = Map.get(task, "containerInstanceArn"),
          runtime_id = short_runtime_id(container),
          host_port = host_port_for(container, container_port) do
        {container_instance_arn, runtime_id, host_port}
      end

    {:ok, arns_ports}
  end

  defp extract_arns_ports(_, _), do: {:error, "can't extract ips"}

  # First 12 characters of the container runtime id, or nil when absent.
  defp short_runtime_id(container) do
    case Map.get(container, "runtimeId") do
      nil -> nil
      string -> String.slice(string, 0..11)
    end
  end

  # Host port bound to `container_port`, or nil when there is no such binding.
  defp host_port_for(container, container_port) do
    container
    |> Map.get("networkBindings")
    |> List.wrap()
    |> Enum.find_value(fn
      %{"containerPort" => ^container_port, "hostPort" => host_port} -> host_port
      _ -> nil
    end)
  end

  # Nothing to resolve: avoid calling DescribeContainerInstances with an
  # empty list.
  defp extract_ips_ports(_cluster_name, [], _region), do: {:ok, []}

  defp extract_ips_ports(cluster_name, arns_ports, region) do
    container_arns =
      arns_ports
      |> Enum.map(fn {container_arn, _runtime_id, _host_port} -> container_arn end)
      |> Enum.uniq()

    with {:ok, response} <-
           describe_container_instances(cluster_name, container_arns, region),
         {:ok, instances} <- extract_container_instances(response),
         {:ok, container_arn_to_ip} <- resolve_instance_ips(instances, region) do
      # Entries whose instance could not be resolved are dropped: a node
      # without an address is of no use to the EPMD module.
      {:ok,
       for {container_arn, runtime_id, host_port} <- arns_ports,
           ip_address = Map.get(container_arn_to_ip, container_arn) do
         {runtime_id, ip_address, host_port}
       end}
    end
  end

  # Maps each container instance ARN to the private IPv4 address of its EC2
  # instance (one EC2 DescribeInstances call per instance). Stops at the first
  # lookup that fails.
  defp resolve_instance_ips(instances, region) do
    import SweetXml

    Enum.reduce_while(instances, {:ok, %{}}, fn instance, {:ok, acc} ->
      instance_id = Map.get(instance, "ec2InstanceId")

      with {:ok, %{body: body}} <- describe_ec2_instances([instance_id], region),
           ip_charlist when not is_nil(ip_charlist) <-
             xpath(body, ~x"//privateIpAddress/text()"),
           {:ok, ip_address} <- :inet.parse_ipv4_address(ip_charlist) do
        {:cont, {:ok, Map.put(acc, Map.get(instance, "containerInstanceArn"), ip_address)}}
      else
        error -> {:halt, {:error, {"can't resolve ip of instance", instance_id, error}}}
      end
    end)
  end

  # Node names have to be atoms; their number is bounded by the containers
  # ECS reports for the configured services.
  defp runtime_id_to_nodename(runtime_id, app_prefix) do
    :"#{app_prefix}@#{runtime_id}"
  end
end

# --------------------------------------------------------------------------------
# /lib/libcluster/ecs_strategy.ex:
# --------------------------------------------------------------------------------
defmodule Cluster.EcsStrategy do
  @moduledoc """
  A `libcluster` strategy that connects to the nodes reported by
  `Cluster.EcsClusterInfo`.

  Every `:polling_interval` milliseconds (default 5000, read from the topology
  `config`) the strategy asks `Cluster.EcsClusterInfo.get_nodes/0` for the
  current node names, disconnects from nodes that disappeared and connects to
  the ones that are listed. `Cluster.EcsClusterInfo` must already be running.
  """

  use GenServer
  use Cluster.Strategy

  alias Cluster.Strategy.State

  @default_polling_interval 5_000

  @impl true
  def start_link(args), do: GenServer.start_link(__MODULE__, args)

  @impl true
  def init([%State{meta: nil} = state]) do
    init([%State{state | :meta => MapSet.new()}])
  end

  def init([%State{} = state]) do
    # load/1 already schedules the next poll, so no GenServer timeout is needed.
    {:ok, load(state)}
  end

  @impl true
  def handle_info(:load, state) do
    {:noreply, load(state)}
  end

  def handle_info(_, state) do
    {:noreply, state}
  end

  # Reconciles the connected nodes with the discovered ones, schedules the
  # next poll and stores the resulting node set in `meta`.
  defp load(%State{meta: known_nodes} = state) do
    discovered =
      Cluster.EcsClusterInfo.get_nodes()
      |> Map.keys()
      |> MapSet.new()

    nodelist =
      discovered
      |> disconnect_removed(MapSet.difference(known_nodes, discovered), state)
      |> connect_listed(state)

    Process.send_after(self(), :load, polling_interval(state))

    %State{state | meta: nodelist}
  end

  defp disconnect_removed(nodelist, removed, %State{topology: topology} = state) do
    case Cluster.Strategy.disconnect_nodes(
           topology,
           state.disconnect,
           state.list_nodes,
           MapSet.to_list(removed)
         ) do
      :ok ->
        nodelist

      {:error, bad_nodes} ->
        # Add back the nodes which should have been removed, but which couldn't be for some reason
        Enum.reduce(bad_nodes, nodelist, fn {n, _}, acc -> MapSet.put(acc, n) end)
    end
  end

  defp connect_listed(nodelist, %State{topology: topology} = state) do
    case Cluster.Strategy.connect_nodes(
           topology,
           state.connect,
           state.list_nodes,
           MapSet.to_list(nodelist)
         ) do
      :ok ->
        nodelist

      {:error, bad_nodes} ->
        # Remove the nodes which should have been added, but couldn't be for some reason
        Enum.reduce(bad_nodes, nodelist, fn {n, _}, acc -> MapSet.delete(acc, n) end)
    end
  end

  defp polling_interval(%State{config: config}) do
    Keyword.get(config, :polling_interval, @default_polling_interval)
  end
end
# --------------------------------------------------------------------------------
# /lib/libcluster/epmd.ex:
# --------------------------------------------------------------------------------
defmodule Cluster.EPMD do
  @moduledoc """
  EPMD callback module (enabled with `-epmd_module Elixir.Cluster.EPMD`).

  Node addresses are looked up in `Cluster.EcsClusterInfo` first; everything
  else is delegated to the stock `:erl_epmd` implementation.
  """

  alias Cluster.EcsClusterInfo
  require Logger

  # Version returned as the last element of a successful address_please/3.
  @magic_version 5

  def start_link do
    :erl_epmd.start_link()
  end

  def register_node(name, port, family) do
    :erl_epmd.register_node(name, port, family)
  end

  @doc """
  Returns the distribution listen port taken from the `DISTRIBUTION_PORT`
  environment variable.

  Raises when the variable is not set or is not an integer.
  """
  def listen_port_please(_name, _hostname) do
    container_port =
      "DISTRIBUTION_PORT"
      |> System.fetch_env!()
      |> String.to_integer()

    {:ok, container_port}
  end

  @doc """
  Resolves `name@hostname` to `{:ok, ip, port, version}` using the nodes known
  to `Cluster.EcsClusterInfo`.

  Falls back to `:erl_epmd.address_please/3` when the node is unknown or when
  `Cluster.EcsClusterInfo` cannot be reached (not started yet, timed out).
  """
  @spec address_please(charlist(), charlist(), atom()) ::
          {:ok, :inet.ip_address(), integer(), integer()} | {:error, term()}
  def address_please(name, hostname, family) do
    nodename = :"#{name}@#{hostname}"

    case known_nodes() do
      %{^nodename => {ip, port}} -> {:ok, ip, port, @magic_version}
      _ -> :erl_epmd.address_please(name, hostname, family)
    end
  end

  def names(hostname) do
    :erl_epmd.names(hostname)
  end

  # This module can be invoked while the application (and therefore
  # Cluster.EcsClusterInfo) is not running; treat that as "no known nodes".
  defp known_nodes do
    EcsClusterInfo.get_nodes()
  catch
    :exit, _reason -> %{}
  end
end

# --------------------------------------------------------------------------------
# /lib/libcluster_ecs.ex:
# --------------------------------------------------------------------------------
defmodule ClusterEcs do
  @moduledoc """
  Namespace module of the `libcluster_ecs` package.

  The functionality lives in `Cluster.EcsClusterInfo`, `Cluster.EcsStrategy`
  and `Cluster.EPMD`.
  """
end

# --------------------------------------------------------------------------------
# /mix.exs:
# --------------------------------------------------------------------------------
defmodule ClusterEcs.MixProject do
  use Mix.Project

  def project do
    [
      app: :libcluster_ecs,
      version: "0.2.0",
      elixir: "~> 1.13",
      start_permanent: Mix.env() == :prod,
      name: "Libcluster ECS",
      package: package(),
      source_url: "https://github.com/wuunder/libcluster_ecs",
      deps: deps()
    ]
  end

  def application do
    [
      extra_applications: [:logger]
    ]
  end

  # Hex package metadata.
  defp package do
    [
      description: "libcluster + AWS Elastic Container Service (ECS)",
      licenses: ["Apache-2.0"],
      links: %{
        "GitHub" => "https://github.com/wuunder/libcluster_ecs"
      }
    ]
  end

  defp deps do
    [
      {:ex_doc, "~> 0.28", only: :dev, runtime: false},
      {:ex_aws, "~> 2.1"},
      {:ex_aws_ec2, "~> 2.0"},
      {:sweet_xml, "~> 0.7.0"},
      {:libcluster, "~> 3.3"}
    ]
  end
end

# --------------------------------------------------------------------------------
# /test/libcluster_ecs_test.exs:
# --------------------------------------------------------------------------------
defmodule LibclusterEcsTest do
  use ExUnit.Case, async: true

  # The generated placeholder test referenced a non-existent `LibclusterEcs`
  # module; check the modules this package really ships instead.
  test "the package modules are compiled and loadable" do
    for mod <- [ClusterEcs, Cluster.EcsClusterInfo, Cluster.EcsStrategy, Cluster.EPMD] do
      assert Code.ensure_loaded?(mod)
    end
  end
end

# --------------------------------------------------------------------------------
# /test/test_helper.exs:
# --------------------------------------------------------------------------------
ExUnit.start()