├── shortener ├── config │ ├── dev.exs │ ├── test.exs │ └── config.exs ├── test │ ├── test_helper.exs │ ├── support │ │ └── test_utils.ex │ ├── 03a_g_counter_test.exs │ ├── 03b_storing_aggregates.exs │ ├── 01_distributing_links_test.exs │ ├── 02_cluster_management_test.exs │ └── 03c_aggregates_test.exs ├── .formatter.exs ├── .gitignore ├── lib │ └── shortener │ │ ├── application.ex │ │ ├── storage.ex │ │ ├── g_counter.ex │ │ ├── link_manager │ │ └── cache.ex │ │ ├── link_manager.ex │ │ ├── cluster.ex │ │ ├── router.ex │ │ └── aggregates.ex ├── mix.exs ├── README.md └── mix.lock ├── ping_pong ├── test │ ├── test_helper.exs │ └── ping_pong_test.exs ├── .formatter.exs ├── lib │ └── ping_pong │ │ ├── application.ex │ │ ├── producer.ex │ │ └── consumer.ex ├── .gitignore ├── mix.lock ├── mix.exs ├── config │ └── config.exs └── README.md ├── papers ├── sagas.pdf ├── Dynamo.pdf ├── spanner.pdf ├── Map Reduce.pdf ├── fallacies.pdf ├── holygrail.pdf ├── hybrid logical clocks.pdf ├── paxos made live (chubby).pdf ├── consistency without concurrency control.pdf ├── Harvest, Yield, and Scalable Tolerant Systems.pdf └── Epidemic Algorithms for Replicated Database Maintenance.pdf ├── slides ├── dist_sys_01_training.key ├── dist_sys_01_training.pdf ├── dist_sys_02_training.key ├── dist_sys_02_training.pdf ├── dist_sys_03_training.key ├── dist_sys_03_training.pdf ├── dist_sys_04_training.key ├── dist_sys_04_training.pdf ├── dont-use-dist_sys_02_training .key └── dont-use-dist_sys_02_training .pdf └── README.md /shortener/config/dev.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | -------------------------------------------------------------------------------- /shortener/test/test_helper.exs: -------------------------------------------------------------------------------- 1 | LocalCluster.start() 2 | ExUnit.start() 3 | -------------------------------------------------------------------------------- 
/ping_pong/test/test_helper.exs: -------------------------------------------------------------------------------- 1 | LocalCluster.start() 2 | ExUnit.start() 3 | 4 | -------------------------------------------------------------------------------- /papers/sagas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/sagas.pdf -------------------------------------------------------------------------------- /papers/Dynamo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/Dynamo.pdf -------------------------------------------------------------------------------- /papers/spanner.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/spanner.pdf -------------------------------------------------------------------------------- /papers/Map Reduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/Map Reduce.pdf -------------------------------------------------------------------------------- /papers/fallacies.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/fallacies.pdf -------------------------------------------------------------------------------- /papers/holygrail.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/holygrail.pdf -------------------------------------------------------------------------------- /slides/dist_sys_01_training.key: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_01_training.key -------------------------------------------------------------------------------- /slides/dist_sys_01_training.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_01_training.pdf -------------------------------------------------------------------------------- /slides/dist_sys_02_training.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_02_training.key -------------------------------------------------------------------------------- /slides/dist_sys_02_training.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_02_training.pdf -------------------------------------------------------------------------------- /slides/dist_sys_03_training.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_03_training.key -------------------------------------------------------------------------------- /slides/dist_sys_03_training.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_03_training.pdf -------------------------------------------------------------------------------- /slides/dist_sys_04_training.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_04_training.key -------------------------------------------------------------------------------- /slides/dist_sys_04_training.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dist_sys_04_training.pdf -------------------------------------------------------------------------------- /papers/hybrid logical clocks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/hybrid logical clocks.pdf -------------------------------------------------------------------------------- /papers/paxos made live (chubby).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/paxos made live (chubby).pdf -------------------------------------------------------------------------------- /ping_pong/.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /shortener/.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /shortener/config/test.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :shortener, 4 | redis_database: 1 5 | 6 | config :logger, 7 | level: :error 8 | -------------------------------------------------------------------------------- /slides/dont-use-dist_sys_02_training .key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dont-use-dist_sys_02_training .key 
-------------------------------------------------------------------------------- /slides/dont-use-dist_sys_02_training .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/slides/dont-use-dist_sys_02_training .pdf -------------------------------------------------------------------------------- /papers/consistency without concurrency control.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/consistency without concurrency control.pdf -------------------------------------------------------------------------------- /papers/Harvest, Yield, and Scalable Tolerant Systems.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/Harvest, Yield, and Scalable Tolerant Systems.pdf -------------------------------------------------------------------------------- /papers/Epidemic Algorithms for Replicated Database Maintenance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keathley/distsys_training/HEAD/papers/Epidemic Algorithms for Replicated Database Maintenance.pdf -------------------------------------------------------------------------------- /shortener/config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 
3 | import Config 4 | 5 | config :shortener, 6 | redis_database: 0 7 | 8 | config :logger, 9 | level: :info 10 | 11 | import_config "#{Mix.env()}.exs" 12 | -------------------------------------------------------------------------------- /ping_pong/lib/ping_pong/application.ex: -------------------------------------------------------------------------------- 1 | defmodule PingPong.Application do 2 | @moduledoc false 3 | 4 | use Application 5 | 6 | def start(_type, _args) do 7 | children = [ 8 | PingPong.Producer, 9 | PingPong.Consumer, 10 | ] 11 | 12 | opts = [ 13 | strategy: :one_for_one 14 | ] 15 | 16 | Supervisor.start_link(children, opts) 17 | end 18 | end 19 | 20 | -------------------------------------------------------------------------------- /ping_pong/.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | ping_pong-*.tar 24 | 25 | -------------------------------------------------------------------------------- /shortener/.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 
8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | shortener-*.tar 24 | 25 | -------------------------------------------------------------------------------- /shortener/test/support/test_utils.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.TestUtils do 2 | def post(url, params) do 3 | headers = [ 4 | {"Content-Type", "application/x-www-form-urlencoded"} 5 | ] 6 | HTTPoison.post!(url, URI.encode_query(params), headers) 7 | end 8 | 9 | def get(url) do 10 | HTTPoison.get!(url) 11 | end 12 | 13 | def eventually(f, retries \\ 0) do 14 | f.() 15 | rescue 16 | err -> 17 | if retries >= 10 do 18 | reraise err, __STACKTRACE__ 19 | else 20 | :timer.sleep(500) 21 | eventually(f, retries + 1) 22 | end 23 | catch 24 | _exit, _term -> 25 | :timer.sleep(500) 26 | eventually(f, retries + 1) 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /ping_pong/mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "global_flags": {:hex, :global_flags, "1.0.0", "ee6b864979a1fb38d1fbc67838565644baf632212bce864adca21042df036433", [:rebar3], [], "hexpm"}, 3 | "local_cluster": {:hex, :local_cluster, "1.1.0", "a2a0e3e965aa1549939108066bfa537ce89f0107917f5b0260153e2fdb304116", [:mix], [{:global_flags, "~> 1.0", [hex: :global_flags, repo: "hexpm", optional: false]}], "hexpm"}, 4 | "propcheck": {:hex, :propcheck, "1.1.4", "95852a3f050cc3ee1ef5c9ade14a3bd34e29b54cb8db7ae8bc7f716d904d5120", [:mix], [{:proper, "~> 1.3", [hex: :proper, repo: "hexpm", optional: 
false]}], "hexpm"}, 5 | "proper": {:hex, :proper, "1.3.0", "c1acd51c51da17a2fe91d7a6fc6a0c25a6a9849d8dc77093533109d1218d8457", [:make, :mix, :rebar3], [], "hexpm"}, 6 | "schism": {:hex, :schism, "1.0.1", "b700883b4023b06faa5ab4add3aba5706877feb0a3dcfe8127b5dfeefe2513a5", [:mix], [], "hexpm"}, 7 | } 8 | -------------------------------------------------------------------------------- /ping_pong/mix.exs: -------------------------------------------------------------------------------- 1 | defmodule PingPong.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :ping_pong, 7 | version: "0.1.0", 8 | elixir: "~> 1.7", 9 | start_permanent: Mix.env() == :prod, 10 | deps: deps(), 11 | aliases: aliases() 12 | ] 13 | end 14 | 15 | # Run "mix help compile.app" to learn about applications. 16 | def application do 17 | [ 18 | extra_applications: [:logger], 19 | mod: {PingPong.Application, []} 20 | ] 21 | end 22 | 23 | # Run "mix help deps" to learn about dependencies. 24 | defp deps do 25 | [ 26 | {:local_cluster, "~> 1.0", only: [:dev, :test]}, 27 | {:schism, "~> 1.0", only: [:dev, :test]}, 28 | ] 29 | end 30 | 31 | def aliases do 32 | [ 33 | test: ["test --no-start --seed 0 --trace --max-failures 1"] 34 | ] 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /shortener/lib/shortener/application.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.Application do 2 | @moduledoc false 3 | 4 | use Application 5 | 6 | def start(_type, _args) do 7 | children = [ 8 | Plug.Cowboy.child_spec(scheme: :http, plug: Shortener.Router, options: [port: port()]), 9 | Shortener.LinkManager, 10 | Shortener.Storage, 11 | Shortener.Aggregates, 12 | Shortener.Cluster, 13 | ] 14 | 15 | opts = [strategy: :one_for_one, name: Shortener.Supervisor] 16 | Supervisor.start_link(children, opts) 17 | end 18 | 19 | defp port do 20 | name = Node.self() 21 | 22 | env = 23 | name 24 | |> 
Atom.to_string 25 | |> String.replace(~r/@.*$/, "") 26 | |> String.upcase 27 | 28 | name_specific_port = System.get_env("#{env}_PORT") 29 | specific_port = System.get_env("PORT") 30 | default_port = "4000" 31 | 32 | String.to_integer(name_specific_port || specific_port || default_port) 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /shortener/lib/shortener/storage.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.Storage do 2 | @pool_size 3 3 | 4 | def child_spec(_opts \\ []) do 5 | children = [ 6 | {Redix, database: database(), name: __MODULE__} 7 | ] 8 | 9 | %{ 10 | id: __MODULE__, 11 | type: :supervisor, 12 | start: {Supervisor, :start_link, [children, [strategy: :one_for_one]]}, 13 | } 14 | end 15 | 16 | def command(name \\ name(), cmds) when is_list(cmds) do 17 | Redix.command(name, cmds) 18 | end 19 | 20 | def set(name \\ name(), key, value) do 21 | with {:ok, _} <- command(name, ["SET", key, value, "NX"]) do 22 | :ok 23 | end 24 | end 25 | 26 | def get(name \\ name(), key) do 27 | command(name, ["GET", key]) 28 | end 29 | 30 | def flush(name \\ name()) do 31 | command(name, ["FLUSHDB"]) 32 | end 33 | 34 | defp name, do: __MODULE__ 35 | 36 | defp database do 37 | Application.get_env(:shortener, :redis_database) 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /shortener/lib/shortener/g_counter.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.GCounter do 2 | @moduledoc """ 3 | This module defines a grow-only counter, CRDT. 4 | """ 5 | 6 | @doc """ 7 | Returns a new counter 8 | """ 9 | def new(), do: nil 10 | 11 | @doc """ 12 | Increments the counter for this node by the given delta. If this is the first 13 | increment operation for this node then the count defaults to the delta. 
14 | """ 15 | def increment(counter, node \\ Node.self(), delta \\ 1) when delta >= 0 do 16 | # TODO - Increment the counter for a given node. 17 | end 18 | 19 | @doc """ 20 | Merges 2 counters together taking the highest value seen for each node. 21 | """ 22 | def merge(c1, c2) do 23 | # TODO - Merge's 2 counter's together by taking the highest value seen 24 | # for each node. 25 | end 26 | 27 | @doc """ 28 | Convert a counter to an integer. 29 | """ 30 | def to_i(counter) do 31 | # TODO - Convert the counter into an integer 32 | end 33 | end 34 | 35 | -------------------------------------------------------------------------------- /shortener/lib/shortener/link_manager/cache.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.LinkManager.Cache do 2 | @moduledoc false 3 | use GenServer 4 | 5 | def start_link(args) do 6 | GenServer.start_link(__MODULE__, args, name: __MODULE__) 7 | end 8 | 9 | def lookup(cache \\ __MODULE__, key) do 10 | # TODO - Do lookup here 11 | end 12 | 13 | def insert(cache \\ __MODULE__, key, value) do 14 | GenServer.call(cache, {:insert, key, value}) 15 | end 16 | 17 | def broadcast_insert(cache \\ __MODULE__, key, value) do 18 | GenServer.abcast(Node.list(), cache, {:insert, key, value}) 19 | end 20 | 21 | def flush(cache \\ __MODULE__) do 22 | GenServer.call(cache, :flush) 23 | end 24 | 25 | def init(args) do 26 | # TODO - Replace nil with real table 27 | {:ok, %{}} 28 | end 29 | 30 | def handle_cast({:insert, key, value}, data) do 31 | # TODO - Build cache insert 32 | {:noreply, data} 33 | end 34 | 35 | def handle_call({:insert, key, value}, _from, data) do 36 | # TODO - Insert the key into the table 37 | {:reply, :ok, data} 38 | end 39 | 40 | def handle_call(:flush, _from, data) do 41 | :ets.delete_all_objects(data.table) 42 | {:reply, :ok, data} 43 | end 44 | end 45 | -------------------------------------------------------------------------------- 
/ping_pong/lib/ping_pong/producer.ex: -------------------------------------------------------------------------------- 1 | defmodule PingPong.Producer do 2 | @moduledoc """ 3 | Sends pings to consumer processes 4 | """ 5 | use GenServer 6 | 7 | alias PingPong.Consumer 8 | 9 | @initial %{current: 0} 10 | 11 | def start_link(args) do 12 | GenServer.start_link(__MODULE__, args, name: __MODULE__) 13 | end 14 | 15 | def send_ping(server \\ __MODULE__) do 16 | GenServer.call(server, :send_ping) 17 | end 18 | 19 | def get_counts(server \\ __MODULE__) do 20 | GenServer.call(server, :get_counts) 21 | end 22 | 23 | def init(_args) do 24 | # TODO - Listen for node up and down events 25 | {:ok, @initial} 26 | end 27 | 28 | def handle_call(:send_ping, _from, data) do 29 | # TODO - Send a ping to all consumer processes 30 | {:reply, :ok, %{data | current: data.current+1}} 31 | end 32 | 33 | def handle_call(:get_counts, _from, data) do 34 | # TODO - Get the count from each consumer 35 | map = %{} 36 | {:reply, map, data} 37 | end 38 | 39 | # Don't remove me :) 40 | def handle_call(:flush, _, _) do 41 | {:reply, :ok, @initial} 42 | end 43 | 44 | def handle_info(_msg, data) do 45 | # TODO - Fill me in l8r 46 | {:noreply, data} 47 | end 48 | end 49 | 50 | -------------------------------------------------------------------------------- /shortener/mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Shortener.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :shortener, 7 | version: "0.1.0", 8 | elixir: "~> 1.9", 9 | elixirc_paths: elixirc_paths(Mix.env()), 10 | start_permanent: Mix.env() == :prod, 11 | deps: deps(), 12 | aliases: aliases() 13 | ] 14 | end 15 | 16 | # Run "mix help compile.app" to learn about applications. 17 | def application do 18 | [ 19 | extra_applications: [:logger], 20 | mod: {Shortener.Application, []} 21 | ] 22 | end 23 | 24 | # Run "mix help deps" to learn about dependencies. 
25 | defp deps do 26 | [ 27 | {:plug_cowboy, "~> 2.0"}, 28 | {:redix, "~> 0.9"}, 29 | {:httpoison, "~> 1.5"}, 30 | {:drax, "~> 0.1"}, 31 | {:ex_hash_ring, "~> 3.0"}, 32 | {:libcluster, "~> 3.1"}, 33 | {:local_cluster, "~> 1.0", only: [:dev, :test]}, 34 | {:schism, "~> 1.0", only: [:dev, :test]}, 35 | ] 36 | end 37 | 38 | defp elixirc_paths(:test), do: ["lib", "test/support"] 39 | defp elixirc_paths(_), do: ["lib"] 40 | 41 | def aliases do 42 | [ 43 | test: ["test --no-start --seed 0 --trace --max-failures 1"] 44 | ] 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /ping_pong/config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for 9 | # 3rd-party users, it should be done in your "mix.exs" file. 10 | 11 | # You can configure your application as: 12 | # 13 | # config :ping_pong, key: :value 14 | # 15 | # and access this configuration in your application as: 16 | # 17 | # Application.get_env(:ping_pong, :key) 18 | # 19 | # You can also configure a 3rd-party app: 20 | # 21 | # config :logger, level: :info 22 | # 23 | 24 | # It is also possible to import configuration files, relative to this 25 | # directory. For example, you can emulate configuration per environment 26 | # by uncommenting the line below and defining dev.exs, test.exs and such. 27 | # Configuration from the imported file will override the ones defined 28 | # here (which is why it is important to import them last). 
29 | # 30 | # import_config "#{Mix.env()}.exs" 31 | -------------------------------------------------------------------------------- /shortener/lib/shortener/link_manager.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.LinkManager do 2 | @moduledoc """ 3 | Manages the lifecycles of links 4 | """ 5 | 6 | alias Shortener.Storage 7 | alias Shortener.LinkManager.Cache 8 | alias Shortener.Cluster 9 | 10 | @lookup_sup __MODULE__.LookupSupervisor 11 | 12 | def child_spec(_args) do 13 | children = [ 14 | Cache, 15 | # TODO - Extend this supervision tree to support remote lookups 16 | ] 17 | 18 | %{ 19 | id: __MODULE__, 20 | type: :supervisor, 21 | start: {Supervisor, :start_link, [children, [strategy: :one_for_one]]} 22 | } 23 | end 24 | 25 | def create(url) do 26 | short_code = generate_short_code(url) 27 | 28 | {:ok, short_code} 29 | end 30 | 31 | def lookup(short_code) do 32 | Storage.get(short_code) 33 | end 34 | 35 | def remote_lookup(short_code) do 36 | # TODO - Do a remote lookup 37 | end 38 | 39 | def generate_short_code(url) do 40 | url 41 | |> hash 42 | |> Base.encode16(case: :lower) 43 | |> String.to_integer(16) 44 | |> pack_bitstring 45 | |> Base.url_encode64 46 | |> String.replace(~r/==\n?/, "") 47 | end 48 | 49 | defp hash(str), do: :crypto.hash(:sha256, str) 50 | 51 | defp pack_bitstring(int), do: << int :: big-unsigned-32 >> 52 | end 53 | -------------------------------------------------------------------------------- /shortener/lib/shortener/cluster.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.Cluster do 2 | @moduledoc """ 3 | This module provides an interface for updating clusters as well as a 4 | supervision tree for starting and stopping node discovery. 
5 | """ 6 | 7 | alias Shortener.Storage 8 | 9 | alias ExHashRing.HashRing 10 | 11 | @ring_key {__MODULE__, :hash_ring} 12 | 13 | def child_spec(_args) do 14 | children = [ 15 | {Cluster.Supervisor, [topology(), [name: Shortener.ClusterSupervisor]]}, 16 | ] 17 | 18 | %{ 19 | id: __MODULE__, 20 | type: :supervisor, 21 | start: {Supervisor, :start_link, [children, [strategy: :one_for_one]]} 22 | } 23 | end 24 | 25 | def find_node(key) do 26 | # TODO - Update with hash ring lookup 27 | end 28 | 29 | # Sets the canonical set of nodes into persistent storage. 30 | def set_canonical_nodes(nodes) do 31 | bin = :erlang.term_to_binary(nodes) 32 | :ok = Storage.set("shortener:cluster", bin) 33 | end 34 | 35 | def update_ring do 36 | # TODO - Fetch nodes from persistent store, update hash ring 37 | # put the hash ring into persistent term storage. 38 | :ok 39 | end 40 | 41 | defp topology do 42 | [ 43 | shortener: [ 44 | strategy: Cluster.Strategy.Gossip, 45 | ] 46 | ] 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /ping_pong/lib/ping_pong/consumer.ex: -------------------------------------------------------------------------------- 1 | defmodule PingPong.Consumer do 2 | @moduledoc """ 3 | Consumes pings sent from a producer process 4 | """ 5 | use GenServer 6 | 7 | alias PingPong.Producer 8 | 9 | @initial %{counts: %{}} 10 | 11 | def start_link(args) do 12 | GenServer.start_link(__MODULE__, args, name: __MODULE__) 13 | end 14 | 15 | def total_pings(server) do 16 | GenServer.call(server, :total_pings) 17 | end 18 | 19 | def count_for_node(server \\ __MODULE__, node) do 20 | counts = GenServer.call(server, :get_pings) 21 | counts[node] 22 | end 23 | 24 | def init(_args) do 25 | {:ok, @initial} 26 | end 27 | 28 | def handle_cast({:ping, index, node}, data) do 29 | {:noreply, put_in(data, [:counts, node], index)} 30 | end 31 | 32 | def handle_call(:get_pings, _from, data) do 33 | {:reply, data.counts, data} 34 | end 35 | 36 
| def handle_call(:total_pings, _from, data) do 37 | ping_count = 38 | data.counts 39 | |> Enum.map(fn {_, count} -> count end) 40 | |> Enum.sum() 41 | 42 | {:reply, ping_count, data} 43 | end 44 | 45 | # We need these for testing. Ignore the warning and do not remove :) 46 | def handle_call(:flush, _, _) do 47 | {:reply, :ok, @initial} 48 | end 49 | def handle_call(:crash, _from, _data) do 50 | _count = 42/0 51 | {:reply, :ok, @initial} 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /shortener/lib/shortener/router.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.Router do 2 | use Plug.Router 3 | 4 | require Logger 5 | 6 | alias Plug.Conn 7 | alias Shortener.{ 8 | Aggregates, 9 | LinkManager, 10 | Storage, 11 | } 12 | 13 | plug Plug.Logger, log: :debug 14 | plug Plug.Parsers, 15 | parsers: [:urlencoded, :multipart], 16 | pass: ["text/*"] 17 | plug :match 18 | plug :dispatch 19 | 20 | post "/" do 21 | %{"url" => url} = conn.params 22 | 23 | case LinkManager.create(url) do 24 | {:ok, short_code} -> 25 | conn 26 | |> put_resp_header("location", short_link(conn, short_code)) 27 | |> send_resp(201, short_code) 28 | 29 | {:error, _} -> 30 | conn 31 | |> send_resp(422, "Unable to shorten #{url}") 32 | end 33 | end 34 | 35 | get "/:short_code" do 36 | case LinkManager.lookup(short_code) do 37 | {:ok, url} -> 38 | conn 39 | |> put_resp_header("location", url) 40 | |> send_resp(302, url) 41 | 42 | {:error, _} -> 43 | send_resp(conn, 404, "Not Found") 44 | end 45 | end 46 | 47 | get "/:short_code/aggregates" do 48 | count = Aggregates.count_for(short_code) 49 | 50 | conn 51 | |> send_resp(200, "Redirects: #{count}") 52 | end 53 | 54 | match _ do 55 | send_resp(conn, 404, "oops") 56 | end 57 | 58 | defp short_link(conn, code) do 59 | conn 60 | |> Conn.request_url 61 | |> URI.merge(code) 62 | |> to_string 63 | end 64 | end 65 | 
-------------------------------------------------------------------------------- /shortener/lib/shortener/aggregates.ex: -------------------------------------------------------------------------------- 1 | defmodule Shortener.Aggregates do 2 | use GenServer 3 | 4 | alias __MODULE__ 5 | alias Shortener.GCounter 6 | 7 | require Logger 8 | 9 | def count_for(table \\ __MODULE__, hash) do 10 | # TODO: Do lookup from ets in the client process 11 | 0 12 | end 13 | 14 | def increment(server \\ __MODULE__, hash) do 15 | GenServer.cast(server, {:increment, hash}) 16 | end 17 | 18 | def merge(server \\ __MODULE__, hash, counter) do 19 | GenServer.cast(server, {:merge, hash, counter}) 20 | end 21 | 22 | def flush(server \\ __MODULE__) do 23 | GenServer.call(server, :flush) 24 | end 25 | 26 | def start_link(args) do 27 | GenServer.start_link(__MODULE__, args, name: __MODULE__) 28 | end 29 | 30 | def init(_args \\ []) do 31 | # TODO: Monitor node connections and disconnects 32 | 33 | {:ok, %{table: __MODULE__, counters: %{}}} 34 | end 35 | 36 | def handle_cast({:increment, short_code}, %{counters: counters}=data) do 37 | # TODO: Increment counter and broadcast a merge to the other nodes 38 | 39 | {:noreply, data} 40 | end 41 | 42 | def handle_cast({:merge, short_code, counter}, data) do 43 | # TODO: Merge our existing set of counters with the new counter 44 | 45 | {:noreply, data} 46 | end 47 | 48 | def handle_call(:flush, _from, data) do 49 | :ets.delete_all_objects(data.table) 50 | {:reply, :ok, %{data | counters: %{}}} 51 | end 52 | 53 | def handle_info(msg, data) do 54 | # TODO - Handle node disconnects and reconnections 55 | Logger.info("Unhandled message: #{inspect msg}") 56 | 57 | {:noreply, data} 58 | end 59 | end 60 | 61 | -------------------------------------------------------------------------------- /shortener/test/03a_g_counter_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Shortener.GCounterTest do 2 | use 
ExUnit.Case, async: true 3 | 4 | alias Shortener.GCounter 5 | 6 | describe "new/0" do 7 | test "returns a blank g counter" do 8 | assert GCounter.new() == %{} 9 | end 10 | end 11 | 12 | describe "increment/3" do 13 | test "defaults to the existing node name" do 14 | assert GCounter.new() 15 | |> GCounter.increment() == %{Node.self() => 1} 16 | 17 | assert GCounter.new() 18 | |> GCounter.increment() 19 | |> GCounter.increment() 20 | |> GCounter.increment() == %{Node.self() => 3} 21 | end 22 | end 23 | 24 | describe "merging/2" do 25 | test "takes the max value for each node" do 26 | c1 = 27 | GCounter.new() 28 | |> GCounter.increment(:foo) 29 | |> GCounter.increment(:bar) 30 | |> GCounter.increment(:bar) 31 | 32 | c2 = 33 | GCounter.new() 34 | |> GCounter.increment(:bar) 35 | |> GCounter.increment(:baz) 36 | 37 | c3 = GCounter.new() 38 | 39 | merged = 40 | c1 41 | |> GCounter.merge(c2) 42 | |> GCounter.merge(c3) 43 | |> GCounter.merge(c2) 44 | |> GCounter.merge(c1) 45 | 46 | merged = 47 | merged 48 | |> GCounter.merge(merged) 49 | |> GCounter.merge(c3) 50 | |> GCounter.merge(c2) 51 | 52 | assert GCounter.to_i(merged) == 4 53 | end 54 | end 55 | 56 | describe "to_i/1" do 57 | test "converts a counter to an integer" do 58 | assert GCounter.new() 59 | |> GCounter.increment(:foo) 60 | |> GCounter.increment(:foo) 61 | |> GCounter.increment(:bar) 62 | |> GCounter.increment(:baz) 63 | |> GCounter.increment(:baz) 64 | |> GCounter.to_i() == 5 65 | end 66 | end 67 | end 68 | 69 | -------------------------------------------------------------------------------- /shortener/README.md: -------------------------------------------------------------------------------- 1 | # Shortener 2 | 3 | ## Goal 4 | 5 | Our goals to build a link shortener utilizing distributed erlang. 
## Problem 1

In this first example we're going to take a full URL and convert it to a short code,
cache that short code in an in-memory cache, store that short code in a database,
and finally broadcast the short codes across the cluster.

## Problem 2

Now that we have our nodes connected and sharing links we want to make our distribution
more efficient. Currently we're doing full replication of all of our links. This
is pretty inefficient. In this exercise we're going to update our replication
strategy to send specific links to specific nodes. We'll do this using a technique
known as Consistent Hashing. We're going to use the ExHashRing library to accomplish this.
You will need to implement a function to rebuild the hash ring on demand. In our
tests we'll store the known set of nodes in the cluster. You will need to implement
a function to read the cluster from Redis and use it to rebuild the hash ring.

Once the hash ring has been built you can change the replication logic to send
creates to a specific node in the cluster. You will also need to change the
lookup logic to do lookups from a remote node.

Because our cache is based on ets you won't be able to get cache results from ets
directly. You'll need to build a way to call a remote function on a node and
send yourself the results of the lookup back. You may find `Task.Supervisor`
and the docs for `Task.async` when used with remote nodes to be helpful here.

## Problem 3

In the final exercise we're going to keep track of how many times a short code
has been redirected to. We're going to do this by first building a Grow Only Counter CRDT
(GCounter). Once this is done we're going to store each gcounter in a genserver,
increment each gcounter locally, and then broadcast it to all other nodes.
defmodule Shortener.StoringAggregatesTest do
  use ExUnit.Case, async: false

  import Shortener.TestUtils

  alias Shortener.Aggregates
  alias Shortener.GCounter

  setup_all do
    Application.ensure_all_started(:shortener)

    :ok
  end

  setup do
    # Each test starts from an empty set of counters.
    Aggregates.flush()

    :ok
  end

  describe "Aggregates" do
    test "creates a counter for a short code" do
      assert %{} == :sys.get_state(Aggregates).counters
      Aggregates.increment("chris")
      assert match?(%{"chris" => _}, :sys.get_state(Aggregates).counters)
    end

    test "turns counters into a representation for quick lookups" do
      for code <- ["chris", "alice", "andra"], do: Aggregates.increment(code)

      eventually(fn ->
        for code <- ["chris", "alice", "andra"] do
          assert Aggregates.count_for(code) == 1
        end
      end)
    end

    test "counters can be merged" do
      # Counters as they would arrive from another node.
      remote_chris = remote_counter(3)
      remote_alice = remote_counter(2)
      remote_andra = remote_counter(1)

      Aggregates.increment("chris")
      for _ <- 1..3, do: Aggregates.increment("alice")

      Aggregates.merge("chris", remote_chris)
      Aggregates.merge("alice", remote_alice)
      Aggregates.merge("andra", remote_andra)

      eventually(fn ->
        assert Aggregates.count_for("chris") == 4
        assert Aggregates.count_for("alice") == 5
        assert Aggregates.count_for("andra") == 1
      end)
    end
  end

  # Builds a counter incremented `times` times on a fake remote node.
  defp remote_counter(times) do
    Enum.reduce(1..times, GCounter.new(), fn _, counter ->
      GCounter.increment(counter, :other)
    end)
  end
end
38 | 39 | * Open up two terminal windows using whatever method you like. 40 | * In window 1: run `iex --name gold@127.0.0.1` 41 | * In window 2: run `iex --name silver@127.0.0.1` 42 | * In window 1: run `Node.connect(:"silver@127.0.0.1")` 43 | * In window 2: run `Node.list()`. The output should be 44 | `[:"gold@127.0.0.1"]` 45 | 46 | If you have an error during any of these steps please ask Chris or Ben for 47 | help. 48 | 49 | ## Part 1 - Ping Pong 50 | 51 | Part 1 provides a rough overview of connecting erlang nodes. We will see 52 | how to start processes on specific nodes, some of the failure scenarios 53 | when BEAMs disconnect, sending RPCs and other fundamental concepts. 54 | 55 | ## Parts 2, 3, and 4 - Link Shortener 56 | 57 | For the remainder of the training we'll be building a link shortener. We will use distributed erlang to support very low latency reads, fannout using consistent hashing, CRDTs, and robust replication strategies. 58 | 59 | ## Why does this use Distributed Erlang? 60 | 61 | This training uses standard, distributed erlang. While there are many limitations 62 | and issues with dist-erl the goal of this training is not to promote a specific 63 | tool but instead to teach the underlying concepts that are universal to 64 | all distributed systems. Dist-erl provides the lowest barrier for doing 65 | that. We make no attempt to hide the issues with dist-erl. If you need 66 | a more robust solution you should look at Partisan. 
defmodule ShortenerTest do
  use ExUnit.Case, async: false

  import Shortener.TestUtils

  alias Shortener.{
    Cluster,
    LinkManager,
    LinkManager.Cache,
    Storage,
  }

  setup_all do
    [
      {"PORT", "4000"},
      {"SHORTENER1_PORT", "4001"},
      {"SHORTENER2_PORT", "4002"},
      {"SHORTENER3_PORT", "4003"},
    ]
    |> Enum.each(fn {name, value} -> System.put_env(name, value) end)

    Application.ensure_all_started(:shortener)

    # Ignore these lines for now ;). They're only here so we don't break
    # these tests in the next section.
    nodes = LocalCluster.start_nodes("shortener", 2)
    Cluster.set_canonical_nodes([Node.self() | nodes])
    Cluster.update_ring()

    Enum.each(nodes, fn node ->
      :rpc.call(node, Shortener.Cluster, :update_ring, [])
    end)

    {:ok, nodes: nodes}
  end

  setup do
    Storage.flush()
    GenServer.multi_call(Cache, :flush)

    :ok
  end

  describe "Cache" do
    test "can store urls" do
      assert {:error, :not_found} == Cache.lookup("shortcode")
      Cache.insert("shortcode", "https://elixiroutlaws.com")
      assert {:ok, "https://elixiroutlaws.com"} == Cache.lookup("shortcode")
    end

    test "after a cache miss the cache is updated" do
      url = "https://elixiroutlaws.com"

      assert {:ok, code} = LinkManager.create(url)
      assert :ok == Cache.flush()
      assert {:error, :not_found} == Cache.lookup(code)

      # Looking up through the LinkManager falls back to storage and
      # repopulates the cache.
      assert {:ok, url} == LinkManager.lookup(code)
      assert {:ok, url} == Cache.lookup(code)
    end
  end

  describe "clustered" do
    test "it shortens links" do
      response = post("http://localhost:4000", %{"url" => "https://keathley.io"})
      assert response.status_code == 201
      assert {_, short_link} = location_header(response)

      response = get(short_link)
      assert response.status_code == 302
      assert {_, "https://keathley.io"} = location_header(response)
      assert "https://keathley.io" = response.body
    end

    test "all nodes can see a new short link", %{nodes: nodes} do
      url = "https://elixiroutlaws.com"
      [n1, n2] = nodes

      assert {:ok, code} = LinkManager.create(url)
      assert {:ok, ^url} = LinkManager.lookup(code)

      eventually(fn ->
        assert {:ok, ^url} = :rpc.call(n1, LinkManager, :lookup, [code])
        assert {:ok, ^url} = :rpc.call(n2, LinkManager, :lookup, [code])
      end)
    end

    test "links can be returned even during a partition", %{nodes: nodes} do
      url = "https://elixiroutlaws.com"
      [n1, n2] = nodes

      Schism.partition([n1])

      assert {:ok, code} = :rpc.call(n2, LinkManager, :create, [url])

      eventually(fn ->
        assert {:ok, ^url} = LinkManager.lookup(code)
        assert {:ok, ^url} = :rpc.call(n1, LinkManager, :lookup, [code])
      end)
    end
  end

  # Pulls the {"location", value} header tuple out of an HTTP response.
  defp location_header(response) do
    Enum.find(response.headers, fn {header, _value} -> header == "location" end)
  end
end
* `Node.list/0` - Lists all currently connected nodes.
* `GenServer.abcast/2` - Casts a message to a genserver with the given name on all connected nodes.
* `GenServer.multi_call/2` - Calls a genserver with a given name on all connected nodes.
* `net_kernel.monitor_nodes/1` - Allows any process to monitor node up and node down events. Node events can be handled in the `handle_info` callback.


## Problem 1

In this problem you need to cast pings to all consumers.

## Problem 2

Now that we can broadcast pings to all consumers we need to check each
consumer to see what their current ping counts are.

## Problem 3

If our consumer crashes our states will get out of sync. In this exercise your
job is to recover gracefully from a crash. In this case we're going to do this
by having the consumer request the current ping count from each producer
when the consumer starts. To make this work you'll need to modify both the
consumer and the producer code.

We could have chosen to solve this problem with monitors. But monitors
have an inherent race condition where the producer could cast to
a consumer that isn't currently started yet. Using this demand-driven
approach helps us to eliminate that race condition and is generally more
reliable.

## Problem 4

In our last exercise we're going to see how things fail when network
partitions occur. In order to create partitions between nodes we're using
a tool called Schism. By calling `Schism.partition/1` we can cause
a partition between nodes. When we want to heal the partition we can call
`Schism.heal/1`.

After a node is split from the network - or if a new node joins the
cluster - we need to catch them up on our latest status. In order to
accomplish this we need our producer to monitor node events.
defmodule PingPongTest do
  use ExUnit.Case

  alias PingPong.{
    Consumer,
    Producer
  }

  setup_all do
    Application.ensure_all_started(:ping_pong)

    :ok
  end

  setup do
    nodes = LocalCluster.start_nodes("ping-pong", 2)
    GenServer.multi_call(Consumer, :flush)
    GenServer.multi_call(Producer, :flush)

    on_exit(fn -> LocalCluster.stop_nodes(nodes) end)

    {:ok, nodes: nodes}
  end

  test "producer sends pings to each connected nodes consumer", %{nodes: nodes} do
    [node_a, node_b] = nodes

    assert :ok == Producer.send_ping()
    assert :ok == Producer.send_ping({Producer, node_b})
    assert :ok == Producer.send_ping({Producer, node_a})

    Enum.each(nodes, fn node ->
      assert Consumer.total_pings({Consumer, node}) == 3
    end)
  end

  test "producer can check the state of each connected consumer", %{nodes: nodes} do
    [node_a, node_b] = nodes

    assert :ok = Producer.send_ping()
    assert :ok = Producer.send_ping({Producer, node_a})
    assert :ok = Producer.send_ping({Producer, node_b})

    eventually(fn ->
      expected = %{node_a => 3, node_b => 3, Node.self() => 3}
      assert Producer.get_counts() == expected
    end)
  end

  test "producer can catch up crashed consumers", %{nodes: nodes} do
    [node_a, _node_b] = nodes

    assert :ok = Producer.send_ping()
    assert :ok = Producer.send_ping()

    Enum.each(nodes, fn node ->
      eventually(fn ->
        assert Consumer.count_for_node({Consumer, node}, Node.self()) == 2
      end)
    end)

    # Crash the consumer from a throwaway process so the test process
    # doesn't have to catch the resulting exit.
    spawn(fn -> GenServer.call({Consumer, node_a}, :crash) end)

    :erlang.yield()

    # After restarting, the consumer should have been caught back up.
    Enum.each(nodes, fn node ->
      eventually(fn ->
        assert Consumer.count_for_node({Consumer, node}, Node.self()) == 2
      end)
    end)
  end

  test "producer can catch up nodes after a netsplit", %{nodes: nodes} do
    [node_a, node_b] = nodes

    assert :ok = GenServer.call({Producer, node_b}, :send_ping)
    assert :ok = GenServer.call({Producer, node_a}, :send_ping)

    eventually(fn ->
      assert Consumer.total_pings({Consumer, node_a}) == 2
      assert Consumer.total_pings({Consumer, node_b}) == 2
    end)

    # Cut node_a off from node_b.
    Schism.partition([node_a])

    # Pings sent on either side of the split only land locally.
    assert :ok = GenServer.call({Producer, node_b}, :send_ping)
    assert :ok = GenServer.call({Producer, node_a}, :send_ping)

    eventually(fn ->
      assert Consumer.total_pings({Consumer, node_a}) == 3
      assert Consumer.total_pings({Consumer, node_b}) == 3
    end)

    Schism.heal([node_a, node_b])

    # Once healed, each side should learn about the ping it missed.
    eventually(fn ->
      assert Consumer.total_pings({Consumer, node_a}) == 4
      assert Consumer.total_pings({Consumer, node_b}) == 4
    end)
  end

  # Re-runs `fun` every 500ms until it stops raising. Assertion errors are
  # retried up to 10 times before the last one is re-raised; exits and
  # throws are retried without a limit.
  def eventually(fun, attempt \\ 0) do
    try do
      fun.()
    rescue
      error ->
        if attempt >= 10 do
          reraise error, __STACKTRACE__
        else
          :timer.sleep(500)
          eventually(fun, attempt + 1)
        end
    catch
      _kind, _reason ->
        :timer.sleep(500)
        eventually(fun, attempt + 1)
    end
  end
end
"1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, 9 | "httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, 10 | "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, 11 | "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"}, 12 | "libcluster": {:hex, :libcluster, "3.1.1", "cbab97b96141f47f2fe5563183c444bbce9282b3991ef054d69b8805546f0122", [:mix], [{:jason, "~> 1.1.2", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"}, 13 | "local_cluster": {:hex, :local_cluster, "1.1.0", "a2a0e3e965aa1549939108066bfa537ce89f0107917f5b0260153e2fdb304116", [:mix], [{:global_flags, "~> 1.0", [hex: :global_flags, repo: "hexpm", optional: false]}], "hexpm"}, 14 | "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, 15 | "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"}, 16 | "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, 17 | "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, 18 | "plug": {:hex, :plug, "1.8.3", "12d5f9796dc72e8ac9614e94bda5e51c4c028d0d428e9297650d09e15a684478", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], 
defmodule Shortener.ClusterManagementTest do
  use ExUnit.Case, async: false

  import Shortener.TestUtils

  alias Shortener.{
    Cluster,
    LinkManager,
    LinkManager.Cache,
    Storage,
  }

  setup_all do
    [
      {"PORT", "4000"},
      {"SHORTENER1_PORT", "4001"},
      {"SHORTENER2_PORT", "4002"},
      {"SHORTENER3_PORT", "4003"},
    ]
    |> Enum.each(fn {name, value} -> System.put_env(name, value) end)

    Application.ensure_all_started(:shortener)

    nodes = LocalCluster.start_nodes("shortener", 3)
    Cluster.set_canonical_nodes(nodes)
    Cluster.update_ring()

    Enum.each(nodes, fn node ->
      :rpc.call(node, Shortener.Cluster, :update_ring, [])
    end)

    {:ok, nodes: nodes}
  end

  setup do
    Storage.flush()
    GenServer.multi_call(Cache, :flush)

    :ok
  end

  test "cluster membership can be set", %{nodes: nodes} do
    [n1, n2, n3] = nodes

    Cluster.set_canonical_nodes(nodes)
    assert :ok = Cluster.update_ring()

    # These keys are chosen to land on distinct nodes of the hash ring.
    assert n3 == Cluster.find_node("a")
    assert n1 == Cluster.find_node("b")
    assert n2 == Cluster.find_node("c")
  end

  test "links are routed to specific boxes", %{nodes: nodes} do
    [_n1, _n2, n3] = nodes
    url = "https://elixiroutlaws.com"

    assert {:ok, code} = LinkManager.create(url)

    # The record was created on a remote node, so our local cache should
    # still be empty while n3's cache holds the url.
    assert {:error, :not_found} == Cache.lookup(code)
    assert {:ok, url} == :rpc.call(n3, Cache, :lookup, [code])
  end

  test "doing a remote lookup on another node loads the link into memory", %{nodes: nodes} do
    [_n1, _n2, n3] = nodes
    url = "https://elixiroutlaws.com"

    # Creating the link warms n3's ets cache; flush it so we can observe
    # the remote lookup repopulating it.
    assert {:ok, code} = LinkManager.create(url)
    :ok = Cache.flush({Cache, n3})
    assert {:error, :not_found} == :rpc.call(n3, Cache, :lookup, [code])

    assert {:ok, url} == LinkManager.remote_lookup(code)
    assert {:ok, url} == :rpc.call(n3, Cache, :lookup, [code])
  end

  test "during partitions creation and remote lookups are unavailable", %{nodes: nodes} do
    [n1, n2, n3] = nodes
    url = "https://elixiroutlaws.com"

    # This specific url hashes onto node 3, so split it from the cluster.
    Schism.partition([n3])
    assert {:error, :node_down} == :rpc.call(n1, LinkManager, :create, [url])

    # Since we're the manager node we can still reach n3.
    assert {:ok, code} = LinkManager.create(url)

    # n2 is on the wrong side of the split and cannot see n3.
    assert {:error, :node_down} == :rpc.call(n2, LinkManager, :remote_lookup, [code])

    Schism.heal([n3])

    assert {:ok, ^url} = :rpc.call(n1, LinkManager, :remote_lookup, [code])
    assert {:ok, ^url} = :rpc.call(n2, LinkManager, :remote_lookup, [code])
  end

  test "api utilizes remote creation", %{nodes: nodes} do
    [n1, n2, n3] = nodes
    url = "https://elixiroutlaws.com"

    response = post("http://localhost:4000", %{"url" => url})
    assert response.status_code == 201
    short_code = response.body

    assert {:error, :not_found} == :rpc.call(n1, Cache, :lookup, [short_code])
    assert {:error, :not_found} == :rpc.call(n2, Cache, :lookup, [short_code])
    assert {:ok, url} == :rpc.call(n3, Cache, :lookup, [short_code])
  end

  test "api uses remote lookups", %{nodes: nodes} do
    [n1, n2, n3] = nodes
    url = "https://elixiroutlaws.com"

    response = post("http://localhost:4000", %{"url" => url})
    assert response.status_code == 201
    short_code = response.body

    for port_suffix <- 1..3 do
      redirect = get("http://localhost:400#{port_suffix}/#{short_code}")
      assert redirect.status_code == 302
      assert {_, ^url} = Enum.find(redirect.headers, fn {header, _} -> header == "location" end)
      assert url == redirect.body
    end

    assert {:error, :not_found} = :rpc.call(n1, Cache, :lookup, [short_code])
    assert {:error, :not_found} = :rpc.call(n2, Cache, :lookup, [short_code])
    assert {:ok, url} == :rpc.call(n3, Cache, :lookup, [short_code])
  end

  test "api if remote lookup we fetch from storage", %{nodes: nodes} do
    [n1, n2, _n3] = nodes
    url = "https://elixiroutlaws.com"

    response = post("http://localhost:4000", %{"url" => url})
    assert response.status_code == 201
    short_code = response.body

    # n1 and n2 lose contact with n3, the owner of this short code.
    Schism.partition([n1, n2])

    for port_suffix <- 1..2 do
      redirect = get("http://localhost:400#{port_suffix}/#{short_code}")
      assert redirect.status_code == 302
      assert {_, ^url} = Enum.find(redirect.headers, fn {header, _} -> header == "location" end)
      assert url == redirect.body
    end
  end
end
defmodule Shortener.AggregatesTest do
  use ExUnit.Case, async: false

  import Shortener.TestUtils

  alias Shortener.{
    Aggregates,
    Cluster,
    LinkManager.Cache,
    Storage,
  }

  setup_all do
    [
      {"PORT", "4000"},
      {"SHORTENER1_PORT", "4001"},
      {"SHORTENER2_PORT", "4002"},
      {"SHORTENER3_PORT", "4003"},
    ]
    |> Enum.each(fn {name, value} -> System.put_env(name, value) end)

    Application.ensure_all_started(:shortener)

    nodes = LocalCluster.start_nodes("shortener", 3)
    Cluster.set_canonical_nodes([Node.self() | nodes])
    Cluster.update_ring()

    Enum.each(nodes, fn node ->
      :rpc.call(node, Shortener.Cluster, :update_ring, [])
    end)

    {:ok, nodes: nodes}
  end

  setup do
    Storage.flush()
    GenServer.multi_call(Cache, :flush)
    GenServer.multi_call(Aggregates, :flush)

    :ok
  end

  test "aggregates can be shared across nodes", %{nodes: nodes} do
    [n1, n2, n3] = nodes

    Aggregates.increment("outlaws")

    eventually(fn ->
      assert Aggregates.count_for("outlaws") == 1

      for node <- [n1, n2, n3] do
        assert remote_count(node, "outlaws") == 1
      end
    end)
  end

  test "aggregates recover from netsplits", %{nodes: nodes} do
    [n1, n2, n3] = nodes

    Schism.partition([n1, n2])

    # n3 is cut off, so these three increments are invisible to n1 and n2...
    for _ <- 1..3, do: Aggregates.increment({Aggregates, n3}, "outlaws")

    # ...and these three are invisible to n3.
    Aggregates.increment({Aggregates, n1}, "outlaws")
    Aggregates.increment({Aggregates, n1}, "outlaws")
    Aggregates.increment({Aggregates, n2}, "outlaws")

    eventually(fn ->
      for node <- [n1, n2, n3] do
        assert remote_count(node, "outlaws") == 3
      end
    end)

    Schism.heal([n1, n2, n3])

    # Healing should trigger each node to merge their respective counters.
    eventually(fn ->
      for node <- [n1, n2, n3] do
        assert remote_count(node, "outlaws") == 6
      end
    end)
  end

  test "aggregates are shared across nodes", %{nodes: nodes} do
    [n1, n2, n3] = nodes

    response = post("http://localhost:4001", %{"url" => "https://keathley.io"})
    assert response.status_code == 201
    assert {_, short_link} = Enum.find(response.headers, fn {h, _} -> h == "location" end)

    response = get(short_link)
    assert response.status_code == 302

    hash = URI.parse(short_link).path

    # Every node should converge on the first redirect.
    for port <- [4001, 4002, 4003], do: assert_redirects(port, hash, 1)

    Schism.partition([n1, n2])

    response = get("http://localhost:4001" <> hash)
    assert response.status_code == 302

    # Only n1 and n2 see the new redirect; n3 keeps the old count.
    assert_redirects(4001, hash, 2)
    assert_redirects(4002, hash, 2)
    assert_redirects(4003, hash, 1)

    response = get("http://localhost:4002" <> hash)
    assert response.status_code == 302

    response = get("http://localhost:4001" <> hash)
    assert response.status_code == 302

    response = get("http://localhost:4003" <> hash)
    assert response.status_code == 302

    assert_redirects(4001, hash, 4)
    assert_redirects(4002, hash, 4)
    assert_redirects(4003, hash, 2)

    Schism.heal([n1, n2, n3])

    # After the heal all nodes must converge on the combined count.
    for port <- [4001, 4002, 4003], do: assert_redirects(port, hash, 5)
  end

  # Asks `node` for its current count for `code`.
  defp remote_count(node, code) do
    :rpc.call(node, Aggregates, :count_for, [code])
  end

  # Polls the aggregates endpoint on `port` until it reports `expected`
  # redirects for `hash`.
  defp assert_redirects(port, hash, expected) do
    eventually(fn ->
      response = get("http://localhost:#{port}" <> hash <> "/aggregates")
      assert response.body == "Redirects: #{expected}"
    end)
  end
end