├── rebar ├── .gitignore ├── rebar.config ├── test ├── tags └── ballermann_tests.erl ├── src ├── ballermann.app.src ├── ballermann_app.erl ├── ballermann_sup.erl └── ballermann.erl └── README.md /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/odo/ballermann/HEAD/rebar -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | ebin 4 | src/tags 5 | test/tags 6 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {deps, [ 2 | {meck, "", {git, "https://github.com/eproxus/meck.git", {branch, "develop"}}} 3 | ]}. 4 | 5 | -------------------------------------------------------------------------------- /test/tags: -------------------------------------------------------------------------------- 1 | handle_info ballermann_tests.erl /^ ballermann:handle_info({'DOWN', x, x, 2, x/ 2 | init ballermann_tests.erl /^ ballermann:init({supervisor, 0.75})$/ 3 | -------------------------------------------------------------------------------- /src/ballermann.app.src: -------------------------------------------------------------------------------- 1 | {application, ballermann, 2 | [ 3 | {description, ""}, 4 | {vsn, "1.6.0"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib 9 | ]}, 10 | {mod, { ballermann_app, []}}, 11 | {env, []} 12 | ]}. 13 | -------------------------------------------------------------------------------- /src/ballermann_app.erl: -------------------------------------------------------------------------------- 1 | -module(ballermann_app). 2 | 3 | -behaviour(application). 4 | 5 | %% Application callbacks 6 | -export([start/2, stop/1]). 
%% ===================================================================
%% Application callbacks
%% ===================================================================

%% Application start callback. Both arguments are ignored; the result of
%% ballermann_sup:start_link/0 is returned as is.
%% NOTE(review): ballermann_sup (next file in this dump) exports only
%% start_link/2 and start_link/3, so this zero-arity call does not appear to
%% resolve -- confirm how the application is meant to be booted.
start(_Type, _Args) ->
    ballermann_sup:start_link().

%% Application stop callback; there is nothing to clean up.
stop(_) ->
    ok.

--------------------------------------------------------------------------------
/src/ballermann_sup.erl:
--------------------------------------------------------------------------------
-module(ballermann_sup).
-behaviour(supervisor).

%% API
-export([start_link/2, start_link/3]).

%% Callbacks
-export([init/1]).

%% Ratio handed to the balancer when the caller does not supply one.
-define(MIN_ALIVE_RATIO, 0.8).

%% Start a supervisor for one balancer, using the default alive ratio.
start_link(Supervisor, ServerName) ->
    start_link(Supervisor, ServerName, ?MIN_ALIVE_RATIO).

%% Start a supervisor for one balancer. The supervisor is registered locally
%% under the name produced by supervisor_name/1.
start_link(Supervisor, ServerName, MinAliveRatio) ->
    InitArg = {Supervisor, ServerName, MinAliveRatio},
    supervisor:start_link({local, supervisor_name(ServerName)}, ?MODULE, InitArg).

%% Supervisor callback: a single permanent worker (the ballermann gen_server)
%% with a 1000 ms shutdown timeout, restarted one_for_one with at most
%% 10 restarts within 1 second.
init({Supervisor, ServerName, MinAliveRatio}) ->
    StartMFA = {ballermann, start_link, [Supervisor, ServerName, MinAliveRatio]},
    ChildSpec = {ServerName, StartMFA, permanent, 1000, worker, [ballermann]},
    {ok, {{one_for_one, 10, 1}, [ChildSpec]}}.

%% Registered name of the supervisor: the balancer name with "_sub" appended.
supervisor_name(ServerName) ->
    list_to_atom(atom_to_list(ServerName) ++ "_sub").
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | For an improved implementation of the same idea, see [revolver](https://github.com/odo/revolver). 2 | 3 | Ballermann 4 | ===== 5 | 6 | Ballermann (German for "the one who drinks from the spring, gently") is a tool for load balancing between Erlang processes in a round-robin fashion. 7 | 8 | Compared to poolboy (https://github.com/devinus/poolboy) ballermann is much simpler since it has no concept of a lease. 
9 | It's designed for the fast parallelization of equally sized tasks. 10 | 11 | Building 12 | -------- 13 | 14 | ``` 15 | git clone git://github.com/odo/ballermann.git 16 | cd ballermann 17 | ./rebar get-deps compile 18 | ``` 19 | 20 | Performance 21 | -------- 22 | 23 | On modern hardware, ballermann can hand out more than a hundred thousand pids per second. So it will add an overhead < 10 microseconds to each call. 24 | 25 | Usage 26 | -------- 27 | 28 | The idea is to balance between processes owned by a supervisor. In this example we use the sasl supervisor (```sasl_sup```) which has two sub-processes. In a typical setting, you would balance between a large set of identical gen_server processes. 29 | 30 | ``` 31 | 1> application:start(sasl). 32 | ok 33 | 2> ballermann:balance(sasl_sup, sasl_pool). 34 | {ok,<0.43.0>} 35 | 3> ballermann:pid(sasl_pool). 36 | <0.40.0> 37 | 4> ballermann:pid(sasl_pool). 38 | <0.37.0> 39 | 5> ballermann:pid(sasl_pool). 40 | <0.40.0> 41 | 6> ballermann:pid(sasl_pool). 42 | <0.37.0> 43 | ``` 44 | 45 | If you want to get funky, you can specify when the pool is refreshed, meaning when the supervisor is asked for its children. 46 | You do so by specifying a ratio. If the number of alive processes in the pool in relation to the original number drops below this ratio, ballermann turns to the supervisor and asks for new processes. 47 | The default is 0.8 (80 %). 48 | 49 | ``` 50 | ballermann:balance(sasl_sup, sasl_pool2, 0.9). 51 | ``` 52 | 53 | Sometimes, when the processes behind ballermann have side effects, you want ballermann to stop handing out pids and perform some cleanup. This can be achieved using apply_within(ServerName, {Module, Function, Args}, WaitTimeOrFun). When you provide a numeric value, ballermann will wait for that number of milliseconds. If you provide a fun with arity 1, ballermann will call that function with each pid in the pool as an argument: 54 | 55 | ``` 56 | 1> application:start(sasl). 
57 | ok 58 | 2> ballermann:balance(sasl_sup, sasl_pool). 59 | {ok,<0.43.0>} 60 | 3> ballermann:apply_within(sasl_pool, {lists, reverse, [[1, 2]]}, 1000). 61 | [2,1] 62 | 4> ballermann:apply_within(sasl_pool, {lists, reverse, [[1, 2]]}, fun(P) -> P end). 63 | [2,1] 64 | ``` 65 | 66 | If you have a pool of gen_servers running and you want to make sure that they are all idle before you do your maintenance, the call would more likely look like this: 67 | 68 | ``` 69 | ballermann:apply_within(server_pool, {my_cleanup_module, cleanup, [now]}, fun(Pid) -> gen_server:call(Pid, {ping}) end). 70 | ``` 71 | 72 | 73 | 74 | Tests 75 | -------- 76 | 77 | ```./rebar eunit skip_deps=true``` 78 |
--------------------------------------------------------------------------------
/test/ballermann_tests.erl:
--------------------------------------------------------------------------------
-module(ballermann_tests).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

%% EUnit generator: every case below runs in the test process itself (local),
%% wrapped in its own test_setup/0 and test_teardown/1 call.
ballermann_test_() ->
    Cases = [
        fun test_balance/0,
        fun test_apply_within/0,
        fun test_no_supervisor_init/0,
        fun test_no_children_init/0,
        fun test_no_children/0,
        fun test_no_supervisor/0,
        fun test_exit/0
    ],
    [{foreach, local, fun test_setup/0, fun test_teardown/1, Cases}].

%% Fixture setup: start sasl (its return value is not checked) and put a
%% passthrough mock over the ballermann module so single functions can be
%% replaced per test.
test_setup() ->
    application:start(sasl),
    MeckOptions = [unstick, passthrough],
    meck:new(ballermann, MeckOptions).

%% Fixture teardown: remove the mock again.
test_teardown(_SetupResult) ->
    meck:unload(ballermann).
%% With three mocked children, four consecutive {pid} calls are expected to
%% yield 1, 2, 3 and then wrap around to 1.
test_balance() ->
    meck:expect(ballermann, child_pids, fun(_) -> [1, 2, 3] end),
    {ok, InitialState} = ballermann:init({supervisor, 0.75}),
    NextPid =
        fun(Expected, State) ->
            {reply, Pid, NewState} = ballermann:handle_call({pid}, x, State),
            ?assertEqual(Expected, Pid),
            NewState
        end,
    _FinalState = lists:foldl(NextPid, InitialState, [1, 2, 3, 1]),
    ok.

%% apply_within must return the result of the given MFA and leave the state
%% untouched, for a zero wait, a 100 ms wait and a fun.
test_apply_within() ->
    meck:expect(ballermann, child_pids, fun(_) -> [1, 2, 3] end),
    {ok, State} = ballermann:init({supervisor, 0.75}),
    Cases = [
        {[1, 2], 0, [2, 1]},
        {[2, 1], 100, [1, 2]},
        {[2, 3], fun(P) -> P end, [3, 2]}
    ],
    lists:foreach(
        fun({Input, WaitTimeOrFun, Expected}) ->
            Request = {apply_within, {lists, reverse, [Input]}, WaitTimeOrFun},
            %% State is already bound, so this match also asserts that the
            %% returned state is unchanged.
            {reply, Reply, State} = ballermann:handle_call(Request, x, State),
            ?assertEqual(Expected, Reply)
        end,
        Cases).

%% Without a mock for child_pids/1 the atom 'supervisor' is not a running
%% process; init is expected to exit with supervisor_not_running.
test_no_supervisor_init() ->
    ?assertExit(
        {error, supervisor_not_running},
        ballermann:init({supervisor, 0.75})).

%% A supervisor without children is expected to make init exit with
%% supervisor_has_no_children.
test_no_children_init() ->
    meck:expect(ballermann, child_pids, fun(_) -> [] end),
    ?assertExit(
        {error, supervisor_has_no_children},
        ballermann:init({supervisor, 0.75})).
%% Both known children die and the supervisor reports none afterwards: the
%% balancer is expected to exit with supervisor_has_no_children.
test_no_children() ->
    meck:expect(ballermann, child_pids, fun(_) -> [1, 2] end),
    {ok, InitialState} = ballermann:init({supervisor, 0.75}),
    meck:expect(ballermann, child_pids, fun(_) -> [] end),
    ?assertExit(
        {error, supervisor_has_no_children},
        begin
            {noreply, AfterFirstDown} =
                ballermann:handle_info({'DOWN', x, x, 1, x}, InitialState),
            ballermann:handle_info({'DOWN', x, x, 2, x}, AfterFirstDown)
        end).

%% After init the mock is pointed back at the real child_pids/1; since the
%% atom 'supervisor' is not running, a 'DOWN' message is expected to make the
%% balancer exit with supervisor_not_running.
test_no_supervisor() ->
    meck:expect(ballermann, child_pids, fun(_) -> [1, 2] end),
    {ok, InitialState} = ballermann:init({supervisor, 0.75}),
    meck:expect(ballermann, child_pids, fun(Arg) -> ballermann_meck_original:child_pids(Arg) end),
    ?assertExit(
        {error, supervisor_not_running},
        ballermann:handle_info({'DOWN', x, x, 1, x}, InitialState)).

%% Children dying one after another: dead pids must no longer be handed out
%% and the rotation continues over the survivors.
test_exit() ->
    %% Request one pid, check it, and thread the state on.
    ExpectPid =
        fun(Expected, State) ->
            {reply, Pid, NewState} = ballermann:handle_call({pid}, x, State),
            ?assertEqual(Expected, Pid),
            NewState
        end,
    meck:expect(ballermann, child_pids, fun(_) -> [1,2,3] end),
    {ok, InitialState} = ballermann:init({supervisor, 0.75}),
    meck:expect(ballermann, child_pids, fun(_) -> [1,3] end),
    {noreply, AfterFirstDown} = ballermann:handle_info({'DOWN', x, x, 2, x}, InitialState),
    StateTwoLeft = lists:foldl(ExpectPid, AfterFirstDown, [1, 3, 1]),
    meck:expect(ballermann, child_pids, fun(_) -> [1] end),
    {noreply, AfterSecondDown} = ballermann:handle_info({'DOWN', x, x, 3, x}, StateTwoLeft),
    _ = lists:foldl(ExpectPid, AfterSecondDown, [1, 1]),
    ok.

-endif.
--------------------------------------------------------------------------------
/src/ballermann.erl:
--------------------------------------------------------------------------------
-module(ballermann).

-ifdef(TEST).
-compile([export_all]).
-endif.

-behaviour(gen_server).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
%% API
-export([balance/2, balance/3, apply_within/2, apply_within/3, start_link/3, pid/1, child_pids/1]).

%% The supervisor whose children are balanced. alive/1 in this module only
%% accepts a registered name or a pid, so the type says exactly that.
-type sup_ref() :: atom() | pid().

-record(state, {
    supervisor          :: sup_ref(),
    %% Result of ets:new/2 for an unnamed table, i.e. a table identifier.
    pid_table           :: ets:tid(),
    %% 'undefined' until the add_missing_pids cast sent by init/1 is handled.
    last_pid            :: pid() | undefined,
    %% Table size right after the first load; 'undefined' before that.
    pids_count_original :: integer() | undefined,
    min_alive_ratio     :: float()
}).

%% Start the balancer gen_server and register it locally as ServerName.
start_link(Supervisor, ServerName, MinAliveRatio) ->
    gen_server:start_link({local, ServerName}, ?MODULE, {Supervisor, MinAliveRatio}, []).

%% Start balancing over the children of Supervisor under the name
%% BalancerName; the alive ratio is the default chosen by ballermann_sup.
balance(Supervisor, BalancerName) ->
    ballermann_sup:start_link(Supervisor, BalancerName).

%% Same as balance/2 with an explicit MinAliveRatio.
balance(Supervisor, BalancerName, MinAliveRatio) ->
    ballermann_sup:start_link(Supervisor, BalancerName, MinAliveRatio).

%% Ask the balancer registered as ServerName for the next pid.
pid(ServerName) ->
    gen_server:call(ServerName, {pid}).

%% Run Module:Function(Args) inside the balancer process without waiting
%% first (wait time 0).
apply_within(ServerName, {Module, Function, Args}) ->
    apply_within(ServerName, {Module, Function, Args}, 0).

%% Run Module:Function(Args) inside the balancer process. WaitTimeOrFun is
%% either a number of milliseconds to wait beforehand or a fun that is called
%% with each child pid beforehand. Returns whatever the applied function
%% returns.
apply_within(ServerName, {Module, Function, Args}, WaitTimeOrFun) ->
    gen_server:call(ServerName, {apply_within, {Module, Function, Args}, WaitTimeOrFun}).

%% gen_server init: create the private ETS table that holds the pids and defer
%% the initial load of the supervisor's children to an add_missing_pids cast
%% sent to ourselves.
init({Supervisor, MinAliveRatio}) ->
    PidTable = ets:new(pid_table, [private, duplicate_bag]),
    State = #state{
        supervisor = Supervisor,
        pids_count_original = undefined,
        min_alive_ratio = MinAliveRatio,
        pid_table = PidTable,
        last_pid = undefined},
    gen_server:cast(self(), add_missing_pids),
    {ok, State}.
%% {pid}: hand out the key that follows last_pid in the ETS table, wrapping
%% around to the first key at the end of the table, and remember it as the new
%% last_pid.
handle_call({pid}, _From, State = #state{last_pid = LastPid, pid_table = PidTable}) ->
    Pid =
        case ets:next(PidTable, LastPid) of
            '$end_of_table' -> ets:first(PidTable);
            NextPid -> NextPid
        end,
    {reply, Pid, State#state{last_pid = Pid}};
%% {apply_within, MFA, WaitTimeOrFun}: first either sleep for the given number
%% of milliseconds or call the given fun with every child pid of the
%% supervisor, then apply the MFA and reply with its result. All of this runs
%% inside this callback, so no other request is served in the meantime.
handle_call({apply_within, {Module, Function, Args}, WaitTimeOrFun}, _From,
            State = #state{supervisor = Supervisor}) ->
    case WaitTimeOrFun of
        Millis when is_integer(Millis) ->
            timer:sleep(Millis);
        FloatMillis when is_float(FloatMillis) ->
            %% timer:sleep/1 only accepts integers; a float used to crash the
            %% balancer here, so round it to the nearest millisecond.
            timer:sleep(round(FloatMillis));
        Fun when is_function(Fun) ->
            lists:foreach(fun(Pid) -> Fun(Pid) end, ?MODULE:child_pids(Supervisor))
    end,
    Reply = apply(Module, Function, Args),
    {reply, Reply, State}.

%% add_missing_pids: load the supervisor's children into the table, start the
%% rotation at the first key and record the table size as the reference count
%% used by the alive-ratio check.
handle_cast(add_missing_pids, State = #state{supervisor = Supervisor, pid_table = PidTable}) ->
    add_missing_pids(PidTable, Supervisor),
    FirstPid = ets:first(PidTable),
    TableSize = table_size(PidTable),
    {noreply, State#state{last_pid = FirstPid, pids_count_original = TableSize}}.

%% 'DOWN': a monitored child died. Remove it from the table and, if the share
%% of remaining pids relative to the original count dropped below
%% min_alive_ratio, ask the supervisor for its current children.
handle_info({'DOWN', _, _, Pid, _},
            State = #state{supervisor = Supervisor,
                           last_pid = LastPid,
                           pid_table = PidTable,
                           pids_count_original = PidsCountOriginal,
                           min_alive_ratio = MinAliveRatio}) ->
    error_logger:info_msg("~p: The process ~p (child of ~p) died.\n", [?MODULE, Pid, Supervisor]),
    ets:delete(PidTable, Pid),
    case too_few_pids(PidTable, PidsCountOriginal, MinAliveRatio) of
        true ->
            error_logger:warning_msg("~p: Reloading children from supervisor ~p.\n", [?MODULE, Supervisor]),
            add_missing_pids(PidTable, Supervisor);
        false ->
            noop
    end,
    %% Pick a valid last_pid: the current one might be the one which just died.
    LastPidSafe =
        case LastPid of
            Pid -> ets:first(PidTable);
            _ -> LastPid
        end,
    {noreply, State#state{last_pid = LastPidSafe}};
%% Any other message is logged and dropped; without this clause a stray
%% message would crash the balancer with function_clause.
handle_info(Message, State) ->
    error_logger:warning_msg("~p: Ignoring unexpected message ~p.\n", [?MODULE, Message]),
    {noreply, State}.

%% Nothing to release explicitly on termination.
terminate(_Reason, _State) -> ok.

%% No state migration is needed on code upgrade.
code_change(_OldVsn, State, _Extra) -> {ok, State}.
%% Exit with {error, supervisor_has_no_children} when the pid table is empty;
%% otherwise return noop.
check_zero_pids(PidTable, Supervisor) ->
    case table_size(PidTable) of
        0 ->
            error_logger:error_msg("~p: Supervisor ~p has no children. Giving up.\n", [?MODULE, Supervisor]),
            exit({error, supervisor_has_no_children});
        _ ->
            noop
    end.

%% True when the number of pids currently in the table, relative to
%% PidsCountOriginal, is below MinAliveRatio.
too_few_pids(PidTable, PidsCountOriginal, MinAliveRatio) ->
    table_size(PidTable) / PidsCountOriginal < MinAliveRatio.

%% Ask the supervisor for its children, monitor and insert those that are not
%% in the table yet, then make sure the table is not empty.
add_missing_pids(Table, Supervisor) ->
    Pids = ?MODULE:child_pids(Supervisor),
    PidsNew = [Pid || Pid <- Pids, not ets:member(Table, Pid)],
    error_logger:info_msg("~p: Found ~p new processes of ~p total.\n", [?MODULE, length(PidsNew), length(Pids)]),
    PidsWithRefs = [{Pid, {monitor(Pid)}} || Pid <- PidsNew],
    ets:insert(Table, PidsWithRefs),
    check_zero_pids(Table, Supervisor).

%% Under TEST no real monitor is set up and 'ok' is stored in place of the
%% monitor reference -- presumably so tests can use placeholder values instead
%% of real pids; confirm against the test suite.
-ifdef(TEST).
monitor(_) -> ok.
-else.
monitor(Pid) -> erlang:monitor(process, Pid).
-endif.

%% Pids of all children of Supervisor that currently have a pid. Exits with
%% {error, supervisor_not_running} when the supervisor is not alive.
child_pids(Supervisor) ->
    case alive(Supervisor) of
        false ->
            error_logger:error_msg("~p Supervisor ~p not running. Giving up.\n", [?MODULE, Supervisor]),
            exit({error, supervisor_not_running});
        _ ->
            [Pid || {_, Pid, _, _} <- supervisor:which_children(Supervisor), is_pid(Pid)]
    end.

%% Whether the supervisor, given as registered name or pid, is a live process.
%% An unregistered name resolves to 'undefined' and counts as not alive.
alive(undefined) ->
    false;
alive(Supervisor) when is_atom(Supervisor) ->
    alive(erlang:whereis(Supervisor));
alive(Supervisor) when is_pid(Supervisor) ->
    erlang:is_process_alive(Supervisor).

%% Number of objects stored in Table.
table_size(Table) ->
    Count = ets:info(Table, size),
    %% ets:info/2 returns 'undefined' for a table that does not exist; fail
    %% right here instead of letting that value leak into arithmetic.
    true = is_integer(Count),
    Count.