├── Emakefile
├── README
├── ebin
│   └── distreg.app
└── src
    ├── distreg.app.src
    ├── distreg.erl
    ├── distreg.hrl
    ├── distreg_sup.erl
    ├── distreg_tracker.erl
    └── distreg_util.erl
--------------------------------------------------------------------------------
/Emakefile:
--------------------------------------------------------------------------------
{['src/*'],[{outdir,"ebin"}]}.
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
Distreg is a distributed process registry. Its main goal is to spread named worker processes (workers) evenly across an Erlang cluster using consistent hashing. It is designed to keep inter-node communication to a minimum. Only one instance of a worker identified by a given name is allowed to run.

Every worker is registered by its name and hashed to determine which node in the cluster it belongs to. Every node in the cluster keeps track of its own workers. Registering a worker on the local node is very fast and does not require any inter-node communication. When you need to call a worker on another node, distreg will determine which node it should be running on and call it on that node.

Distreg handles nodes leaving or joining, as well as any conflicts. Whenever the cluster membership changes, distreg recalculates the consistent hashing ring and scans all locally registered processes to check whether they need to be moved to another node. If the node where a worker should be running has changed, the worker is informed that it should restart itself on another node, and it is registered on the node where it should be running. The worker does not need to restart/die immediately, because both nodes keep track of it. In the case of a conflict (more than one instance of a worker running), all worker instances on the "wrong" nodes are informed that they should die immediately and are unregistered immediately as well.
Messages a worker might receive from distreg:
{distreg,shouldrestart}
{distreg,dienow}

Similar functionality exists in the global module. global, however, only keeps multiple processes from being registered under the same name. It is also quite slow, since it syncs the process table across all nodes for every registration. gproc is another alternative, but it does not handle dynamic cluster reconfiguration, and though it is much faster than global, it is still much slower than distreg for the use case distreg was designed for.

Distreg does not need to be running on an Erlang cluster. It can just be a very fast and simple local process registry.

If a node in the cluster is not running distreg, it will be ignored.
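A minimal usage sketch based on the description above (my_worker is a hypothetical gen_server whose start_link returns {ok,Pid} and which handles the two distreg messages; assumes the distreg application is started):

    %% Start (or look up) a worker named {counter,7}; distreg picks the node.
    {ok, Pid} = distreg:start({counter,7}, fun my_worker:start_link/0),
    %% Call it like a gen_server, wherever in the cluster it lives:
    Count = distreg:call({counter,7}, get_count),
    %% Purely local registration, no cluster involved:
    ok = distreg:reg(self(), my_local_name),
    Pid2 = distreg:whereis(my_local_name).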
--------------------------------------------------------------------------------
/ebin/distreg.app:
--------------------------------------------------------------------------------
{application,distreg,
             [{description,"Distributed process registry."},
              {vsn,"0.1"},
              {modules,[distreg,distreg_sup,distreg_tracker,distreg_util]},
              {registered,[distreg_sup,distreg_tracker]},
              {applications,[kernel,stdlib]},
              {mod,{distreg_sup,[]}},
              {start_phases,[]}]}.
--------------------------------------------------------------------------------
/src/distreg.app.src:
--------------------------------------------------------------------------------
{application, distreg, [{description, "Distributed process registry."},
                        {vsn, "0.1"},
                        {modules, [distreg_sup,distreg_tracker,distreg_util,distreg]},
                        {registered, [distreg_sup,distreg_tracker]},
                        {applications, [kernel, stdlib]},
                        {mod, {distreg_sup, []}},
                        {start_phases, []}
]}.
--------------------------------------------------------------------------------
/src/distreg.erl:
--------------------------------------------------------------------------------
-module(distreg).
-define(CALL(Msg),gen_server:call(distreg_tracker,Msg,infinity)).
-define(CAST(Msg),gen_server:cast(distreg_tracker,Msg)).
-export([reg/1,reg/2,unreg/1,track/1, whereis/1,
         call/2,cast/2,inform/2,start/2,
         procinfo/1,node_for_name/1,processes/0,
         % Used internally, do not call from client code.
         node_for_hash/2]).
-include("distreg.hrl").
% -define(NOTEST, 1).
-include_lib("eunit/include/eunit.hrl").
-compile(export_all).


procinfo(Pid) ->
    case ets:lookup(?PIDT,Pid) of
        [{Pid,L}] ->
            L;
        _ ->
            undefined
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Distributed worker API functions.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name: term
% StartFunction: {Module,Func,Args} | {Module,Func} | {Fun,Param} | Fun/0
% StartFunctionReturn: {ok,Pid} | Pid | anything else for error
start(Name,StartFunction) ->
    case get_g_pid(Name) of
        undefined ->
            Node = node_for_name(Name),
            case Node == node() of
                true ->
                    case callstart(StartFunction) of
                        {ok,Pid} when is_pid(Pid) ->
                            reg_global(Pid,Name,StartFunction);
                        Pid when is_pid(Pid) ->
                            reg_global(Pid,Name,StartFunction);
                        Err ->
                            Err
                    end;
                false ->
                    rpc:call(Node,?MODULE,start,[Name,StartFunction])
            end;
        Pid ->
            {ok,Pid}
    end.


call(Name,Msg) ->
    call(Name,Msg,infinity).
call(Name,Msg,Timeout) ->
    case get_g_pid(Name) of
        undefined ->
            Node = node_for_name(Name),
            case Node == node() of
                true ->
                    {error,worker_not_found};
                false ->
                    rpc:call(Node,distreg,call,[Name,Msg,Timeout],Timeout)
            end;
        Pid ->
            gen_server:call(Pid,Msg,Timeout)
    end.

cast(Name,Msg) ->
    case get_g_pid(Name) of
        undefined ->
            Node = node_for_name(Name),
            case Node == node() of
                true ->
                    {error,worker_not_found};
                false ->
                    rpc:cast(Node,distreg,cast,[Name,Msg])
            end;
        Pid ->
            gen_server:cast(Pid,Msg)
    end.

inform(Name,Msg) ->
    case get_g_pid(Name) of
        undefined ->
            Node = node_for_name(Name),
            case Node == node() of
                true ->
                    {error,worker_not_found};
                false ->
                    rpc:cast(Node,distreg,inform,[Name,Msg])
            end;
        Pid ->
            Pid ! Msg
    end.


get_g_pid(Name) ->
    case ets:lookup(?NAMET_GLOBAL,Name) of
        [{Name,Pid}] ->
            Pid;
        _ ->
            undefined
    end.

node_for_name(Name) ->
    case ets:lookup(?PIDT,nodes) of
        [{nodes,Nodes}] ->
            node_for_hash(erlang:phash2(Name),Nodes);
        _ ->
            undefined
    end.
node_for_hash(HName,Nodes) ->
    Range = ?MAX_HASH div tuple_size(Nodes),
    NodePos = HName div Range + 1,
    element(min(tuple_size(Nodes),NodePos),Nodes).
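% Worked example (illustrative values): with Nodes = {'a@h','b@h','c@h'} and
% ?MAX_HASH = 2^27, Range = 134217728 div 3 = 44739242. A name hashing to
% 100000000 gives NodePos = 100000000 div 44739242 + 1 = 3, i.e. 'c@h'.
% The min/2 clamp pins the few hashes above tuple_size(Nodes)*Range to the
% last node, since ?MAX_HASH is usually not evenly divisible by the node count.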

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Functions for processes that run only on the local node.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name = term
% Result: ok | already_named | name_exists
reg(Name) ->
    reg(self(),Name).
reg(Pid,Name) ->
    (catch ?CALL({register,Pid,Name})).

unreg(PidOrName) ->
    (catch ?CALL({unregister,PidOrName})).

whereis(Name) ->
    case ets:lookup(?NAMET,Name) of
        [{Name,Pid}] ->
            Pid;
        _ ->
            undefined
    end.

% Saves the pid to the table, but does not set any name.
track(Pid) ->
    ?CALL({track,Pid}).

processes() ->
    ets:tab2list(?NAMET).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Unexported utility functions
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
reg_global(Pid,Name,StartFunction) ->
    case ?CALL({register_global,Pid,Name}) of
        ok ->
            {ok,Pid};
        % Name already registered, kill this pid. Calling start again will return the existing PID under this name.
        name_exists ->
            exit(Pid,kill),
            start(Name,StartFunction)
    end.
callstart({Mod,Fun,Param}) ->
    apply(Mod,Fun,Param);
callstart({Fun,Param}) when is_function(Fun) ->
    apply(Fun,Param);
callstart({Mod,Fun}) ->
    apply(Mod,Fun,[]);
callstart(Fun) when is_function(Fun) ->
    Fun().
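% The four StartFunction shapes accepted above are equivalent ways to start
% the same worker (my_worker:start_link/0,1 are hypothetical):
%   distreg:start(Name, {my_worker,start_link,[Name]})       % apply(M,F,Args)
%   distreg:start(Name, {fun my_worker:start_link/1,[Name]}) % apply(Fun,Args)
%   distreg:start(Name, {my_worker,start_link})               % M:F()
%   distreg:start(Name, fun my_worker:start_link/0)           % Fun()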


-ifdef(TEST).

-define(SLAVE1,'slave1@127.0.0.1').
-define(SLAVE2,'slave2@127.0.0.1').

startapp() ->
    application:start(distreg).
startapp(Node) ->
    {_,_,Path} = code:get_object_code(?MODULE),
    rpc:call(Node,code,add_path,[filename:dirname(Path)]),
    rpc:call(Node,application,start,[distreg]),
    timer:sleep(500),
    ok.
stopapp(_) ->
    application:stop(distreg).

startglobal() ->
    startapp(),
    case node() == 'nonode@nohost' of
        true ->
            net_kernel:start(['master@127.0.0.1',longnames]);
        _ ->
            ok
    end,
    Cookie = erlang:get_cookie(),
    ExNodes = nodes(),
    slave:start_link('127.0.0.1',slave1,"-setcookie "++atom_to_list(Cookie)),
    slave:start_link('127.0.0.1',slave2,"-setcookie "++atom_to_list(Cookie)),
    slave:start_link('127.0.0.1',slavedummy,"-setcookie "++atom_to_list(Cookie)),
    timer:sleep(1000),
    ['slavedummy@127.0.0.1'|ExNodes].
stopglobal(_) ->
    stopapp(ok).

local_test_() ->
    {setup,
     fun startapp/0,
     fun stopapp/1,
     fun localtests/1}.

global_test_() ->
    {setup,
     fun startglobal/0,
     fun stopglobal/1,
     fun globaltests/1}.


localtests(_) ->
    [?_assertEqual(ok,reg(asdf)),
     ?_assertEqual(self(),?MODULE:whereis(asdf)),
     ?_assertEqual([{asdf,self()}],ets:tab2list(?NAMET)),
     ?_assertEqual(ok,unreg(asdf)),
     ?_assertEqual([],ets:tab2list(?NAMET)),
     ?_assertEqual({self(),[]},lists:keyfind(self(),1,ets:tab2list(?PIDT)))].

globaltests(IgnoredNodes) ->
    % First compare ignored nodes; master should be the only one running distreg.
    [?_assertEqual(lists:sort([?SLAVE1,?SLAVE2|IgnoredNodes]),lists:sort(?CALL(ignored_nodes))),
     ?_assertEqual(node(),node_for_name(1)),
     % Start distreg on the first slave.
     ?_assertEqual(ok,startapp(?SLAVE1)),
     % Slave1 is no longer ignored.
     ?_assertEqual(lists:sort([?SLAVE2|IgnoredNodes]),lists:sort(?CALL(ignored_nodes))),
     ?_assertEqual(node(),node_for_name(1)),
     ?_assertEqual(node(),node_for_name(2)),
     ?_assertEqual(?SLAVE1,node_for_name(3)),
     % Start 100 processes.
     % Check they are all spread across 2 nodes.
     % Add a node, causing processes to restart themselves.
     % Check they are spread across 3 nodes.
     ?_assertEqual(ok,test_distproc()),
     ?_assertEqual(ok,startapp(?SLAVE2)),
     ?_assertEqual(lists:sort(IgnoredNodes),lists:sort(?CALL(ignored_nodes))),
     fun test_check_rebalance/0
    ].

test_distproc() ->
    L = [start_test_proc(N) || N <- lists:seq(1,100)],
    [?assertEqual(node(Pid),Node) || {_N,Pid,Node} <- L],
    [{_,AL},{_,BL}] = test_group(L,[]),
    io:format(user,"Distribution ~p~n", [{length(AL),length(BL)}]),
    ?assert(length(AL) >= 30 andalso 70 >= length(AL)),
    ?assert(length(BL) >= 30 andalso 70 >= length(BL)),
    ok.
% Workers should now be running across three nodes.
test_check_rebalance() ->
    timer:sleep(2000),
    [?assertEqual(node_for_name(N),node(test_get_pid(N))) || N <- lists:seq(1,100)],
    L = [{N,test_get_pid(N),node_for_name(N)} || N <- lists:seq(1,100)],
    [{_,AL},{_,BL},{_,CL}] = test_group(L,[]),
    io:format(user,"Distribution ~p~n", [{length(AL),length(BL),length(CL)}]),
    ?assert(length(AL) >= 25 andalso 45 >= length(AL)),
    ?assert(length(BL) >= 25 andalso 45 >= length(BL)),
    ?assert(length(CL) >= 25 andalso 45 >= length(CL)),
    ok.

test_get_pid(Name) ->
    case get_g_pid(Name) of
        undefined ->
            Node = node_for_name(Name),
            rpc:call(Node,?MODULE,get_g_pid,[Name]);
        Pid ->
            Pid
    end.

test_group([{N,Pid,Node}|T],L) ->
    case lists:keyfind(Node,1,L) of
        false ->
            test_group(T,[{Node,[{N,Pid}]}|L]);
        {Node,KL} ->
            test_group(T,[{Node,[{N,Pid}|KL]}|lists:keydelete(Node,1,L)])
    end;
test_group([],L) ->
    L.

start_test_proc(N) ->
    {ok,Pid} = start(N,fun() -> spawn(?MODULE,test_proc,[self(),N]) end),
    {N,Pid,node_for_name(N)}.
test_proc(Home,N) ->
    receive
        {distreg,shouldrestart} ->
            Home ! {self(),should},
            spawn(fun() -> timer:sleep(100), start_test_proc(N) end);
        {distreg,dienow} ->
            Home ! {self(),dienow}
    end.

-endif.
--------------------------------------------------------------------------------
/src/distreg.hrl:
--------------------------------------------------------------------------------
% Public ETS tables:

% Every registered process has a value in the ?PIDT ETS table:
% {Pid,[{name,Name},{nametable,[?NAMET|?NAMET_GLOBAL]},{cleanup,Function},..]}
% Info about the local node and a tuple of all nodes (sorted by name) is stored in this table as well:
% {nodes,{node1,node2,node3,...}}
% {node_range,LocalNodePos,LocalNodeFrom,LocalNodeTo} -> workers are spread across nodes using consistent hashing with phash2
-define(PIDT,distreg_pids).
% {Name,Pid}
-define(NAMET,distreg_names).
% {Name,Pid}
-define(NAMET_GLOBAL,distreg_names_global).
% pow(2,27)
-define(MAX_HASH,134217728).
-define(LOCK,distreg_lock).
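% Illustration (made-up values): on node 'b@h' in a cluster {'a@h','b@h','c@h'},
% ?PIDT would hold entries such as
%   {nodes,{'a@h','b@h','c@h'}}
%   {node_range,2,44739242,89478484}
% meaning 'b@h' owns every name whose erlang:phash2/1 value falls in
% 44739242..89478484. ?MAX_HASH = 2^27 matches the default range of
% erlang:phash2/1, which returns values in 0..2^27-1.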
--------------------------------------------------------------------------------
/src/distreg_sup.erl:
--------------------------------------------------------------------------------
-module(distreg_sup).
-behavior(supervisor).
-export([start_link/0, init/1,start/2,stop/1]).
-include("distreg.hrl").

start(_Type, _Args) ->
    start_link().
stop(_State) ->
    [exit(Pid,kill) || {Pid,_} <- ets:tab2list(?PIDT), is_pid(Pid)],
    ok.

start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
    {ok, {{one_for_one, 500, 1},
          [
           {distreg_tracker,
            {distreg_tracker, start, []},
            permanent,
            100,
            worker,
            [distreg_tracker]}
          ]}}.
--------------------------------------------------------------------------------
/src/distreg_tracker.erl:
--------------------------------------------------------------------------------
-module(distreg_tracker).
-behaviour(gen_server).
-export([start/0, stop/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
-export([print_info/0,reload/0,deser_prop/1]).
-include("distreg.hrl").

% When distreg_tracker starts up, check which connected nodes are running distreg. Calculate consistent hashing boundaries.
% Every time a node comes online, check if it is running distreg_tracker.
% If not, add it to the ignored nodes, but check again in 10s.
% If yes, recalculate consistent hashing boundaries and scan the table of processes.
% Every time a node goes offline, recalculate and scan the table of processes.
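% Example of the rebalance rule (illustrative): if the ring was {'a@h','b@h'}
% and 'c@h' joins, every worker whose name now hashes outside its current
% node's recalculated range receives {distreg,shouldrestart} and is expected
% to re-run distreg:start/2, which registers it on the node that now owns its
% hash. A worker found running on two nodes at once gets {distreg,dienow} on
% the losing node and is killed shortly after if it has not exited by itself.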

regpid(Pid,Name) ->
    regpid(Pid,Name,?NAMET).
regpid(Pid,Name,Nametable) ->
    case ets:lookup(Nametable,Name) of
        [] ->
            case ets:lookup(?PIDT,Pid) of
                [{_,L}] ->
                    ok;
                _ ->
                    L = []
            end,
            case proplists:get_value(name,L) of
                undefined ->
                    ets:insert(?PIDT,{Pid,lists:keystore(name,1,lists:keystore(nametable,1,L,{nametable,Nametable}),{name,Name})}),
                    ets:insert(Nametable,{Name,Pid}),
                    ok;
                Name ->
                    ok;
                _Nm ->
                    already_named
            end;
        [{Name,Pid}] ->
            ok;
        _ ->
            name_exists
    end.

unregpid(Pid) when is_pid(Pid) ->
    case ets:lookup(?PIDT,Pid) of
        [{_,L}] ->
            case proplists:get_value(name,L) of
                undefined ->
                    ok;
                Name ->
                    ets:delete(?NAMET,Name)
            end,
            ets:insert(?PIDT,{Pid,lists:keydelete(name,1,lists:keydelete(nametable,1,L))}),
            ok;
        _X ->
            ok
    end;
unregpid(Name) ->
    case ets:lookup(?NAMET,Name) of
        [{_,Pid}] ->
            unregpid(Pid);
        _ ->
            ok
    end.

trackpid(Pid) ->
    case ets:lookup(?PIDT,Pid) of
        [{_,_}] ->
            known;
        _ ->
            ets:insert(?PIDT,{Pid,[]}),
            ok
    end.

regglobal(L) ->
    regglobal(L,[]).
regglobal([{Name,Pid}|T],L) ->
    case regpid(Pid,Name,?NAMET_GLOBAL) of
        ok ->
            erlang:monitor(process,Pid),
            regglobal(T,L);
        already_named ->
            regglobal(T,L);
        name_exists ->
            regglobal(T,[{Name,Pid}|L])
    end;
regglobal([],L) ->
    L.


-record(dp,{problem_nodes = [], ignored_nodes = []}).
-define(R2P(Record), distreg_util:rec2prop(Record, record_info(fields, dp))).
-define(P2R(Prop), distreg_util:prop2rec(Prop, dp, #dp{}, record_info(fields, dp))).

handle_call({register,Pid,Name},_From,P) ->
    case regpid(Pid,Name) of
        ok ->
            erlang:monitor(process,Pid),
            {reply,ok,P};
        X ->
            {reply,X,P}
    end;
handle_call({unregister,Pid},_From,P) ->
    {reply,unregpid(Pid),P};
handle_call({getreg,Name},_From,P) ->
    {reply,distreg:whereis(Name),P};
handle_call({track,Pid},_From,P) ->
    {reply,trackpid(Pid),P};
% Called from other nodes. Sends us processes that should be running on this node.
% Return: {ok,L}
% L - list of workers already registered locally, they need to be killed on the remote node.
% ProcInfo: [{WorkerName,WorkerPid},..]
handle_call({remote_pids,ProcInfo},_From,P) ->
    {reply,{ok,regglobal(ProcInfo)},P};
handle_call({register_global,Pid,Name},_From,P) ->
    case regpid(Pid,Name,?NAMET_GLOBAL) of
        ok ->
            erlang:monitor(process,Pid),
            {reply,ok,P};
        X ->
            {reply,X,P}
    end;
handle_call(ignored_nodes,_,P) ->
    {reply,P#dp.ignored_nodes,P};
handle_call({reload}, _, P) ->
    code:purge(?MODULE),
    code:load_file(?MODULE),
    {reply, ok, ?MODULE:deser_prop(?R2P(P))};
handle_call(stop, _, P) ->
    {stop, shutdown, stopped, P}.

deser_prop(P) ->
    ?P2R(P).
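% Hot-reload round trip: ?R2P flattens the #dp{} state into a proplist and
% deser_prop/?P2R rebuilds the record in the newly loaded module, so
%   distreg_tracker:reload().
% can swap in a new code version even if #dp{} gained or lost fields.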

handle_cast({ignore_node,Node},P) ->
    case lists:member(Node,P#dp.ignored_nodes) of
        true ->
            {noreply,P};
        false ->
            % Check again in 10s.
            erlang:send_after(10000,self(),{nodeup,Node}),
            {noreply,P#dp{ignored_nodes = [Node|P#dp.ignored_nodes]}}
    end;
handle_cast({nolonger_ignored,Node},P) ->
    {noreply,P#dp{ignored_nodes = lists:delete(Node,P#dp.ignored_nodes)}};
handle_cast(save_nodeinfo,P) ->
    save_nodeinfo(P#dp.ignored_nodes),
    {noreply,P};
handle_cast({nodeup,N},P) ->
    handle_info({nodeup,N},P);
handle_cast({nodeup,Callback,N},P) ->
    handle_info({nodeup,Callback,N},P);
handle_cast({print_info},P) ->
    io:format("~p~n", [?R2P(P)]),
    {noreply,P};
handle_cast(_, P) ->
    {noreply, P}.

handle_info({'DOWN', _Monitor, _, Pid, _Reason},P) ->
    case distreg:procinfo(Pid) of
        undefined ->
            ok;
        L ->
            case proplists:get_value(name,L) of
                undefined ->
                    ok;
                Name ->
                    ets:delete(proplists:get_value(nametable,L),Name)
            end,
            ets:delete(?PIDT,Pid)
    end,
    {noreply,P};
handle_info({nodeup,Node},P) ->
    handle_info({nodeup,true,Node},P);
handle_info({nodeup,Callback,Node},P) ->
    % Check if this gen_server is running on that node.
    spawn(fun() ->
        case is_node_participating(Node) of
            true ->
                case Callback of
                    true ->
                        % This is a safety measure to prevent inconsistencies where
                        % a node thinks some other node is not participating.
                        rpc:cast(Node,gen_server,cast,[?MODULE,{nodeup,false,Node}]);
                    false ->
                        ok
                end,
                case lists:member(Node,P#dp.ignored_nodes) of
                    true ->
                        gen_server:cast(?MODULE,{nolonger_ignored,Node}),
                        NL = lists:delete(Node,P#dp.ignored_nodes);
                    false ->
                        NL = P#dp.ignored_nodes
                end,
                save_nodeinfo_op(NL);
            false ->
                gen_server:cast(?MODULE,{ignore_node,Node})
        end
    end),
    {noreply,P};
handle_info({nodedown,_},P) ->
    save_nodeinfo(P#dp.ignored_nodes),
    {noreply,P};
handle_info({stop},P) ->
    handle_info({stop,noreason},P);
handle_info({stop,Reason},P) ->
    {stop, Reason, P};
handle_info(_, P) ->
    {noreply, P}.

terminate(_, _) ->
    ok.
code_change(_, P, _) ->
    {ok, P}.

init([]) ->
    net_kernel:monitor_nodes(true),
    case ets:info(?PIDT) of
        undefined ->
            ets:new(?PIDT, [named_table,public,{heir,whereis(distreg_sup),<<>>}]);
        _ ->
            [erlang:monitor(process,Pid) || {Pid,_Info} <- ets:tab2list(?PIDT), is_pid(Pid)]
    end,
    case ets:info(?NAMET) of
        undefined ->
            ets:new(?NAMET, [named_table,public,{heir,whereis(distreg_sup),<<>>}]);
        _ ->
            ok
    end,
    case ets:info(?NAMET_GLOBAL) of
        undefined ->
            ets:new(?NAMET_GLOBAL, [named_table,public,{heir,whereis(distreg_sup),<<>>}]);
        _ ->
            ok
    end,
    spawn(fun() ->
        Ignored = [Nd || Nd <- nodes(), is_node_participating(Nd) == false],
        [gen_server:cast(?MODULE,{ignore_node,Nd}) || Nd <- Ignored],
        save_nodeinfo_op(Ignored)
    end),
    % Nodeup is sent explicitly for the case where distreg was started after nodes were already connected,
    % because those nodes will have checked whether distreg is running on the local node and concluded that it is not.
    % Well-written systems should not connect to nodes before all applications have started, however.
    gen_server:abcast(nodes(),?MODULE,{nodeup,node()}),
    {ok,#dp{}}.


is_node_participating(Node) ->
    Res = rpc:call(Node,erlang,whereis,[?MODULE]),
    is_pid(Res).


save_nodeinfo(Ignored) ->
    spawn(fun() -> save_nodeinfo_op(Ignored) end).
% HAS TO BE CALLED IN A SEPARATE PROCESS
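% Why a separate process: save_nodeinfo_op/1 serializes itself by registering
% the name distreg_checknodes. A concurrent run fails that register/2 call,
% monitors the winner and retries once it exits, so table scans never overlap.
% Running it inside the distreg_tracker gen_server would block the server on
% that monitor and on the remote rpc calls below.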
save_nodeinfo_op(IgnoredNodes) ->
    case catch register(distreg_checknodes,self()) of
        true ->
            case ets:lookup(?PIDT,nodes) of
                [{nodes,OldNodes}] ->
                    ok;
                _ ->
                    OldNodes = {}
            end,
            Nodes = list_to_tuple(lists:sort(lists:subtract([node()|nodes()],IgnoredNodes))),
            case OldNodes == Nodes of
                true ->
                    ok;
                false ->
                    Pos = find_myself(1,Nodes),
                    Range = ?MAX_HASH div tuple_size(Nodes),
                    MyRangeFrom = Pos*Range-Range,
                    MyRangeTo1 = Pos * Range,
                    case MyRangeTo1 + tuple_size(Nodes) >= ?MAX_HASH of
                        true ->
                            MyRangeTo = ?MAX_HASH;
                        false ->
                            MyRangeTo = MyRangeTo1
                    end,
                    ets:insert(?PIDT,[{node_range,Pos,MyRangeFrom, MyRangeTo},{nodes,Nodes}]),
                    ToMove1 = ets:foldl(fun({Name,Pid},WorkersToMove) ->
                                HN = erlang:phash2(Name),
                                case HN >= MyRangeFrom andalso HN =< MyRangeTo of
                                    true ->
                                        WorkersToMove;
                                    false ->
                                        Node = distreg:node_for_hash(HN,Nodes),
                                        case node(Pid) /= Node of
                                            true ->
                                                Pid ! {distreg,shouldrestart},
                                                [{Node,Name,Pid}|WorkersToMove];
                                            false ->
                                                WorkersToMove
                                        end
                                end
                            end,[],?NAMET_GLOBAL),
                    ToMove = group(ToMove1,[]),
                    [begin
                        case rpc:call(Node,gen_server,call,[?MODULE,{remote_pids,ProcInfo},20000]) of
                            {ok,L} ->
                                % regglobal/2 returns conflicts as {Name,Pid} tuples.
                                [begin
                                    case node(Pid) /= Node of
                                        true ->
                                            pid_conflicted(Pid),
                                            ets:delete(?PIDT,Pid),
                                            ets:delete(?NAMET_GLOBAL,Name);
                                        false ->
                                            ok
                                    end
                                end || {Name,Pid} <- L];
                            _X ->
                                gen_server:cast(?MODULE,{ignore_node,Node}),
                                gen_server:cast(?MODULE,save_nodeinfo)
                        end
                    end || {Node,ProcInfo} <- ToMove]
            end;
        _ ->
            Monitor = erlang:monitor(process,distreg_checknodes),
            receive
                {'DOWN', Monitor, _, _Pid, _Reason} ->
                    save_nodeinfo_op(IgnoredNodes)
            end
    end.

pid_conflicted(Pid) ->
    Pid ! {distreg,dienow},
    spawn(fun() -> timer:sleep(1000),exit(Pid,kill) end).

group([{GK,V,K}|T],L) ->
    case lists:keyfind(GK,1,L) of
        false ->
            group(T,[{GK,[{V,K}]}|L]);
        {GK,KL} ->
            group(T,[{GK,[{V,K}|KL]}|lists:keydelete(GK,1,L)])
    end;
group([],L) ->
    L.

find_myself(N,Nodes) when element(N,Nodes) == node() ->
    N;
find_myself(N,Nodes) ->
    find_myself(N+1,Nodes).


start() ->
    gen_server:start_link({local,?MODULE},?MODULE, [], []).
stop() ->
    gen_server:call(?MODULE, stop).
print_info() ->
    gen_server:cast(?MODULE,{print_info}).
reload() ->
    gen_server:call(?MODULE, {reload}).
--------------------------------------------------------------------------------
/src/distreg_util.erl:
--------------------------------------------------------------------------------
-module(distreg_util).
-compile(export_all).

% record to proplist
rec2prop(Rec, RecordFields) ->
    loop_rec(RecordFields, 1, Rec, []).

loop_rec([H|T], N, Rec, L) ->
    loop_rec(T, N+1, Rec, [{H, element(N+1, Rec)}|L]);
loop_rec([], _, _, L) ->
    L.

% convert proplist to record
prop2rec(Prop, RecName, DefRec, RecordFields) ->
    loop_fields(erlang:make_tuple(tuple_size(DefRec), RecName), RecordFields, DefRec, Prop, 2).
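% Round-trip example using the tracker's #dp{} record:
%   rec2prop(#dp{ignored_nodes=[n1]}, record_info(fields, dp))
% yields [{ignored_nodes,[n1]},{problem_nodes,[]}] (field order reversed), and
%   prop2rec(Props, dp, #dp{}, record_info(fields, dp))
% rebuilds the record, taking any missing field's value from the default #dp{}.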

loop_fields(Tuple, [Field|T], DefRec, Props, N) ->
    case lists:keysearch(Field, 1, Props) of
        {value, {_, Val}} ->
            loop_fields(setelement(N, Tuple, Val), T, DefRec, Props, N+1);
        false ->
            loop_fields(setelement(N, Tuple, element(N, DefRec)), T, DefRec, Props, N+1)
    end;
loop_fields(Tuple, [], _, _, _) ->
    Tuple.
--------------------------------------------------------------------------------