├── rebar ├── .hgignore ├── .gitignore ├── src ├── riak_core.proto ├── process_proxy.erl ├── riak_core_vnode_sup.erl ├── riak_core_test_util.erl ├── app_helper.erl ├── riak_core_ring_util.erl ├── riak_core_eventhandler_guard.erl ├── json_pp.erl ├── riak_core_eventhandler_sup.erl ├── riak_core_wm_urlmap.erl ├── riak_core_sup.erl ├── riak_core_handoff_manager.erl ├── riak_core_handoff_listener.erl ├── riak_core_node_watcher_events.erl ├── riak_core_cinfo_core.erl ├── riak_core_ring_events.erl ├── riak_core_app.erl ├── riak_core_web.erl ├── riak_core_bucket.erl ├── riak_core_ring_handler.erl ├── spiraltime.erl ├── riak_core.erl ├── riak_core_handoff_receiver.erl ├── gen_nb_server.erl ├── bloom.erl ├── riak_core_sysmon_minder.erl ├── riak_core_config.erl ├── riak_core_sysmon_handler.erl ├── riak_core_apl.erl ├── riak_core_tracer.erl ├── riak_core_handoff_sender.erl ├── priority_queue.erl ├── riak_core_vnode_master.erl ├── riak_core_util.erl ├── chash.erl ├── riak_core_gossip.erl ├── vclock.erl └── riak_core_ring_manager.erl ├── include ├── riak_core_handoff.hrl └── riak_core_vnode.hrl ├── Makefile ├── .hgtags ├── rebar.config ├── README.org ├── ebin └── riak_core.app └── test ├── test_guarded_event_handler.erl ├── mock_vnode.erl └── core_vnode_eqc.erl /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/riak_core/master/rebar -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax regex 2 | .*~ 3 | ^ebin/.*.beam 4 | include/riak_core_pb.hrl 5 | ^.eunit -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit/* 2 | deps/* 3 | priv/* 4 | *.o 5 | *.beam 6 | include/*_pb.hrl 7 | *~ 8 | doc/* 9 | 
-------------------------------------------------------------------------------- /src/riak_core.proto: -------------------------------------------------------------------------------- 1 | message RiakObject_PB { 2 | required bytes bucket = 1; 3 | required bytes key = 2; 4 | required bytes val = 3; 5 | } 6 | 7 | -------------------------------------------------------------------------------- /include/riak_core_handoff.hrl: -------------------------------------------------------------------------------- 1 | -define(PT_MSG_INIT, 0). 2 | -define(PT_MSG_OBJ, 1). 3 | -define(PT_MSG_OLDSYNC, 2). 4 | -define(PT_MSG_SYNC, 3). 5 | -define(PT_MSG_CONFIGURE, 4). 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .PHONY: deps 4 | 5 | all: deps compile 6 | 7 | compile: 8 | ./rebar compile 9 | 10 | deps: 11 | ./rebar get-deps 12 | 13 | clean: 14 | ./rebar clean 15 | 16 | distclean: clean 17 | ./rebar delete-deps 18 | 19 | eunit: 20 | ./rebar skip_deps=true eunit 21 | 22 | docs: deps 23 | ./rebar skip_deps=true doc 24 | 25 | dialyzer: compile 26 | @dialyzer -Wno_return -c apps/riak_core/ebin 27 | 28 | 29 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 1be0ce0200a7ac51cc3e0ca7ee9c244a539e409f riak_core-0.13.0rc1 2 | 1e43d42258bcc02b5797ee73f02fae60bfeac67e riak_core-0.13.0rc2 3 | 69b0f62c1f01b7a3c48d9379f6fe0e152331deee riak_core-0.13.0rc3 4 | 82ebfed1e301bff550f9690de874ca1ffb55cd17 riak_core-0.13.0rc5 5 | 1048210d6c679d3599b362b6df9a0002cf3c60a7 riak_core-0.13.0rc6 6 | c1b5e936c950aa4e29b4ffdc760e5a2651349018 riak_core-0.13.0rc7 7 | cb8c8703125a21e2eb3454236449ca957b26bacb riak_core-0.13.0rc8 8 | 063f7cdf287721f4d160c0c99192667a653a237f riak_core-0.13.0rc9 9 | 5b2683c697e6f322920fd276177c08764c722c98 riak_core-0.13.0 
10 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_first_files, ["src/gen_nb_server.erl", "src/gen_server2.erl"]}. 2 | {cover_enabled, true}. 3 | {edoc_opts, [{preprocess, true}]}. 4 | {deps, [ 5 | {protobuffs, "0.6.0", {git, "git://github.com/basho/erlang_protobuffs", 6 | {tag, "protobuffs-0.6.0"}}}, 7 | {basho_stats, ".*", {git, "git://github.com/basho/basho_stats", "HEAD"}}, 8 | {riak_sysmon, ".*", {git, "git://github.com/basho/riak_sysmon", {branch, "master"}}}, 9 | {webmachine, "1.8.*", {git, "git://github.com/basho/webmachine", 10 | {tag, "webmachine-1.8.0"}}} 11 | ]}. 12 | -------------------------------------------------------------------------------- /include/riak_core_vnode.hrl: -------------------------------------------------------------------------------- 1 | -type sender_type() :: fsm | server | raw. 2 | -type sender() :: {sender_type(), reference(), pid()} | 3 | %% TODO: Double-check that these special cases are kosher 4 | {server, undefined, undefined} | % special case in 5 | % riak_core_vnode_master.erl 6 | {fsm, undefined, pid()} | % special case in 7 | % riak_kv_util:make_request/2.erl 8 | ignore. 9 | -type partition() :: non_neg_integer(). 10 | -type vnode_req() :: term(). 11 | 12 | -record(riak_vnode_req_v1, { 13 | index :: partition(), 14 | sender=ignore :: sender(), 15 | request :: vnode_req()}). 16 | 17 | 18 | -record(riak_core_fold_req_v1, { 19 | foldfun :: fun(), 20 | acc0 :: term()}). 21 | 22 | -define(VNODE_REQ, #riak_vnode_req_v1). 23 | -define(FOLD_REQ, #riak_core_fold_req_v1). 24 | 25 | -------------------------------------------------------------------------------- /src/process_proxy.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% Copyright (c) 2007-2010 Basho Technologies, Inc. 
%% @doc Tiny relay process. It registers itself under a given name and
%% forwards every message it receives to another destination, until it
%% receives the atom `stop'.
-module(process_proxy).
-export([start_link/2, init/1, stop/1]).

%% @doc Start a linked proxy registered as `RegName' that relays to
%% `ProxyTo'. The result is whatever init/1 acknowledges through
%% proc_lib:init_ack/2, i.e. `{ok, Pid}'.
start_link(RegName, ProxyTo) ->
    InitArgs = [self(), RegName, ProxyTo],
    proc_lib:start_link(?MODULE, init, [InitArgs]).

%% @private
%% Entry point of the spawned process: claim the name, tell the starter we
%% are up, then enter the relay loop.
init([Starter, RegName, ProxyTo]) ->
    Self = self(),
    erlang:register(RegName, Self),
    proc_lib:init_ack(Starter, {ok, Self}),
    loop(ProxyTo).

%% @doc Ask the proxy registered as (or identified by) `Name' to terminate.
stop(Name) ->
    erlang:send(Name, stop).

%% Take the next message from the mailbox, whatever it is, and dispatch it.
loop(ProxyTo) ->
    receive
        Msg ->
            relay(Msg, ProxyTo)
    end.

%% `stop' ends the proxy with reason `normal'; anything else is passed on
%% unchanged and the loop continues.
relay(stop, _ProxyTo) ->
    exit(normal);
relay(Msg, ProxyTo) ->
    ProxyTo ! Msg,
    loop(ProxyTo).
%% @doc supervise riak_vnode processes
%%
%% A `simple_one_for_one' supervisor: every child is started on demand
%% through start_vnode/2 from the single template spec returned by init/1.

-module(riak_core_vnode_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).
-export([start_vnode/2]).

%% @doc Start the supervisor, registered locally under the module name.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @doc Start a vnode child. `[Mod, Index]' is appended to the (empty)
%% argument list of the template spec, so the child is started as
%% riak_core_vnode:start_link(Mod, Index).
start_vnode(Mod, Index) when is_integer(Index) ->
    ChildArgs = [Mod, Index],
    supervisor:start_child(?MODULE, ChildArgs).

%% @private
init([]) ->
    RestartStrategy = {simple_one_for_one, 10, 10},
    VnodeTemplate = {undefined,
                     {riak_core_vnode, start_link, []},
                     temporary, brutal_kill, worker, dynamic},
    {ok, {RestartStrategy, [VnodeTemplate]}}.
10 | 11 | ** Contributing 12 | We encourage contributions to =riak_core= from the community. 13 | 14 | 1) Fork the =riak_core= repository on [[https://github.com/basho/riak_core][Github]]. 15 | 2) Clone your fork or add the remote if you already have a clone of 16 | the repository. 17 | #+BEGIN_SRC shell 18 | git clone git@github.com:yourusername/riak_core.git 19 | # or 20 | git remote add mine git@github.com:yourusername/riak_core.git 21 | #+END_SRC 22 | 3) Create a topic branch for your change. 23 | #+BEGIN_SRC shell 24 | git checkout -b some-topic-branch 25 | #+END_SRC 26 | 4) Make your change and commit. Use a clear and descriptive commit 27 | message, spanning multiple lines if detailed explanation is 28 | needed. 29 | 5) Push to your fork of the repository and then send a pull-request 30 | through Github. 31 | #+BEGIN_SRC shell 32 | git push mine some-topic-branch 33 | #+END_SRC 34 | 6) A Basho engineer or community maintainer will review your patch 35 | and merge it into the main repository or send you feedback. 36 | -------------------------------------------------------------------------------- /src/riak_core_test_util.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% riak_test_util: utilities for test scripts 4 | %% 5 | %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. 6 | %% 7 | %% This file is provided to you under the Apache License, 8 | %% Version 2.0 (the "License"); you may not use this file 9 | %% except in compliance with the License. You may obtain 10 | %% a copy of the License at 11 | %% 12 | %% http://www.apache.org/licenses/LICENSE-2.0 13 | %% 14 | %% Unless required by applicable law or agreed to in writing, 15 | %% software distributed under the License is distributed on an 16 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | %% KIND, either express or implied. 
%% @doc utilities for test scripts

-module(riak_core_test_util).

-ifdef(TEST).
-export([setup_mockring1/0]).
-include_lib("eunit/include/eunit.hrl").

%% @doc Install a mock ring as the global ring: a fresh 16-partition ring
%% owned by the local node, from which six partitions are transferred to
%% othernode@otherhost and then six more to othernode2@otherhost2.
%% Requires a running riak_core_ring_manager (test mode is fine).
setup_mockring1() ->
    Fresh = riak_core_ring:fresh(16, node()),
    Ring0 = give_away(6, othernode@otherhost, Fresh),
    Ring = give_away(6, othernode2@otherhost2, Ring0),
    riak_core_ring_manager:set_ring_global(Ring).

%% Transfer `N' partitions, one at a time, to `ToNode'. Each step hands over
%% the first index that riak_core_ring:my_indices/1 reports for the ring.
give_away(0, _ToNode, Ring) ->
    Ring;
give_away(N, ToNode, Ring) ->
    Index = hd(riak_core_ring:my_indices(Ring)),
    give_away(N - 1, ToNode, riak_core_ring:transfer_node(Index, ToNode, Ring)).

-endif. %TEST.
%% @doc Thin convenience layer over the `application' environment API.
-module(app_helper).

-export([get_env/1, get_env/2, get_env/3]).

%% ===================================================================
%% Public API
%% ===================================================================

%% @spec get_env(App :: atom()) -> [{Key :: atom(), Value :: term()}]
%% @doc Return every Key/Value pair in the environment of `App'.
get_env(App) ->
    application:get_all_env(App).

%% @spec get_env(App :: atom(), Key :: atom()) -> term()
%% @doc Look up `Key' in the environment of `App'; yields the atom
%%      'undefined' when the key is not set.
get_env(App, Key) ->
    get_env(App, Key, undefined).

%% @spec get_env(App :: atom(), Key :: atom(), Default :: term()) -> term()
%% @doc Look up `Key' in the environment of `App'; yields `Default' when
%%      the key is not set.
get_env(App, Key, Default) ->
    value_or(Default, application:get_env(App, Key)).

%% Unwrap an `{ok, Value}' lookup result; any other result means "unset".
value_or(_Default, {ok, Value}) ->
    Value;
value_or(Default, _NotSet) ->
    Default.
%% @doc Ring maintenance helpers: forced partition assignment and a check
%% for preference lists that span too few distinct nodes.
-module(riak_core_ring_util).

-export([assign/2,
         check_ring/0,
         check_ring/1]).

%% @doc Forcibly assign a partition to a specific node
assign(Partition, ToNode) ->
    Transfer =
        fun(Ring, _Args) ->
                Moved = riak_core_ring:transfer_node(Partition, ToNode, Ring),
                {new_ring, Moved}
        end,
    {ok, _NewRing} = riak_core_ring_manager:ring_trans(Transfer, undefined),
    ok.

%% @doc Check the local ring for any preflists that do not satisfy n_val
check_ring() ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    check_ring(Ring).

%% @doc Check a ring for any preflists that do not satisfy n_val.
%% n_val is read from the riak_core `default_bucket_props' environment
%% entry. The result is an ordered set of the preflists whose number of
%% distinct nodes differs from n_val.
check_ring(Ring) ->
    {ok, Props} = application:get_env(riak_core, default_bucket_props),
    {n_val, Nval} = lists:keyfind(n_val, 1, Props),
    Unsatisfied = [PL || PL <- riak_core_ring:all_preflists(Ring, Nval),
                         distinct_node_count(PL) =/= Nval],
    ordsets:from_list(Unsatisfied).

%% Number of different nodes appearing in the `{_, Node}' entries of a
%% preflist.
distinct_node_count(Preflist) ->
    length(lists:usort([Node || {_, Node} <- Preflist])).
%% @doc Guard process for persistent event handlers. The guard installs a
%% supervised gen_event handler from its own init/1 and stops when it
%% receives the corresponding `gen_event_EXIT' message.
-module(riak_core_eventhandler_guard).
-behaviour(gen_server).
-export([start_link/3, start_link/4]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).
-record(state, {handlermod, handler, exitfun}).

%% @doc Start a guard without an exit callback.
start_link(HandlerMod, Handler, Args) ->
    start_link(HandlerMod, Handler, Args, undefined).

%% @doc Start a guard. `ExitFun', unless `undefined', is called as
%% ExitFun(Handler, Reason) when the handler exits for a reason other than
%% `normal' or `shutdown'.
start_link(HandlerMod, Handler, Args, ExitFun) ->
    InitArgs = [HandlerMod, Handler, Args, ExitFun],
    gen_server:start_link(?MODULE, InitArgs, []).

%% @private
%% Installing the handler here ties it to this process, so its exit is
%% reported to handle_info/2 as a gen_event_EXIT message.
init([HandlerMod, Handler, Args, ExitFun]) ->
    ok = gen_event:add_sup_handler(HandlerMod, Handler, Args),
    {ok, #state{handlermod=HandlerMod, handler=Handler, exitfun=ExitFun}}.

%% @private
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%% @private
handle_cast(_Msg, State) ->
    {noreply, State}.

%% @private
%% A clean handler exit stops the guard with `normal'; any other exit runs
%% the optional callback and stops the guard with {gen_event_EXIT, Handler}.
handle_info({gen_event_EXIT, _Handler, Reason}, State)
  when Reason =:= shutdown; Reason =:= normal ->
    {stop, normal, State};
handle_info({gen_event_EXIT, Handler, Reason}, State=#state{exitfun=ExitFun}) ->
    notify_exit(ExitFun, Handler, Reason),
    {stop, {gen_event_EXIT, Handler}, State};
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
terminate(_Reason, #state{}) ->
    ok.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Invoke the exit callback when one was configured.
notify_exit(undefined, _Handler, _Reason) ->
    ok;
notify_exit(ExitFun, Handler, Reason) ->
    ExitFun(Handler, Reason).
%% @doc Pretty-printer for JSON text held in a string (list of characters).
%%
%% The printer is a single pass over the characters. It only tracks the
%% current indent depth and whether it is inside a quoted string; it does
%% not validate the JSON.
-module(json_pp).

-define(SPACE, 32).
-define(is_quote(C), ((C == $\") orelse (C == $\'))).
-define(is_indent(C), ((C == 91) orelse (C == 123))). % [, {
-define(is_undent(C), ((C == 93) orelse (C == 125))). % ], }
-export([print/1,
         test/0]).

%% @doc Re-indent the JSON text `Str' using four spaces per nesting level.
%% The result is a deep character list (the indent runs are nested lists),
%% so flatten it or hand it to an iolist-accepting function before treating
%% it as a flat string.
print(Str) when is_list(Str) -> json_pp(Str, 0, undefined, []).

%% json_pp(Input, Depth, Quote, Acc)
%%   Depth - current nesting level
%%   Quote - `undefined' outside a string, otherwise the quote character
%%           that opened the string being copied
%%   Acc   - output accumulated in reverse
%%
%% Inside a string a backslash always escapes the character after it, so
%% the pair is copied verbatim. Matching only "backslash + quote" would
%% misread an escaped backslash directly before the closing quote (the
%% three characters \ \ ") as an escaped quote and never leave the string.
json_pp([$\\, Escaped| Rest], I, Q, Acc) when Q =/= undefined ->
    json_pp(Rest, I, Q, [Escaped, $\\| Acc]);
%% opening quote
json_pp([C| Rest], I, undefined, Acc) when ?is_quote(C) ->
    json_pp(Rest, I, C, [C| Acc]);
%% closing quote: the same character that opened the string
json_pp([Q| Rest], I, Q, Acc) ->
    json_pp(Rest, I, undefined, [Q| Acc]);
%% `[' or `{': emit it, then start a new line one level deeper
json_pp([C| Rest], I, undefined, Acc) when ?is_indent(C) ->
    json_pp(Rest, I+1, undefined, [pp_indent(I+1), $\n, C| Acc]);
%% `]' or `}': new line one level shallower, then the bracket
json_pp([C| Rest], I, undefined, Acc) when ?is_undent(C) ->
    json_pp(Rest, I-1, undefined, [C, pp_indent(I-1), $\n| Acc]);
%% element separator: break the line after the comma
json_pp([$,| Rest], I, undefined, Acc) ->
    json_pp(Rest, I, undefined, [pp_indent(I), $\n, $,| Acc]);
%% key/value separator: add one space after the colon
json_pp([$:| Rest], I, undefined, Acc) ->
    json_pp(Rest, I, undefined, [?SPACE, $:| Acc]);
%% anything else, inside or outside a string, is copied as is
json_pp([C|Rest], I, Q, Acc) ->
    json_pp(Rest, I, Q, [C| Acc]);
json_pp([], _I, _Q, Acc) -> % done
    lists:reverse(Acc).

%% Four spaces per nesting level.
pp_indent(I) -> lists:duplicate(I*4, ?SPACE).

%% testing

test_data() ->
    {struct, [{foo, true},
              {bar, false},
              {baz, {array, [1, 2, 3, 4]}},
              {'fiz:f', null},
              {"fozzer\"", 5}]}.

listify(IoList) -> binary_to_list(list_to_binary(IoList)).

%% @doc Manual smoke test: encodes a sample term with mochijson and prints
%% the pretty-printed result to standard output.
test() ->
    J1 = listify(mochijson:encode(test_data())),
    io:format("~s~n", [listify(print(J1))]).
%% @doc supervise riak_core_eventhandler_guard processes
%%
%% One guard child is added per guarded handler; the child id is
%% `{HandlerMod, Handler}'.
-module(riak_core_eventhandler_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).
-export([start_guarded_handler/3, start_guarded_handler/4, stop_guarded_handler/3]).

%% @doc Start the supervisor, registered locally under the module name.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @private
%% Starts with no children; guards are added by start_guarded_handler/3,4.
init([]) ->
    {ok, {{one_for_one, 10, 10}, []}}.

%% @doc Install `Handler' on event manager `HandlerMod' under a guard, with
%% no exit callback.
start_guarded_handler(HandlerMod, Handler, Args) ->
    start_guarded_handler(HandlerMod, Handler, Args, undefined).

%% @doc Install `Handler' on event manager `HandlerMod' under a guard.
%% Returns `ok' when the guard child started, otherwise the unmodified
%% result of supervisor:start_child/2.
start_guarded_handler(HandlerMod, Handler, Args, ExitFun) ->
    Spec = handler_spec(HandlerMod, Handler, Args, ExitFun),
    case supervisor:start_child(?MODULE, Spec) of
        {ok, _Pid} ->
            ok;
        NotStarted ->
            NotStarted
    end.

%% @doc Remove `Handler' from `HandlerMod' and drop its guard child.
%% Returns `{error, module_not_found}' when the handler is not installed,
%% otherwise the result of gen_event:delete_handler/3.
stop_guarded_handler(HandlerMod, Handler, Args) ->
    Installed = lists:member(Handler, gen_event:which_handlers(HandlerMod)),
    remove_guarded(Installed, HandlerMod, Handler, Args).

%% Second half of stop_guarded_handler/3, dispatched on whether the handler
%% is currently installed. The guard child is terminated and deleted only
%% after gen_event accepted the removal.
remove_guarded(false, _HandlerMod, _Handler, _Args) ->
    {error, module_not_found};
remove_guarded(true, HandlerMod, Handler, Args) ->
    case gen_event:delete_handler(HandlerMod, Handler, Args) of
        {error, module_not_found} = NotFound ->
            NotFound;
        Result ->
            ChildId = {HandlerMod, Handler},
            ok = supervisor:terminate_child(?MODULE, ChildId),
            ok = supervisor:delete_child(?MODULE, ChildId),
            Result
    end.

%% Child spec of the guard process for one handler.
handler_spec(HandlerMod, Handler, Args, ExitFun) ->
    ChildId = {HandlerMod, Handler},
    StartMFA = {riak_core_eventhandler_guard, start_link,
                [HandlerMod, Handler, Args, ExitFun]},
    {ChildId, StartMFA, transient, 5000, worker, [riak_core_eventhandler_guard]}.
%% @doc This module provides a Webmachine resource that lists the
%%      URLs for other resources available on this host.
%%
%%      Links to Riak resources will be added to the Link header in
%%      the form:
%%```
%%    <URL>; rel="RESOURCE_NAME"
%%'''
%%      HTML output of this resource is a list of link tags like:
%%```
%%    <a href="URL">RESOURCE_NAME</a>
%%'''
%%      JSON output of this resource is an object with elements like:
%%```
%%    "RESOURCE_NAME":"URL"
%%'''
-module(riak_core_wm_urlmap).
-export([
         init/1,
         resource_exists/2,
         content_types_provided/2,
         to_html/2,
         to_json/2
        ]).

-include_lib("webmachine/include/webmachine.hrl").

%% @doc Resource initialisation: the resource context is the list of
%% `{ResourceName, Url}' string pairs computed by service_list/0.
init([]) ->
    {ok, service_list()}.

%% @doc The resource always exists; the Link header is attached to the
%% request data on the way through.
resource_exists(RD, Services) ->
    {true, add_link_header(RD, Services), Services}.

%% Set the response "Link" header to one `<Uri>; rel="Resource"' entry per
%% service, separated by commas.
add_link_header(RD, Services) ->
    wrq:set_resp_header(
      "Link",
      string:join([ ["<",Uri,">; rel=\"",Resource,"\""]
                    || {Resource, Uri} <- Services ],
                  ","),
      RD).

%% @doc Offer an HTML and a JSON representation.
content_types_provided(RD, Services) ->
    {[{"text/html", to_html},{"application/json", to_json}], RD, Services}.

%% @doc Render the service list as an HTML page: an unordered list with one
%% `<a href="Uri">Resource</a>' item per service. The body is an iolist.
%% NOTE(review): names and URIs are inserted without HTML escaping; this
%% presumably relies on them coming from the node's own dispatch
%% configuration - confirm before feeding it any other data.
to_html(RD, Services) ->
    {["<html><body><ul>",
      [ ["<li><a href=\"",Uri,"\">",Resource,"</a></li>"]
        || {Resource, Uri} <- Services ],
      "</ul></body></html>"],
     RD, Services}.

%% @doc Render the service list as a JSON object mapping resource name to
%% URL, encoded with mochijson.
to_json(RD, Services) ->
    {mochijson:encode({struct, Services}), RD, Services}.

%% Build the sorted, de-duplicated `{ResourceName, "/" ++ FirstPathSegment}'
%% list from webmachine's `dispatch_list' environment entry. Dispatch
%% entries that do not match the pattern `{[UriBase|_], Resource, _}' are
%% skipped by the comprehension.
service_list() ->
    {ok, Dispatch} = application:get_env(webmachine, dispatch_list),
    lists:usort(
      [{atom_to_list(Resource), "/"++UriBase}
       || {[UriBase|_], Resource, _} <- Dispatch]).
%% @doc Top-level supervisor of the riak_core application.
-module(riak_core_sup).

-behaviour(supervisor).

%% API
-export([start_link/0]).

%% Supervisor callbacks
-export([init/1]).

%% Helper macro for declaring children of supervisor
-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}).
-define (IF (Bool, A, B), if Bool -> A; true -> B end).

%% ===================================================================
%% API functions
%% ===================================================================

%% @doc Start the supervisor, registered locally under the module name.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% ===================================================================
%% Supervisor callbacks
%% ===================================================================

%% @private
%% The child list is the fixed set of core processes followed by whatever
%% web listener specs riak_core_web provides.
init([]) ->
    Webs = web_children(),
    Children = lists:flatten([core_children(), Webs]),
    {ok, {{one_for_one, 10, 10}, Children}}.

%% Listener specs from the http and https bindings. When both are empty,
%% fall back to the old settings, in case app.config was not updated.
web_children() ->
    Configured = lists:flatten(riak_core_web:bindings(http),
                               riak_core_web:bindings(https)),
    case Configured of
        [] ->
            riak_core_web:old_binding();
        _ ->
            Configured
    end.

%% Permanent children, in start order.
core_children() ->
    [?CHILD(riak_core_sysmon_minder, worker),
     ?CHILD(riak_core_vnode_sup, supervisor),
     ?CHILD(riak_core_eventhandler_sup, supervisor),
     ?CHILD(riak_core_handoff_manager, worker),
     ?CHILD(riak_core_handoff_listener, worker),
     ?CHILD(riak_core_ring_events, worker),
     ?CHILD(riak_core_ring_manager, worker),
     ?CHILD(riak_core_node_watcher_events, worker),
     ?CHILD(riak_core_node_watcher, worker),
     ?CHILD(riak_core_gossip, worker)].
16 | -module(riak_core_handoff_manager). 17 | -behaviour(gen_server). 18 | -export([start_link/0]). 19 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, 20 | terminate/2, code_change/3]). 21 | -export([add_exclusion/2, get_handoff_lock/1, get_exclusions/1]). 22 | -export([remove_exclusion/2]). 23 | -export([release_handoff_lock/2]). 24 | -record(state, {excl}). 25 | 26 | start_link() -> 27 | gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 28 | 29 | init([]) -> 30 | {ok, #state{excl=ordsets:new()}}. 31 | 32 | add_exclusion(Module, Index) -> 33 | gen_server:cast(?MODULE, {add_exclusion, {Module, Index}}). 34 | 35 | remove_exclusion(Module, Index) -> 36 | gen_server:cast(?MODULE, {del_exclusion, {Module, Index}}). 37 | 38 | get_exclusions(Module) -> 39 | gen_server:call(?MODULE, {get_exclusions, Module}, infinity). 40 | 41 | get_handoff_lock(LockId) -> 42 | TokenCount = app_helper:get_env(riak_core, handoff_concurrency, 4), 43 | get_handoff_lock(LockId, TokenCount). 44 | 45 | get_handoff_lock(_LockId, 0) -> 46 | {error, max_concurrency}; 47 | get_handoff_lock(LockId, Count) -> 48 | case global:set_lock({{handoff_token, Count}, {node(), LockId}}, [node()], 0) of 49 | true -> 50 | {ok, {handoff_token, Count}}; 51 | false -> 52 | get_handoff_lock(LockId, Count-1) 53 | end. 54 | 55 | release_handoff_lock(LockId, Token) -> 56 | global:del_lock({{handoff_token,Token}, {node(), LockId}}, [node()]). 57 | 58 | handle_call({get_exclusions, Module}, _From, State=#state{excl=Excl}) -> 59 | Reply = [I || {M, I} <- ordsets:to_list(Excl), M =:= Module], 60 | {reply, {ok, Reply}, State}. 

%% @doc gen_server callback: apply asynchronous changes to the exclusion set.
%% Adding an exclusion also re-announces the current ring through
%% riak_core_ring_events before the new set is stored.
handle_cast({del_exclusion, {_Mod, _Idx} = Entry}, #state{excl = Current} = State) ->
    Remaining = ordsets:del_element(Entry, Current),
    {noreply, State#state{excl = Remaining}};
handle_cast({add_exclusion, {_Mod, _Idx} = Entry}, #state{excl = Current} = State) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    riak_core_ring_events:ring_update(Ring),
    Extended = ordsets:add_element(Entry, Current),
    {noreply, State#state{excl = Extended}}.

%% @doc gen_server callback: every other message is ignored.
handle_info(_Info, State) ->
    {noreply, State}.

%% @doc gen_server callback: nothing to clean up.
terminate(_Reason, _State) ->
    ok.

%% @doc gen_server callback: the state needs no conversion on upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

--------------------------------------------------------------------------------
/src/riak_core_handoff_listener.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_handoff_listener: entry point for TCP-based handoff
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc entry point for TCP-based handoff

-module(riak_core_handoff_listener).
-behavior(gen_nb_server).
-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).
-export([sock_opts/0, new_connection/2]).

%% portnum:  port the listener was started on
%% ssl_opts: options handed to every handoff receiver
-record(state, {
          portnum :: integer(),
          ssl_opts :: list()
         }).

%% @doc Start the listener on the `handoff_ip' / `handoff_port' configured
%% for riak_core, using the SSL options supplied by the handoff sender.
start_link() ->
    Port = app_helper:get_env(riak_core, handoff_port),
    Address = app_helper:get_env(riak_core, handoff_ip),
    SslOptions = riak_core_handoff_sender:get_handoff_ssl_options(),
    gen_nb_server:start_link(?MODULE, Address, Port, [Port, SslOptions]).

%% @doc gen_nb_server callback: register this process under the module
%% name and put a proxy in place for the old riak_kv listener name.
init([PortNum, SslOpts]) ->
    register(?MODULE, self()),

    %% This exit() call shouldn't be necessary, AFAICT the VM's EXIT
    %% propagation via linking should do the right thing, but BZ 823
    %% suggests otherwise.  However, the exit() call should fall into
    %% the "shouldn't hurt", especially since the next line will
    %% explicitly try to spawn a new proc that will try to register
    %% the riak_kv_handoff_listener name.
    catch exit(whereis(riak_kv_handoff_listener), kill),
    process_proxy:start_link(riak_kv_handoff_listener, ?MODULE),

    InitialState = #state{portnum = PortNum, ssl_opts = SslOpts},
    {ok, InitialState}.

%% @doc Options applied to the listening socket.
sock_opts() ->
    [binary, {packet, 4}, {reuseaddr, true}, {backlog, 64}].

%% @doc Report the port number this listener was started with.
handle_call(handoff_port, _From, #state{portnum = Port} = State) ->
    {reply, {ok, Port}, State}.

%% @doc Casts are ignored.
handle_cast(_Msg, State) ->
    {noreply, State}.

%% @doc Other messages are ignored.
handle_info(_Info, State) ->
    {noreply, State}.

%% @doc Nothing to clean up.
terminate(_Reason, _State) ->
    ok.

%% @doc The state needs no conversion on upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @doc gen_nb_server callback: start a handoff receiver for the freshly
%% accepted socket, make it the socket owner and hand the socket over.
new_connection(Socket, #state{ssl_opts = SslOpts} = State) ->
    {ok, Receiver} = riak_core_handoff_receiver:start_link(SslOpts),
    gen_tcp:controlling_process(Socket, Receiver),
    ok = riak_core_handoff_receiver:set_socket(Receiver, Socket),
    {ok, State}.

--------------------------------------------------------------------------------
/src/riak_core_node_watcher_events.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_core_node_watcher_events).

-behaviour(gen_event).

%% API
-export([start_link/0,
         add_handler/2,
         add_sup_handler/2,
         add_guarded_handler/2,
         add_callback/1,
         add_sup_callback/1,
         add_guarded_callback/1,
         service_update/1]).

%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).

%% callback: fun of arity 1 invoked with the service list on every update.
-record(state, { callback }).

%% ===================================================================
%% API functions
%% ===================================================================

%% @doc Start the event manager, registered locally under the module name.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% @doc Install Handler (initialised with Args) on this event manager.
add_handler(Handler, Args) ->
    gen_event:add_handler(?MODULE, Handler, Args).

%% @doc Install Handler as a supervised handler (gen_event:add_sup_handler/3).
add_sup_handler(Handler, Args) ->
    gen_event:add_sup_handler(?MODULE, Handler, Args).

%% @doc Install Handler through riak_core's guarded event handler support.
add_guarded_handler(Handler, Args) ->
    riak_core:add_guarded_event_handler(?MODULE, Handler, Args).

%% @doc Install Fn as a callback handler.  Every callback gets its own
%% handler id, {?MODULE, Ref}, so any number of them can coexist.
add_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    gen_event:add_handler(?MODULE, HandlerId, [Fn]).

%% @doc Same as add_callback/1, but installed as a supervised handler.
add_sup_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    gen_event:add_sup_handler(?MODULE, HandlerId, [Fn]).

%% @doc Same as add_callback/1, but installed as a guarded handler.
add_guarded_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    riak_core:add_guarded_event_handler(?MODULE, HandlerId, [Fn]).

%% @doc Asynchronously notify all handlers of the new service list.
service_update(Services) ->
    gen_event:notify(?MODULE, {service_update, Services}).


%% ===================================================================
%% gen_event callbacks
%% ===================================================================

%% @doc Callback handler init: invoke Fn once with the services currently
%% reported by riak_core_node_watcher, then keep it for later updates.
init([Fn]) ->
    %% Get the initial list of available services
    InitialServices = riak_core_node_watcher:services(),
    Fn(InitialServices),
    {ok, #state { callback = Fn }}.

%% @doc Forward a service update to the stored callback.
handle_event({service_update, Services}, #state { callback = Notify } = State) ->
    Notify(Services),
    {ok, State}.

%% @doc Every call is answered with `ok'.
handle_call(_Request, State) ->
    {ok, ok, State}.

%% @doc Other messages are ignored.
handle_info(_Info, State) ->
    {ok, State}.

%% @doc Nothing to clean up.
terminate(_Reason, _State) ->
    ok.

%% @doc The state needs no conversion on upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

--------------------------------------------------------------------------------
/src/riak_core_cinfo_core.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Riak:  A lightweight, decentralized key-value store.
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_core_cinfo_core).

-export([cluster_info_init/0, cluster_info_generator_funs/0]).

%% @spec () -> term()
%% @doc Required callback function for cluster_info: initialization.
%%
%% Nothing needs to be set up, so this simply returns `ok'.

cluster_info_init() ->
    ok.

%% @spec () -> list({string(), fun()})
%% @doc Required callback function for cluster_info: return list of
%%      {NameForReport, FunOfArity_1} tuples to generate ASCII/UTF-8
%%      formatted reports.

cluster_info_generator_funs() ->
    [
     {"Riak Core config files", fun config_files/1},
     {"Riak Core vnode modules", fun vnode_modules/1},
     {"Riak Core ring", fun get_my_ring/1},
     {"Riak Core latest ring file", fun latest_ringfile/1},
     {"Riak Core active partitions", fun active_partitions/1}
    ].

%% Report the vnode modules known to riak_core.
%% CPid is the data collector's pid.
vnode_modules(CPid) ->
    Modules = riak_core:vnode_modules(),
    cluster_info:format(CPid, "~p\n", [Modules]).

%% Report the ring currently held by the ring manager.
get_my_ring(CPid) ->
    {ok, CurrentRing} = riak_core_ring_manager:get_my_ring(),
    cluster_info:format(CPid, "~p\n", [CurrentRing]).

%% Report the path of the newest ring file and its decoded contents.
%% Crashes (badmatch) when no ring file is found or it cannot be read.
latest_ringfile(CPid) ->
    {ok, RingPath} = riak_core_ring_manager:find_latest_ringfile(),
    {ok, Raw} = file:read_file(RingPath),
    cluster_info:format(CPid, "Latest ringfile: ~s\n", [RingPath]),
    Decoded = binary_to_term(Raw),
    cluster_info:format(CPid, "File contents:\n~p\n", [Decoded]).

%% Report the set of partition indexes that have a running vnode, as
%% seen through the children of riak_core_vnode_sup.
active_partitions(CPid) ->
    VnodePids = [Pid || {_,Pid,_,_} <- supervisor:which_children(riak_core_vnode_sup)],
    ModIndexes = [riak_core_vnode:get_mod_index(Pid) || Pid <- VnodePids],
    Indexes = lists:map(fun({_Mod, Index}) -> Index end, ModIndexes),
    %% ordsets removes the duplicates produced by several vnode modules
    %% running on the same partition.
    Partitions = ordsets:from_list(Indexes),
    cluster_info:format(CPid, "~p\n", [Partitions]).

%% Report a directory listing entry and the contents of the node's config
%% file (taken from the -config emulator argument) and of the vm.args file
%% that sits in the same directory.
config_files(C) ->
    {ok, [[AppPath]]} = init:get_argument(config),
    VmPath = filename:join(filename:dirname(AppPath), "vm.args"),
    Report = fun(File) ->
                     Listing = os:cmd("ls -l " ++ File),
                     cluster_info:format(C, "File: ~s\n", [Listing]),
                     {ok, FileBin} = file:read_file(File),
                     cluster_info:format(C, "File contents:\n~s\n", [FileBin])
             end,
    lists:map(Report, [AppPath, VmPath]).

--------------------------------------------------------------------------------
/ebin/riak_core.app:
--------------------------------------------------------------------------------
%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
%% ex: ts=4 sw=4 et
{application, riak_core,
 [
  {description, "Riak Core"},
  {vsn, "0.14.0"},
  {modules, [
             app_helper,
             bloom,
             chash,
             gen_nb_server,
             gen_server2,
             json_pp,
             merkerl,
             priority_queue,
             process_proxy,
             riak_core,
             riak_core_apl,
             riak_core_app,
             riak_core_bucket,
             riak_core_cinfo_core,
             riak_core_claim,
             riak_core_config,
             riak_core_eventhandler_guard,
             riak_core_eventhandler_sup,
             riak_core_gossip,
             riak_core_handoff_listener,
             riak_core_handoff_manager,
             riak_core_handoff_receiver,
             riak_core_handoff_sender,
             riak_core_node_watcher,
             riak_core_node_watcher_events,
             riak_core_pb,
             riak_core_ring,
             riak_core_ring_events,
             riak_core_ring_handler,
             riak_core_ring_manager,
             riak_core_ring_util,
             riak_core_sup,
             riak_core_sysmon_handler,
             riak_core_sysmon_minder,
             riak_core_tracer,
             riak_core_test_util,
             riak_core_util,
             riak_core_vnode,
             riak_core_vnode_master,
             riak_core_vnode_sup,
             riak_core_web,
             riak_core_wm_urlmap,
             slide,
             spiraltime,
             vclock
            ]},
  {registered, []},
  {applications, [
                  kernel,
                  stdlib,
                  sasl,
                  crypto,
                  riak_sysmon,
                  webmachine
                 ]},
  {mod, { riak_core_app, []}},
  {env, [
         %% Cluster name
         {cluster_name, "default"},

         %% Default location of ringstate
         {ring_state_dir, "data/ring"},

         %% Default ring creation size.  Make sure it is a power of 2,
         %% e.g. 16, 32, 64, 128, 256, 512 etc
         {ring_creation_size, 64},

         %% Default gossip interval (milliseconds)
         {gossip_interval, 60000},

         %% Target N value
         {target_n_val, 4},

         %% Default claims functions
         {wants_claim_fun, {riak_core_claim, default_wants_claim}},
         {choose_claim_fun, {riak_core_claim, default_choose_claim}},

         %% Vnode inactivity timeout (how often to check if fallback vnodes
         %% should return their data) in ms.
         {vnode_inactivity_timeout, 60000},

         %% Number of VNodes allowed to do handoff concurrently.
         {handoff_concurrency, 4},

         %% Disable Nagle on HTTP sockets
         {disable_http_nagle, false},

         %% Handoff IP/port
         {handoff_port, 8099},
         {handoff_ip, "0.0.0.0"}
        ]}
 ]}.

--------------------------------------------------------------------------------
/src/riak_core_ring_events.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_core_ring_events).

-behaviour(gen_event).

%% API
-export([start_link/0,
         add_handler/2,
         add_sup_handler/2,
         add_guarded_handler/2,
         add_callback/1,
         add_sup_callback/1,
         add_guarded_callback/1,
         ring_update/1,
         force_update/0,
         ring_sync_update/1,
         force_sync_update/0]).

%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).

%% callback: fun of arity 1 invoked with the ring on every ring update.
-record(state, { callback }).

%% ===================================================================
%% API functions
%% ===================================================================

%% @doc Start the event manager, registered locally under the module name.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% @doc Install Handler (initialised with Args) on this event manager.
add_handler(Handler, Args) ->
    gen_event:add_handler(?MODULE, Handler, Args).

%% @doc Install Handler as a supervised handler (gen_event:add_sup_handler/3).
add_sup_handler(Handler, Args) ->
    gen_event:add_sup_handler(?MODULE, Handler, Args).

%% @doc Install Handler through riak_core's guarded event handler support.
add_guarded_handler(Handler, Args) ->
    riak_core:add_guarded_event_handler(?MODULE, Handler, Args).

%% @doc Install Fn as a callback handler.  Every callback gets its own
%% handler id, {?MODULE, Ref}, so any number of them can coexist.
add_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    gen_event:add_handler(?MODULE, HandlerId, [Fn]).

%% @doc Same as add_callback/1, but installed as a supervised handler.
add_sup_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    gen_event:add_sup_handler(?MODULE, HandlerId, [Fn]).

%% @doc Install Fn as a callback handler through riak_core's guarded
%% event handler support, under a fresh {?MODULE, Ref} handler id.
add_guarded_callback(Fn) when is_function(Fn) ->
    HandlerId = {?MODULE, make_ref()},
    riak_core:add_guarded_event_handler(?MODULE, HandlerId, [Fn]).

%% @doc Asynchronously re-announce the ring currently held by the
%% ring manager.
force_update() ->
    {ok, CurrentRing} = riak_core_ring_manager:get_my_ring(),
    ring_update(CurrentRing).

%% @doc Asynchronously notify all handlers about Ring.
ring_update(Ring) ->
    gen_event:notify(?MODULE, {ring_update, Ring}).

%% @doc Like force_update/0, but uses the synchronous notification.
force_sync_update() ->
    {ok, CurrentRing} = riak_core_ring_manager:get_my_ring(),
    ring_sync_update(CurrentRing).

%% @doc Notify all handlers about Ring with gen_event:sync_notify/2.
ring_sync_update(Ring) ->
    gen_event:sync_notify(?MODULE, {ring_update, Ring}).

%% ===================================================================
%% gen_event callbacks
%% ===================================================================

%% @doc Callback handler init: invoke Fn once with the current ring, then
%% keep it for later updates.
init([Fn]) ->
    {ok, CurrentRing} = riak_core_ring_manager:get_my_ring(),
    Fn(CurrentRing),
    {ok, #state { callback = Fn }}.

%% @doc Forward a ring update to the stored callback.
handle_event({ring_update, Ring}, #state { callback = Notify } = State) ->
    Notify(Ring),
    {ok, State}.

%% @doc Every call is answered with `ok'.
handle_call(_Request, State) ->
    {ok, ok, State}.

%% @doc Other messages are ignored.
handle_info(_Info, State) ->
    {ok, State}.

%% @doc Nothing to clean up.
terminate(_Reason, _State) ->
    ok.

%% @doc The state needs no conversion on upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

--------------------------------------------------------------------------------
/test/test_guarded_event_handler.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% test_guarded_event_handler
%%
%% Copyright (c) 2007-2011 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(test_guarded_event_handler).
-behaviour(gen_event).
-export([start_link/0]).
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).
-export([get_events/0]).

%% events: payloads of the {event, E} notifications seen so far, newest first.
-record(state, {events=[]}).

-include_lib("eunit/include/eunit.hrl").

%% @doc Start the event manager under test, registered as this module.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% @doc Ask the installed handler for the events it has recorded.
get_events() ->
    gen_event:call(?MODULE, ?MODULE, get_events).

%% @doc Handler init: start with no recorded events.
init([]) ->
    {ok, #state{}}.

%% @doc Record `{event, E}' payloads; the `crash' event makes the handler
%% exit so the guard around it can be exercised.
handle_event({event, E}, #state{events = Seen} = State) ->
    {ok, State#state{events = [E | Seen]}};
handle_event(crash, _State) ->
    exit(crash).

%% @doc Return the recorded events.
handle_call(get_events, #state{events = Seen} = State) ->
    {ok, Seen, State}.

%% @doc Other messages are ignored.
handle_info(_Info, State) ->
    {ok, State}.

%% @doc Hand the termination reason back as the handler's result.
terminate(Reason, _State) ->
    Reason.

%% @doc The state needs no conversion on upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

-ifdef(TEST).

%% EUnit fixture: setup and cleanup run in the test process itself (`local').
guarded_handler_test_() ->
    Tests = [fun guarded_handler_test_case/0],
    {setup, local, fun setup/0, fun cleanup/1, Tests}.

%% Start the guard supervisor and the event manager under test.
setup() ->
    riak_core_eventhandler_sup:start_link(),
    ?MODULE:start_link().

cleanup(_Pid) ->
    %% ARG: ugly hack to not die when the supervisor exits.
    process_flag(trap_exit, true),
    gen_event:stop(?MODULE).

%% Wait up to five seconds for the exit notification that the guard's
%% exit fun sends to the test process; returns `ok' or `fail'.
wait_for_exitfun() ->
    receive
        {?MODULE, {'EXIT', crash}} ->
            ok
    after 5000 ->
            fail
    end.

%% Exercise a guarded handler: it receives events, its crash is reported
%% through the exit fun, it is installed again afterwards (with a fresh
%% state), and deleting it removes both the handler and its guard.
guarded_handler_test_case() ->
    Self = self(),
    F = fun(Handler, Reason) ->
                Self ! {Handler, Reason}
        end,
    riak_core:add_guarded_event_handler(?MODULE, ?MODULE, [], F),
    gen_event:notify(?MODULE, {event, foo}),
    ?assertEqual([foo], ?MODULE:get_events()),
    gen_event:notify(?MODULE, crash),
    ?assertEqual(ok, wait_for_exitfun()),
    %% Fail right here when the handler never comes back, instead of
    %% continuing with a missing handler.
    ?assertEqual(ok, wait_for_handler(?MODULE, 1000, 100)),
    gen_event:notify(?MODULE, {event, baz}),
    ?assertEqual([baz], ?MODULE:get_events()),
    ?assertEqual(quux, riak_core:delete_guarded_event_handler(?MODULE,?MODULE,quux)),
    ?assertNot(lists:member(?MODULE, gen_event:which_handlers(?MODULE))),
    ?assertEqual([], supervisor:which_children(riak_core_eventhandler_sup)).

%% Poll until handler Name is installed on the event manager: Count
%% attempts, Sleep milliseconds apart.  Returns `ok' once the handler is
%% present, `fail' when the attempts are used up.
wait_for_handler(_, 0, _) ->
    fail;
wait_for_handler(Name, Count, Sleep) ->
    case lists:member(Name, gen_event:which_handlers(?MODULE)) of
        true ->
            ok;
        false ->
            timer:sleep(Sleep),
            wait_for_handler(Name, Count - 1, Sleep)
    end.

-endif.

--------------------------------------------------------------------------------
/src/riak_core_app.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(riak_core_app).

-behaviour(application).

%% Application callbacks
-export([start/2, stop/1]).

%% ===================================================================
%% Application callbacks
%% ===================================================================

%% @doc Application start callback.  In order: start the application
%% dependencies, make sure the ring state directory exists, register the
%% cluster_info callbacks, add the built-in bucket defaults, start the
%% top-level supervisor and finally load (or create) the ring file.
%% Throws `{error, invalid_ring_state_dir}' when the ring state directory
%% cannot be created and `{error, Reason}' when the ring file lookup fails
%% for a reason other than `not_found'.
start(_StartType, _StartArgs) ->
    %% Don't add our system_monitor event handler here.  Instead, let
    %% riak_core_sysmon_minder start it, because that process can act
    %% on any handler crash notification, whereas we cannot.

    riak_core_util:start_app_deps(riak_core),
    ok = ensure_ring_state_dir(),

    %% Register our cluster_info app callback modules, with catch if
    %% the app is missing or packaging is broken.
    catch cluster_info:register_app(riak_core_cinfo_core),

    %% add these defaults now to supplement the set that may have been
    %% configured in app.config
    BuiltinDefaults = [{n_val,3},
                       {allow_mult,false},
                       {last_write_wins,false},
                       {precommit, []},
                       {postcommit, []},
                       {chash_keyfun, {riak_core_util, chash_std_keyfun}}],
    riak_core_bucket:append_bucket_defaults(BuiltinDefaults),

    %% Spin up the supervisor; prune ring files as necessary
    case riak_core_sup:start_link() of
        {error, _Reason} = Failure ->
            Failure;
        {ok, Pid} ->
            ok = riak_core_ring_events:add_guarded_handler(riak_core_ring_handler, []),
            %% App is running; search for latest ring file and initialize with it
            riak_core_ring_manager:prune_ringfiles(),
            load_latest_ring(),
            {ok, Pid}
    end.

%% Validate that the ring state directory exists, creating it if needed.
%% Logs and throws when it can be neither found nor created.
ensure_ring_state_dir() ->
    RingStateDir = app_helper:get_env(riak_core, ring_state_dir),
    %% ensure_dir/1 creates the parents of the given path, so probe with a
    %% dummy file name inside the directory.
    Probe = filename:join(RingStateDir, "dummy"),
    case filelib:ensure_dir(Probe) of
        ok ->
            ok;
        {error, RingReason} ->
            error_logger:error_msg(
              "Ring state directory ~p does not exist, "
              "and could not be created. (reason: ~p)\n",
              [RingStateDir, RingReason]),
            throw({error, invalid_ring_state_dir})
    end.

%% Install the newest ring file as this node's ring.  When there is none,
%% write the current ring out instead; any other lookup error is fatal.
load_latest_ring() ->
    case riak_core_ring_manager:find_latest_ringfile() of
        {ok, RingFile} ->
            Ring = riak_core_ring_manager:read_ringfile(RingFile),
            riak_core_ring_manager:set_my_ring(Ring);
        {error, not_found} ->
            riak_core_ring_manager:write_ringfile(),
            error_logger:warning_msg("No ring file available.\n");
        {error, Reason} ->
            error_logger:error_msg("Failed to load ring file: ~p\n",
                                   [Reason]),
            throw({error, Reason})
    end.

%% @doc Application stop callback: nothing to tear down.
stop(_State) ->
    ok.

--------------------------------------------------------------------------------
/src/riak_core_web.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core_web: setup Riak's HTTP interface
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Convenience functions for setting up the HTTP interface
%%      of Riak.  This module loads parameters from the application
%%      environment:
%%
%%   web_ip     - IP address that the Webmachine node should listen to
%%   web_port   - port that the Webmachine node should listen to
%%   web_logdir - directory under which the access log will be stored
%%
-module(riak_core_web).

-export([bindings/1,
         old_binding/0]).

%% @doc Build one child spec per {Ip, Port} pair configured for Scheme
%% (`http' or `https') in the riak_core application environment.
%% Returns [] when nothing is configured for the scheme.
bindings(Scheme) ->
    Configured = app_helper:get_env(riak_core, Scheme, []),
    [binding_config(Scheme, IpPort) || IpPort <- Configured].

%% @doc Read the old, unwrapped web_ip and web_port config.  When both are
%% set, log a deprecation warning and return a single http child spec;
%% otherwise return [].
old_binding() ->
    IP = app_helper:get_env(riak_core, web_ip),
    Port = app_helper:get_env(riak_core, web_port),
    case (IP /= undefined) andalso (Port /= undefined) of
        true ->
            error_logger:warning_msg(
              "app.config is using old-style {web_ip, ~p} and"
              " {web_port, ~p} settings in its riak_core configuration.~n"
              "These are now deprecated, and will be removed in a"
              " future version of Riak.~n"
              "Please migrate to the new-style riak_core configuration"
              " of {http, [{~p, ~p}]}.~n",
              [IP, Port, IP, Port]),
            [binding_config(http, {IP, Port})];
        false ->
            %% do not start the HTTP interface if any part of its
            %% config is missing (maintains 0.13 behavior)
            []
    end.

%% Turn one binding into a supervisor child spec that starts a
%% webmachine_mochiweb listener named after scheme, address and port.
binding_config(Scheme, Binding) ->
    {Ip, Port} = Binding,
    Name = spec_name(Scheme, Ip, Port),
    StartArgs = [spec_from_binding(Scheme, Name, Binding)],
    {Name,
     {webmachine_mochiweb, start, StartArgs},
     permanent, 5000, worker, dynamic}.

%% Listener options for one binding: the scheme-specific options followed
%% by the options common to every listener.  `nodelay' follows the
%% `disable_http_nagle' setting (false when unset).
spec_from_binding(http, Name, Binding) ->
    {Ip, Port} = Binding,
    NoDelay = app_helper:get_env(riak_core, disable_http_nagle, false),
    HttpOpts = [{name, Name},
                {ip, Ip},
                {port, Port},
                {nodelay, NoDelay}],
    HttpOpts ++ common_config();

spec_from_binding(https, Name, Binding) ->
    {Ip, Port} = Binding,
    %% certificate and key default to files under etc/ when `ssl' is unset
    SslOpts = app_helper:get_env(riak_core, ssl,
                     [{certfile, "etc/cert.pem"}, {keyfile, "etc/key.pem"}]),
    NoDelay = app_helper:get_env(riak_core, disable_http_nagle, false),
    HttpsOpts = [{name, Name},
                 {ip, Ip},
                 {port, Port},
                 {ssl, true},
                 {ssl_opts, SslOpts},
                 {nodelay, NoDelay}],
    HttpsOpts ++ common_config().

%% Name of a listener, e.g. "http_127.0.0.1:8098" for scheme `http',
%% address "127.0.0.1" and port 8098.  Ip must be a string.
spec_name(Scheme, Ip, Port) ->
    lists:append([atom_to_list(Scheme), "_", Ip, ":", integer_to_list(Port)]).

%% Options shared by every listener: the log directory (the `http_logdir'
%% setting, "log" when unset), the listen backlog and a dispatch table
%% that routes every request to riak_core_wm_urlmap.
common_config() ->
    LogDir = app_helper:get_env(riak_core, http_logdir, "log"),
    [{log_dir, LogDir},
     {backlog, 128},
     {dispatch, [{[], riak_core_wm_urlmap, []}]}].

--------------------------------------------------------------------------------
/test/mock_vnode.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% mock_vnode: mock vnode for unit testing
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc mock vnode for unit testing

-module(mock_vnode).
%TODO: Work out why this gives a warning
%-behavior(riak_core_vnode).
-export([start_vnode/1,
         get_index/1,
         get_counter/1,
         neverreply/1,
         returnreply/1,
         latereply/1,
         crash/1,
         get_crash_reason/1,
         stop/1]).
-export([init/1,
         handle_command/3,
         terminate/2,
         handle_exit/3]).

%% index:        partition index the vnode was started with
%% counter:      number of counted commands handled so far
%% crash_reason: reason of the last exit reported through handle_exit/3
-record(state, {index, counter, crash_reason}).

-define(MASTER, mock_vnode_master).

%% API

%% @doc Start the mock vnode for partition index I.
start_vnode(I) ->
%    io:format("Starting vnode ~p\n", [I]),
    riak_core_vnode_master:start_vnode(I, ?MODULE).

%% @doc Synchronously ask the vnode(s) in Preflist for their index.
get_index(Preflist) ->
    riak_core_vnode_master:sync_command(Preflist, get_index, ?MASTER).

%% @doc Synchronously ask for the command counter.
get_counter(Preflist) ->
    riak_core_vnode_master:sync_command(Preflist, get_counter, ?MASTER).

%% @doc Send a command that is counted but never answered.
neverreply(Preflist) ->
    riak_core_vnode_master:command(Preflist, neverreply, ?MASTER).

%% @doc Send a command that is answered straight from handle_command/3.
%% The reply is addressed to the caller as a raw message tagged with the
%% returned Ref.
returnreply(Preflist) ->
    Ref = {neverreply, make_ref()},
    Sender = {raw, Ref, self()},
    riak_core_vnode_master:command(Preflist, returnreply, Sender, ?MASTER),
    {ok, Ref}.

%% @doc Send a command that is answered a little later by a helper
%% process.  The reply is addressed to the caller as a raw message tagged
%% with the returned Ref.
latereply(Preflist) ->
    Ref = {latereply, make_ref()},
    Sender = {raw, Ref, self()},
    riak_core_vnode_master:command(Preflist, latereply, Sender, ?MASTER),
    {ok, Ref}.

%% @doc Make the vnode spawn a linked process that exits at once.
crash(Preflist) ->
    riak_core_vnode_master:sync_command(Preflist, crash, ?MASTER).

%% @doc Synchronously fetch the last recorded crash reason.
get_crash_reason(Preflist) ->
    riak_core_vnode_master:sync_command(Preflist, get_crash_reason, ?MASTER).

%% @doc Synchronously stop the vnode.
stop(Preflist) ->
    riak_core_vnode_master:sync_command(Preflist, stop, ?MASTER).


%% Callbacks

%% @doc Vnode init: remember the index and start counting from zero.
init([Index]) ->
    {ok, #state{index=Index,counter=0}}.

%% @doc Command dispatch.  The three getters answer from the state without
%% counting; stop, neverreply, returnreply and latereply each increment
%% the counter.
handle_command(get_index, _Sender, #state{index = Index} = State) ->
    {reply, {ok, Index}, State};
handle_command(get_counter, _Sender, #state{counter = Counter} = State) ->
    {reply, {ok, Counter}, State};
handle_command(get_crash_reason, _Sender, #state{crash_reason = Reason} = State) ->
    {reply, {ok, Reason}, State};

handle_command(crash, _Sender, State) ->
    %% The linked process exits with the vnode index as its reason.
    spawn_link(fun() -> exit(State#state.index) end),
    {reply, ok, State};
handle_command(stop, Sender, State = #state{counter=Counter}) ->
    %% Send reply here as vnode_master:sync_command does a call
    %% which is cast on to the vnode process.  Returning {stop,...}
    %% does not provide for returning a response.
    riak_core_vnode:reply(Sender, stopped),
    Stopped = State#state{counter = Counter + 1},
    {stop, normal, Stopped};
handle_command(neverreply, _Sender, State = #state{counter=Counter}) ->
    Counted = State#state{counter = Counter + 1},
    {noreply, Counted};
handle_command(returnreply, _Sender, State = #state{counter=Counter}) ->
    Counted = State#state{counter = Counter + 1},
    {reply, returnreply, Counted};
handle_command(latereply, Sender, State = #state{counter=Counter}) ->
    LateReplier = fun() ->
                          timer:sleep(1),
                          riak_core_vnode:reply(Sender, latereply)
                  end,
    spawn(LateReplier),
    Counted = State#state{counter = Counter + 1},
    {noreply, Counted}.

%% @doc Record the exit reason so get_crash_reason/1 can report it.
handle_exit(_Pid, Reason, State) ->
    {noreply, State#state{crash_reason=Reason}}.

%% @doc Nothing to clean up.
terminate(_Reason, _State) ->
    ok.

--------------------------------------------------------------------------------
/src/riak_core_bucket.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Functions for manipulating bucket properties.
%% @type riak_core_bucketprops() = [{Propkey :: atom(), Propval :: term()}]

-module(riak_core_bucket).

-export([append_bucket_defaults/1,
         set_bucket/2,
         get_bucket/1,
         get_bucket/2]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% @doc Add a list of defaults to the global list of defaults for new
%%      buckets.  If any item in Items is already set in the
%%      current defaults list, the new setting is omitted, and the old
%%      setting is kept.  Omitting the new setting is intended
%%      behavior, to allow settings from app.config to override any
%%      hard-coded values.
append_bucket_defaults(Items) when is_list(Items) ->
    OldDefaults = app_helper:get_env(riak_core, default_bucket_props, []),
    NewDefaults = merge_props(OldDefaults, Items),
    application:set_env(riak_core, default_bucket_props, NewDefaults).


%% @spec set_bucket(riak_object:bucket(), BucketProps::riak_core_bucketprops()) -> ok
%% @doc Set the given BucketProps in Bucket.  The new properties are
%%      merged over the bucket's current properties inside a ring
%%      transaction; properties not mentioned keep their old value.
set_bucket(Name, BucketProps) ->
    F = fun(Ring, _Args) ->
                OldBucket = get_bucket(Name),
                NewBucket = merge_props(BucketProps, OldBucket),
                {new_ring, riak_core_ring:update_meta({bucket,Name},
                                                      NewBucket,
                                                      Ring)}
        end,
    {ok, _NewRing} = riak_core_ring_manager:ring_trans(F, undefined),
    ok.

%% @spec merge_props(list(), list()) -> list()
%% @doc Merge two sets of bucket props.  If duplicates exist, the
%%      entries in Overriding are chosen before those in Other.
merge_props(Overriding, Other) ->
    lists:ukeymerge(1, lists:ukeysort(1, Overriding),
                    lists:ukeysort(1, Other)).

%% @spec get_bucket(riak_object:bucket()) ->
%%         BucketProps :: riak_core_bucketprops()
%% @doc Return the complete current list of properties for Bucket,
%%      looked up in this node's current ring.
%% Properties include but are not limited to:
%% <pre>
%% n_val: how many replicas of objects in this bucket (default: 3)
%% allow_mult: can objects in this bucket have siblings? (default: false)
%% linkfun: a function returning a m/r FunTerm for link extraction
%% </pre>
%%
get_bucket(Name) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    get_bucket(Name, Ring).


%% @spec get_bucket(Name, Ring::riak_core_ring:riak_core_ring()) ->
%%          BucketProps :: riak_core_bucketprops()
%% @private
%% @doc Return the properties stored for Name in Ring.  A bucket with
%%      no stored metadata gets {name, Name} followed by the configured
%%      defaults.
get_bucket(Name, Ring) ->
    case riak_core_ring:get_meta({bucket, Name}, Ring) of
        undefined ->
            %% Default to [] (as append_bucket_defaults/1 does) so an
            %% unset default_bucket_props cannot produce the improper
            %% list [{name, Name} | undefined].
            Defaults = app_helper:get_env(riak_core, default_bucket_props, []),
            [{name, Name} | Defaults];
        {ok, Bucket} ->
            Bucket
    end.


%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).

simple_set_test() ->
    application:load(riak_core),
    %% appending an empty list of defaults makes up for the fact that
    %% riak_core_app:start/2 is not called during eunit runs
    %% (that's where the usual defaults are set at startup),
    %% while also not adding any trash that might affect other tests
    append_bucket_defaults([]),
    riak_core_ring_events:start_link(),
    riak_core_ring_manager:start_link(test),
    ok = set_bucket(a_bucket,[{key,value}]),
    Bucket = get_bucket(a_bucket),
    riak_core_ring_manager:stop(),
    ?assertEqual(value, proplists:get_value(key, Bucket)).

-endif.

--------------------------------------------------------------------------------
/src/riak_core_ring_handler.erl:
--------------------------------------------------------------------------------
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.

%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.

-module(riak_core_ring_handler).
-behaviour(gen_event).

%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).
-record(state, {}).


%% ===================================================================
%% gen_event callbacks
%% ===================================================================

%% Install the handler: fetch the current ring and make sure the
%% vnodes it calls for are being started.
init([]) ->
    {ok, CurrentRing} = riak_core_ring_manager:get_my_ring(),
    ensure_vnodes_started(CurrentRing),
    {ok, #state{}}.

%% A new ring was published: make sure its vnodes are started too.
handle_event({ring_update, UpdatedRing}, HandlerState) ->
    ensure_vnodes_started(UpdatedRing),
    {ok, HandlerState}.

%% No calls are supported; every request is answered with 'ok'.
handle_call(_Request, HandlerState) ->
    {ok, ok, HandlerState}.

%% Stray messages are ignored.
handle_info(_Message, HandlerState) ->
    {ok, HandlerState}.

%% Nothing to release on removal.
terminate(_Why, _HandlerState) ->
    ok.

%% The state has no versioned layout, so upgrades keep it as is.
code_change(_FromVsn, HandlerState, _Extra) ->
    {ok, HandlerState}.



%% ===================================================================
%% Internal functions
%% ===================================================================

%% Start vnodes for every registered vnode module.  With no registered
%% modules nothing happens; when modules are registered but none of
%% them has anything startable, the local ring is refreshed.
ensure_vnodes_started(Ring) ->
    Registered = riak_core:vnode_modules(),
    case Registered of
        [] ->
            ok;
        _ ->
            case ensure_vnodes_started(Registered, Ring, []) of
                [] -> riak_core_ring_manager:refresh_my_ring();
                _ -> ok
            end
    end.

%% Kick off startup for each {App, Mod} pair and return the flattened
%% list of all indices that were startable, prepended to Acc.
ensure_vnodes_started(AppMods, Ring, Acc) ->
    PerModule = lists:foldl(
                  fun({App, Mod}, Collected) ->
                          [ensure_vnodes_started({App,Mod},Ring)|Collected]
                  end,
                  Acc,
                  AppMods),
    lists:flatten(PerModule).

%% Work out which indices Mod may start on this node, start them from
%% a linked helper process, and return those indices to the caller.
ensure_vnodes_started({App,Mod}, Ring) ->
    Indices = startable_vnodes(Mod, Ring),
    %% NOTE: This following is a hack.  There's a basic
    %%       dependency/race between riak_core (want to start vnodes
    %%       right away to trigger possible handoffs) and riak_kv
    %%       (needed to support those vnodes).  The hack does not fix
    %%       that dependency: internal techdebt todo list #A7 does.
    spawn_link(fun() -> start_vnodes_when_ready(App, Mod, Indices) end),
    Indices.

%% Body of the helper process: take a per-module lock, wait for App to
%% be running, then start one vnode per index.
start_vnodes_when_ready(App, Mod, Indices) ->
    %% Use a registered name as a lock to prevent the same
    %% vnode module from being started twice.
    LockName = list_to_atom("riak_core_ring_handler_ensure_"
                            ++ atom_to_list(Mod)),
    try register(LockName, self())
    catch error:badarg ->
            %% Another helper already holds the lock for this module.
            exit(normal)
    end,
    %% The wait result is not inspected: vnodes are started either way.
    wait_for_app(App, 100, 100),
    lists:foreach(fun(I) -> Mod:start_vnode(I) end, Indices),
    exit(normal).


%% Indices Mod should have vnodes for on this node.  When this node is
%% the ring's only member that is simply its own indices; otherwise
%% see startable_with_peers/2.
startable_vnodes(Mod, Ring) ->
    Members = riak_core_ring:all_members(Ring),
    case {length(Members), hd(Members) =:= node()} of
        {1, true} ->
            riak_core_ring:my_indices(Ring);
        _ ->
            startable_with_peers(Mod, Ring)
    end.

%% Multi-member case: this node's own indices, plus one randomly chosen
%% index owned by another node that is not in Mod's exclusion list.
%% When no such index exists and the exclusion list covers every
%% partition, nothing is startable.
startable_with_peers(Mod, Ring) ->
    {ok, Excluded} = riak_core_handoff_manager:get_exclusions(Mod),
    case riak_core_ring:random_other_index(Ring, Excluded) of
        no_indices ->
            AllExcluded =
                length(Excluded) =:= riak_core_ring:num_partitions(Ring),
            if
                AllExcluded -> [];
                true -> riak_core_ring:my_indices(Ring)
            end;
        OtherIndex ->
            [OtherIndex | riak_core_ring:my_indices(Ring)]
    end.

%% Poll until App appears in the running applications.  Checks up to
%% Count times, sleeping Sleep milliseconds between checks; returns
%% 'ok' once the application is seen and 'bummer' when attempts run out.
wait_for_app(_App, 0, _Sleep) ->
    bummer;
wait_for_app(App, Count, Sleep) ->
    Running = application:which_applications(),
    case lists:keymember(App, 1, Running) of
        false ->
            timer:sleep(Sleep),
            wait_for_app(App, Count - 1, Sleep);
        true ->
            ok
    end.

--------------------------------------------------------------------------------
/src/spiraltime.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc A set of sliding windows for recording N-per-second running stats.
%%
%% This keeps stats per second for the last minute.
%%
%% See git commit history for versions of this module which keep stats
%% for more than 1 minute.

-module(spiraltime).
-author('Justin Sheehy ').
-export([fresh/0,fresh/1,n/0,incr/2,incr/3,
         rep_second/1,rep_minute/1,
         test_spiraltime/0]).

%% @type moment() = integer().
%% This is a number of seconds, as produced by
%% calendar:datetime_to_gregorian_seconds(calendar:universal_time())

%% @type count() = integer().
%% The number of entries recorded in some time period.

%% moment is the second the head of 'seconds' refers to; element K
%% (0-based) of 'seconds' holds the count for moment - K.  The list
%% always has at least 60 elements.
-record(spiral, {moment :: integer(),
                 seconds :: [integer()]
                }).

%% @doc The current moment: universal time in gregorian seconds.
%% @spec n() -> moment()
n() ->
    calendar:datetime_to_gregorian_seconds(calendar:universal_time()).

%% @doc Create an empty spiral with which to begin recording entries.
%% @spec fresh() -> spiral()
fresh() ->
    fresh(n()).

%% @doc Create an empty spiral with which to begin recording entries.
%% @spec fresh(moment()) -> spiral()
fresh(Moment) ->
    #spiral{moment=Moment,
            seconds=[0 || _ <- lists:seq(1,60)]
           }.

%% Number of slots kept for a window field.
fieldlen(#spiral.seconds) -> 60.

%% The coarser window fed by a field; 'seconds' is the only window.
nextfield(#spiral.seconds) -> done.

%% @doc Produce the number of entries recorded in the last second.
%% @spec rep_second(spiral()) -> {moment(), count()}
rep_second(Spiral) ->
    {Spiral#spiral.moment, hd(Spiral#spiral.seconds)}.

%% @doc Produce the number of entries recorded in the last minute.
%% @spec rep_minute(spiral()) -> {moment(), count()}
rep_minute(Spiral) ->
    {Minute,_} = lists:split(60,Spiral#spiral.seconds),
    {Spiral#spiral.moment, lists:sum(Minute)}.

%% @doc Add N to the counter of events, as recently as possible.
%% @spec incr(count(), spiral()) -> spiral()
incr(N, Spiral) -> incr(N,n(),Spiral).

%% @doc Add N to the counter of events occurring at Moment.
%%      Updates that are 60 seconds old or older fall outside the
%%      one-minute window and are dropped.
%% @spec incr(count(), moment(), spiral()) -> spiral()
incr(N, Moment, Spiral) when Spiral#spiral.moment =:= Moment ->
    % common case -- updates for "now"
    Spiral#spiral{seconds=[hd(Spiral#spiral.seconds)+N|
                           tl(Spiral#spiral.seconds)]};
incr(_N, Moment, Spiral) when Spiral#spiral.moment - Moment >= 60 ->
    %% An age of exactly 60 must be dropped too: slots cover ages 0..59,
    %% and splitting a 60-element list at 60 leaves no slot to update
    %% (hd([]) would crash below).
    Spiral;
incr(N, Moment, Spiral) ->
    S1 = update_moment(Moment, Spiral),
    %% After update_moment/2 the age of Moment is between 0 and 59.
    Age = S1#spiral.moment - Moment,
    {Front,Back} = lists:split(Age, S1#spiral.seconds),
    S1#spiral{seconds=Front ++ [hd(Back)+N|tl(Back)]}.

%% Advance the spiral one second at a time, pushing an empty slot for
%% each elapsed second, until its moment reaches Moment.  A gap larger
%% than 36288000 seconds starts over with a fresh spiral; a Moment that
%% is not in the future leaves the spiral untouched.
update_moment(Moment, Spiral) when Moment =< Spiral#spiral.moment ->
    Spiral;
update_moment(Moment, Spiral) when Moment - Spiral#spiral.moment > 36288000 ->
    fresh(Moment);
update_moment(Moment, Spiral) ->
    update_moment(Moment, push(0, Spiral#spiral{
                                    moment=Spiral#spiral.moment+1},
                               #spiral.seconds)).

%% Record fields are addressed by their tuple position.
getfield(Spiral,Field) -> element(Field, Spiral).
setfield(Spiral,X,Field) -> setelement(Field, Spiral, X).

%% Prepend N to the window at Field.  The window is allowed to grow to
%% twice its nominal length; at that point it is trimmed back to the
%% nominal length and the sum of the kept slots is pushed onto the next
%% coarser window ('done' means there is none).
push(_N, Spiral, done) ->
    Spiral;
push(N, Spiral, Field) ->
    Full = [N|getfield(Spiral,Field)],
    Double = 2 * fieldlen(Field),
    case length(Full) of
        Double ->
            {Keep, _Past} = lists:split(fieldlen(Field), Full),
            push(lists:sum(Keep),setfield(Spiral,Keep,Field),nextfield(Field));
        _ ->
            setfield(Spiral,Full,Field)
    end.

%% @doc Small self-check; returns true or crashes with a badmatch.
test_spiraltime() ->
    Start = n(),
    S0 = fresh(Start),
    S1 = incr(17, Start, S0),
    PlusOne = Start+1,
    S2 = incr(3, PlusOne, S1),
    {PlusOne, 3} = rep_second(S2),
    {PlusOne, 20} = rep_minute(S2),
    true.

--------------------------------------------------------------------------------
/src/riak_core.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Riak: A lightweight, decentralized key-value store.
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_core).
-export([stop/0, stop/1]).
-export([register_vnode_module/1, vnode_modules/0]).
-export([add_guarded_event_handler/3, add_guarded_event_handler/4]).
-export([delete_guarded_event_handler/3]).

%% @spec stop() -> ok
%% @doc Stop the riak application and the calling process.
stop() -> stop("riak stop requested").

%% @doc Log Reason and stop.  In test builds only the riak_core
%%      application is stopped; otherwise the whole runtime is halted.
-ifdef(TEST).
stop(Reason) ->
    %% Let error_logger do the formatting: handing it pre-formatted
    %% text as the format string would re-interpret any '~' in it.
    error_logger:info_msg("~p~n", [Reason]),
    % if we're in test mode, we don't want to halt the node, so instead
    % we just stop the application.
    application:stop(riak_core).
-else.
stop(Reason) ->
    % we never do an application:stop because that makes it very hard
    %  to really halt the runtime, which is what we need here.
    error_logger:info_msg("~p~n", [Reason]),
    init:stop().
-endif.

%% @doc Return the registered vnode modules as a list of {App, Module}
%%      pairs; [] when nothing has been registered.
vnode_modules() ->
    case application:get_env(riak_core, vnode_modules) of
        undefined -> [];
        {ok, Mods} -> Mods
    end.

%% @doc Register VNodeMod as a vnode module and force a ring event so
%%      handlers notice it.  The owning application is the caller's
%%      application or, failing that, the running application whose
%%      module list contains VNodeMod; the call crashes with a badmatch
%%      when neither exists.
register_vnode_module(VNodeMod) when is_atom(VNodeMod)  ->
    {ok, App} = case application:get_application(self()) of
                    {ok, AppName} -> {ok, AppName};
                    undefined -> app_for_module(VNodeMod)
                end,
    %% vnode_modules/0 already maps an unset env to [].
    application:set_env(riak_core, vnode_modules,
                        [{App,VNodeMod}|vnode_modules()]),
    riak_core_ring_events:force_sync_update().

%% @spec add_guarded_event_handler(HandlerMod, Handler, Args) -> AddResult
%%       HandlerMod = module()
%%       Handler = module() | {module(), term()}
%%       Args = list()
%%       AddResult = ok | {error, Reason::term()}
%% @doc Same as add_guarded_event_handler/4 with no exit fun.
add_guarded_event_handler(HandlerMod, Handler, Args) ->
    add_guarded_event_handler(HandlerMod, Handler, Args, undefined).

%% @spec add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) -> AddResult
%%       HandlerMod = module()
%%       Handler = module() | {module(), term()}
%%       Args = list()
%%       ExitFun = fun(Handler, Reason::term())
%%       AddResult = ok | {error, Reason::term()}
%%
%% @doc Add a "guarded" event handler to a gen_event instance.
%%      A guarded handler is implemented as a supervised gen_server
%%      (riak_core_eventhandler_guard) that adds a supervised handler in its
%%      init() callback and exits when the handler crashes so it can be
%%      restarted by the supervisor.
add_guarded_event_handler(HandlerMod, Handler, Args, ExitFun) ->
    riak_core_eventhandler_sup:start_guarded_handler(HandlerMod, Handler, Args, ExitFun).


%% @spec delete_guarded_event_handler(HandlerMod, Handler, Args) -> Result
%%       HandlerMod = module()
%%       Handler = module() | {module(), term()}
%%       Args = term()
%%       Result = term() | {error, module_not_found} | {'EXIT', Reason}
%%       Reason = term()
%%
%% @doc Delete a guarded event handler from a gen_event instance.
%%
%%      Args is an arbitrary term which is passed as one of the arguments to
%%      Module:terminate/2.
%%
%%      The return value is the return value of Module:terminate/2. If the
%%      specified event handler is not installed, the function returns
%%      {error,module_not_found}. If the callback function fails with Reason,
%%      the function returns {'EXIT',Reason}.
delete_guarded_event_handler(HandlerMod, Handler, Args) ->
    riak_core_eventhandler_sup:stop_guarded_handler(HandlerMod, Handler, Args).

%% Find the running application whose module list contains Mod.
%% Returns {ok, App}, or 'undefined' when no running application
%% lists the module.
app_for_module(Mod) ->
    Running = application:which_applications(),
    app_for_module(Running, Mod).

%% Walk the {App, Description, Vsn} entries in order and stop at the
%% first application that lists Mod.
app_for_module([{App,_,_}|Rest], Mod) ->
    {ok, AppModules} = application:get_key(App, modules),
    Owns = lists:member(Mod, AppModules),
    if
        Owns -> {ok, App};
        true -> app_for_module(Rest, Mod)
    end;
app_for_module([], _Mod) ->
    undefined.

--------------------------------------------------------------------------------
/src/riak_core_handoff_receiver.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_handoff_receiver: incoming data handler for TCP-based handoff
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc incoming data handler for TCP-based handoff

-module(riak_core_handoff_receiver).
-include_lib("riak_core_handoff.hrl").
-behaviour(gen_server2).
-export([start_link/0,                          % Don't use SSL
         start_link/1,                          % SSL options list, empty=no SSL
         set_socket/2]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-record(state, {sock :: port(),
                ssl_opts :: [] | list(),
                tcp_mod :: atom(),
                partition :: non_neg_integer(),
                vnode_mod = riak_kv_vnode:: module(),
                vnode :: pid(),
                count = 0 :: non_neg_integer()}).


%% Start a receiver that talks plain TCP.
start_link() ->
    start_link([]).

%% Start a receiver; a non-empty SslOpts list selects SSL.
start_link(SslOpts) ->
    gen_server2:start_link(?MODULE, [SslOpts], []).

%% Hand an accepted socket to the receiver Pid.
set_socket(Pid, Socket) ->
    gen_server2:call(Pid, {set_socket, Socket}).

%% Remember the SSL options and pick the module used for sending.
init([SslOpts]) ->
    SendMod = case SslOpts of
                  [] -> gen_tcp;
                  _ -> ssl
              end,
    {ok, #state{ssl_opts = SslOpts, tcp_mod = SendMod}}.

%% Take ownership of the socket: complete the SSL accept when SSL is
%% configured (30 second limit), then switch the socket to
%% {active, once} with 4-byte length framing and a 1-byte header.
handle_call({set_socket, RawSocket}, _From, State = #state{ssl_opts = SslOpts}) ->
    SockOpts = [{active, once}, {packet, 4}, {header, 1}],
    Ready = case SslOpts of
                [] ->
                    ok = inet:setopts(RawSocket, SockOpts),
                    RawSocket;
                _ ->
                    {ok, SslSocket} = ssl:ssl_accept(RawSocket, SslOpts, 30*1000),
                    ok = ssl:setopts(SslSocket, SockOpts),
                    SslSocket
            end,
    {reply, ok, State#state { sock = Ready }}.


%% Socket events.  SSL events are translated to their TCP equivalents
%% so both transports share one code path.
handle_info({ssl_closed, Socket}, State) ->
    handle_info({tcp_closed, Socket}, State);
handle_info({ssl_error, Socket, Reason}, State) ->
    handle_info({tcp_error, Socket, Reason}, State);
handle_info({ssl, Socket, Data}, State) ->
    handle_info({tcp, Socket, Data}, State);
handle_info({tcp_closed, _Socket}, State) ->
    %% Peer closed the connection: log the object count and stop.
    error_logger:info_msg("Handoff receiver for partition ~p exiting after processing ~p objects~n",
                          [State#state.partition, State#state.count]),
    {stop, normal, State};
handle_info({tcp_error, _Socket, _Reason}, State) ->
    %% Socket errors are treated exactly like a close.
    error_logger:info_msg("Handoff receiver for partition ~p exiting after processing ~p objects~n",
                          [State#state.partition, State#state.count]),
    {stop, normal, State};
handle_info({tcp, Socket, Data}, State) ->
    %% Data arrives as a list whose head is the message-type byte.
    [MsgType|MsgData] = Data,
    case (catch process_message(MsgType, MsgData, State)) of
        {'EXIT', Reason} ->
            error_logger:error_msg("Handoff receiver for partition ~p exiting abnormally after processing ~p objects: ~p\n",
                                   [State#state.partition, State#state.count, Reason]),
            {stop, normal, State};
        NewState when is_record(NewState, state) ->
            %% Re-arm the socket for the next message.
            case NewState#state.ssl_opts of
                [] -> inet:setopts(Socket, [{active, once}]);
                _ -> ssl:setopts(Socket, [{active, once}])
            end,
            {noreply, NewState}
    end.


%% Handle one handoff protocol message and return the updated state.
%%
%% NOTE(review): the binary patterns and constructors marked
%% "reconstructed" below were reduced to `<>' by text extraction.  They
%% are rebuilt from the message-type macros in riak_core_handoff.hrl
%% and the record's field types -- confirm against the handoff sender.
process_message(?PT_MSG_INIT, MsgData, State=#state{vnode_mod=VNodeMod}) ->
    %% reconstructed: the payload is the partition id
    <<Partition:160/integer>> = MsgData,
    error_logger:info_msg("Receiving handoff data for partition ~p:~p~n", [VNodeMod, Partition]),
    {ok, VNode} = riak_core_vnode_master:get_vnode_pid(Partition, VNodeMod),
    State#state{partition=Partition, vnode=VNode};
process_message(?PT_MSG_OBJ, MsgData, State=#state{vnode=VNode, count=Count}) ->
    %% Hand the object to the vnode and wait up to 60 seconds for it.
    Msg = {handoff_data, MsgData},
    gen_fsm:sync_send_all_state_event(VNode, Msg, 60000),
    State#state{count=Count+1};
process_message(?PT_MSG_OLDSYNC, MsgData, State=#state{sock=Socket,
                                                       tcp_mod=TcpMod}) ->
    %% reconstructed: acknowledge with the message type and "sync"
    TcpMod:send(Socket, <<?PT_MSG_OLDSYNC:8,"sync">>),
    %% reconstructed: the whole payload is the vnode module name
    <<VNodeModBin/binary>> = MsgData,
    %% NOTE(review): this creates an atom from data read off the
    %% socket; atoms are never garbage collected.  Consider
    %% binary_to_existing_atom/2 if the peer is not fully trusted.
    VNodeMod = binary_to_atom(VNodeModBin, utf8),
    State#state{vnode_mod=VNodeMod};
process_message(?PT_MSG_SYNC, _MsgData, State=#state{sock=Socket,
                                                     tcp_mod=TcpMod}) ->
    %% reconstructed: acknowledge with the message type and "sync"
    TcpMod:send(Socket, <<?PT_MSG_SYNC:8, "sync">>),
    State;
process_message(?PT_MSG_CONFIGURE, MsgData, State) ->
    %% NOTE(review): binary_to_term/1 on network data can create atoms
    %% and funs; consider binary_to_term(MsgData, [safe]).
    ConfProps = binary_to_term(MsgData),
    State#state{vnode_mod=proplists:get_value(vnode_mod, ConfProps),
                partition=proplists:get_value(partition, ConfProps)};
process_message(_, _MsgData, State=#state{sock=Socket,
                                          tcp_mod=TcpMod}) ->
    %% Unknown message types are answered, not treated as fatal.
    TcpMod:send(Socket, <<255:8,"unknown_msg">>),
    State.

%% Casts are not part of the protocol; ignore them.
handle_cast(_Msg, State) -> {noreply, State}.

terminate(_Reason, _State) -> ok.

code_change(_OldVsn, State, _Extra) -> {ok, State}.


--------------------------------------------------------------------------------
/src/gen_nb_server.erl:
--------------------------------------------------------------------------------
%% Copyright (c) 2009 Hypothetical Labs, Inc.

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.

-module(gen_nb_server).

-author('kevin@hypotheticalabs.com').

-behaviour(gen_server).

%% API
-export([start_link/4]).

%% Behavior callbacks
-export([behaviour_info/1]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-define(SERVER, ?MODULE).

%% cb:           the callback module implementing this behaviour
%% sock:         the listening socket
%% server_state: the callback module's own state
-record(state, {cb,
                sock,
                server_state}).

%% @hidden
%% Callbacks a gen_nb_server callback module must export: the usual
%% gen_server set (minus code_change) plus sock_opts/0 and
%% new_connection/2.
behaviour_info(callbacks) ->
    GenServerCallbacks = [{init, 1},
                          {handle_call, 3},
                          {handle_cast, 2},
                          {handle_info, 2},
                          {terminate, 2}],
    ListenerCallbacks = [{sock_opts, 0},
                         {new_connection, 2}],
    GenServerCallbacks ++ ListenerCallbacks;

behaviour_info(_) ->
    undefined.


%% @spec start_link(CallbackModule, IpAddr, Port, InitParams) -> Result
%% CallbackModule = atom()
%% IpAddr = string()
%% Port = integer()
%% InitParams = [any()]
%% Result = {ok, pid()} | {error, any()}
%% @doc Start server listening on IpAddr:Port
start_link(CallbackModule, IpAddr, Port, InitParams) ->
    gen_server:start_link(?MODULE, [CallbackModule, IpAddr, Port, InitParams], []).

%% @hidden
%% Initialise the callback module, then open the listening socket.
%% When listening fails the callback's terminate/2 is run and the
%% server stops with the listen error as its reason.
init([CallbackModule, IpAddr, Port, InitParams]) ->
    case CallbackModule:init(InitParams) of
        {ok, ServerState} ->
            case listen_on(CallbackModule, IpAddr, Port) of
                {ok, Sock} ->
                    {ok, #state{cb=CallbackModule, sock=Sock, server_state=ServerState}};
                {error, Reason} = Error ->
                    CallbackModule:terminate(Error, ServerState),
                    %% {stop, Reason} makes start_link/4 return
                    %% {error, Reason} as its @spec promises, instead of
                    %% a bad_return_value error from gen_server.
                    {stop, Reason};
                Other ->
                    CallbackModule:terminate(Other, ServerState),
                    {stop, Other}
            end;
        Err ->
            %% Anything else (e.g. {stop, Reason} or ignore) is passed
            %% through to gen_server untouched.
            Err
    end.

%% @hidden
%% Delegate the call to the callback module and re-wrap its state.
%% The optional fourth/third element (timeout or hibernate) is passed
%% through as is, so 'infinity' is accepted as well; gen_server itself
%% rejects invalid values.
handle_call(Request, From, #state{cb=Callback, server_state=ServerState}=State) ->
    case Callback:handle_call(Request, From, ServerState) of
        {reply, Reply, NewServerState} ->
            {reply, Reply, State#state{server_state=NewServerState}};
        {reply, Reply, NewServerState, Arg} ->
            {reply, Reply, State#state{server_state=NewServerState}, Arg};
        {noreply, NewServerState} ->
            {noreply, State#state{server_state=NewServerState}};
        {noreply, NewServerState, Arg} ->
            {noreply, State#state{server_state=NewServerState}, Arg};
        {stop, Reason, NewServerState} ->
            {stop, Reason, State#state{server_state=NewServerState}};
        {stop, Reason, Reply, NewServerState} ->
            {stop, Reason, Reply, State#state{server_state=NewServerState}}
    end.


%% @hidden
%% Delegate the cast to the callback module and re-wrap its state.
%% A timeout/hibernate element is passed through unchanged, so
%% 'infinity' is accepted too.
handle_cast(Msg, #state{cb=Callback, server_state=ServerState}=State) ->
    case Callback:handle_cast(Msg, ServerState) of
        {noreply, NewServerState} ->
            {noreply, State#state{server_state=NewServerState}};
        {noreply, NewServerState, Arg} ->
            {noreply, State#state{server_state=NewServerState}, Arg};
        {stop, Reason, NewServerState} ->
            {stop, Reason, State#state{server_state=NewServerState}}
    end.

%% @hidden
%% A client connected: register the socket, hand it to the callback
%% module, and re-arm the asynchronous accept.
handle_info({inet_async, ListSock, _Ref, {ok, CliSocket}}, #state{cb=Callback, server_state=ServerState}=State) ->
    inet_db:register_socket(CliSocket, inet_tcp),
    case Callback:new_connection(CliSocket, ServerState) of
        {ok, NewServerState} ->
            NewState = State#state{server_state=NewServerState},
            %% If the accept cannot be re-armed, no further connection
            %% would ever be delivered; stop instead of lingering as a
            %% listener that silently accepts nothing.
            case prim_inet:async_accept(ListSock, -1) of
                {ok, _NewRef} ->
                    {noreply, NewState};
                {error, AcceptError} ->
                    {stop, {async_accept_failed, AcceptError}, NewState}
            end;
        {stop, Reason, NewServerState} ->
            {stop, Reason, State#state{server_state=NewServerState}}
    end;

%% Every other message goes to the callback module.
handle_info(Info, #state{cb=Callback, server_state=ServerState}=State) ->
    case Callback:handle_info(Info, ServerState) of
        {noreply, NewServerState} ->
            {noreply, State#state{server_state=NewServerState}};
        {noreply, NewServerState, Arg} ->
            {noreply, State#state{server_state=NewServerState}, Arg};
        {stop, Reason, NewServerState} ->
            {stop, Reason, State#state{server_state=NewServerState}}
    end.

%% @hidden
%% Close the listening socket, then let the callback module clean up.
terminate(Reason, #state{cb=Callback, sock=Sock, server_state=ServerState}) ->
    gen_tcp:close(Sock),
    Callback:terminate(Reason, ServerState),
    ok.

%% @hidden
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%% Internal functions

%% @hidden
%% @spec listen_on(CallbackModule, IpAddr, Port) -> Result
%% CallbackModule = atom()
%% IpAddr = string()
%% Port = integer()
%% Result = {ok, port()} | {error, any()}
%% Open the listening socket with the callback module's socket options
%% (plus the bind address) and arm the first asynchronous accept.
listen_on(CallbackModule, IpAddr, Port) ->
    BindAddr = {ip, convert(IpAddr)},
    ListenOpts = [BindAddr|CallbackModule:sock_opts()],
    case gen_tcp:listen(Port, ListenOpts) of
        {ok, ListenSocket} ->
            {ok, _Ref} = prim_inet:async_accept(ListenSocket, -1),
            {ok, ListenSocket};
        ListenError ->
            ListenError
    end.

%% @hidden
%% @spec convert(Addr) -> Result
%% Addr = string()
%% Result = {integer(), integer(), integer(), integer()}
%% @doc Converts text IP addresses "0.0.0.0" to tuples {0, 0, 0, 0}
convert(Addr) ->
    Octets = lists:map(fun(Part) -> list_to_integer(Part) end,
                       string:tokens(Addr, ".")),
    list_to_tuple(Octets).

--------------------------------------------------------------------------------
/src/bloom.erl:
--------------------------------------------------------------------------------
% ``The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved via the world wide web at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
-module(bloom).
-author("Paulo Sergio Almeida ").
-export([sbf/1, sbf/2, sbf/3, sbf/4,
         bloom/1, bloom/2,
         member/2, add/2,
         size/1, capacity/1]).
-export([is_element/2, add_element/2]). % alternative names
-import(math, [log/1, pow/2]).


%% Alternative names for member/2 and add/2.
is_element(Elem, Filter) -> member(Elem, Filter).
add_element(Elem, Filter) -> add(Elem, Filter).

%% Based on
%% Scalable Bloom Filters
%% Paulo Sérgio Almeida, Carlos Baquero, Nuno Preguiça, David Hutchison
%% Information Processing Letters
%% Volume 101, Issue 6, 31 March 2007, Pages 255-261
%%
%% Provides scalable bloom filters that can grow indefinitely while
%% ensuring a desired maximum false positive probability. Also provides
%% standard partitioned bloom filters with a maximum capacity. Bit arrays
%% are dimensioned as a power of 2 to enable reusing hash values across
%% filters through bit operations. Double hashing is used (no need for
%% enhanced double hashing for partitioned bloom filters).

%% modified slightly by Justin Sheehy to make it a single file
%% (incorporated the array-based bitarray internally)

%% Number of bits stored in each element of a bit array.
-define(W, 27).

-record(bloom, {
  e,    % error probability
  n,    % maximum number of elements
  mb,   % 2^mb = m, the size of each slice (bitvector)
  size, % number of elements
  a     % list of bitvectors
}).

-record(sbf, {
  e,    % error probability
  r,    % error probability ratio
  s,    % log 2 of growth ratio
  size, % number of elements
  b     % list of plain bloom filters
}).

%% Constructors for (fixed capacity) bloom filters
%%
%% N - capacity
%% E - error probability

%% Fixed-capacity filter with the default error probability of 0.001.
bloom(Capacity) ->
    bloom(Capacity, 0.001).

%% Fixed-capacity filter for Capacity elements with error probability
%% ErrProb (a float strictly between 0 and 1).
bloom(Capacity, ErrProb)
  when is_number(Capacity), Capacity > 0,
       is_float(ErrProb), ErrProb > 0, ErrProb < 1,
       Capacity >= 4/ErrProb -> % rule of thumb; due to double hashing
    bloom(size, Capacity, ErrProb).

%% Builds an empty #bloom{} for error probability E.
%%
%% Mode = size: Dim is the wanted capacity and the slice width (in bits,
%%              log 2) is derived from it.
%% Mode = bits: Dim is used directly as the slice width (log 2).
%%
%% The stored capacity (n) is always recomputed from the slice size that
%% was actually chosen.
bloom(Mode, Dim, E) ->
    SliceCount = 1 + trunc(log2(1 / E)),
    SliceErr = math:pow(E, 1 / SliceCount),
    Mb = case Mode of
             size -> 1 + trunc(-log2(1 - math:pow(1 - SliceErr, 1 / Dim)));
             bits -> Dim
         end,
    SliceSize = 1 bsl Mb,
    Capacity = trunc(math:log(1 - SliceErr) / math:log(1 - 1 / SliceSize)),
    Slices = lists:map(fun(_) -> bitarray_new(SliceSize) end,
                       lists:seq(1, SliceCount)),
    #bloom{e = E, n = Capacity, mb = Mb, size = 0, a = Slices}.

%% Base-2 logarithm.
log2(X) -> math:log(X) / math:log(2).

%% Constructors for scalable bloom filters
%%
%% N - initial capacity before expanding
%% E - error probability
%% S - growth ratio when full (log 2) can be 1, 2 or 3
%% R - tightening ratio of error probability
%%
%% Defaults: E = 0.001, S = 1; R is 0.85, 0.75 or 0.65 for S = 1, 2 or 3.

sbf(N) -> sbf(N, 0.001).

sbf(N, E) -> sbf(N, E, 1).

sbf(N, E, 1) -> sbf(N, E, 1, 0.85);
sbf(N, E, 2) -> sbf(N, E, 2, 0.75);
sbf(N, E, 3) -> sbf(N, E, 3, 0.65).

sbf(N, E, S, R)
  when is_number(N), N > 0,
       is_float(E), E > 0, E < 1,
       is_integer(S), S > 0, S < 4,
       is_float(R), R > 0, R < 1,
       N >= 4 / (E * (1 - R)) -> % rule of thumb; due to double hashing
    %% The first inner filter is built for the tightened error E*(1-R).
    FirstErr = E * (1 - R),
    #sbf{e = E, s = S, r = R, size = 0, b = [bloom(N, FirstErr)]}.

%% Returns number of elements
%%
size(#bloom{} = Filter) -> Filter#bloom.size;
size(#sbf{} = Filter) -> Filter#sbf.size.

%% Returns capacity (always the atom infinity for a scalable filter)
%%
capacity(#bloom{} = Filter) -> Filter#bloom.n;
capacity(#sbf{}) -> infinity.

%% Test for membership
%%
%% For a scalable filter the hashes are computed once, using the slice
%% width of the head filter, and reused for every inner filter.
member(Elem, #bloom{mb = Mb} = Filter) ->
    hash_member(make_hashes(Mb, Elem), Filter);
member(Elem, #sbf{b = [Newest | _]} = Sbf) ->
    hash_member(make_hashes(Newest#bloom.mb, Elem), Sbf).

%% Membership test on precomputed hashes.
hash_member(Hashes, #bloom{mb = Mb, a = Slices}) ->
    Mask = (1 bsl Mb) - 1,
    {Step, Start} = make_indexes(Mask, Hashes),
    all_set(Mask, Step, Start, Slices);
hash_member(Hashes, #sbf{b = Filters}) ->
    lists:any(fun(Filter) -> hash_member(Hashes, Filter) end, Filters).

%% Hashes an element. Slice widths up to 16 bits need a single 32-bit
%% hash; widths up to 32 bits get a pair of independent 32-bit hashes.
%% There is no clause for wider slices.
make_hashes(Mb, Elem) when Mb =< 16 ->
    erlang:phash2({Elem}, 1 bsl 32);
make_hashes(Mb, Elem) when Mb =< 32 ->
    Range = 1 bsl 32,
    {erlang:phash2({Elem}, Range), erlang:phash2([Elem], Range)}.

%% Turns hashes into a masked {Step, Start} index pair for double
%% hashing. A hash pair is used as-is only when the mask is wider than
%% 16 bits; otherwise the two halves of the first hash are used.
make_indexes(Mask, {Hash0, Hash1}) when Mask > (1 bsl 16) ->
    masked_pair(Mask, Hash0, Hash1);
make_indexes(Mask, {Hash0, _}) ->
    make_indexes(Mask, Hash0);
make_indexes(Mask, Hash0) ->
    masked_pair(Mask, Hash0 bsr 16, Hash0).

masked_pair(Mask, X, Y) -> {X band Mask, Y band Mask}.

%% True when the probed bit is set in every slice. The probed index
%% advances by Step (modulo the slice size) from one slice to the next.
all_set(_Mask, _Step, _Index, []) ->
    true;
all_set(Mask, Step, Index, [Slice | Slices]) ->
    bitarray_get(Index, Slice)
        andalso all_set(Mask, Step, (Index + Step) band Mask, Slices).

%% Adds element to set
%%
%% For a scalable filter: nothing changes when the element already
%% appears to be present. Otherwise it goes into the head filter while
%% that still has room, or into a freshly created, larger head filter
%% (slice width grown by s, error tightened by r) when the head is full.
add(Elem, #bloom{mb = Mb} = Filter) ->
    hash_add(make_hashes(Mb, Elem), Filter);
add(Elem, #sbf{size = Count, r = Ratio, s = Growth,
               b = [Newest | Older] = Filters} = Sbf) ->
    #bloom{mb = Mb, e = Err, n = Cap, size = Used} = Newest,
    Hashes = make_hashes(Mb, Elem),
    case {hash_member(Hashes, Sbf), Used < Cap} of
        {true, _} ->
            Sbf;
        {false, true} ->
            Sbf#sbf{size = Count + 1, b = [hash_add(Hashes, Newest) | Older]};
        {false, false} ->
            Grown = add(Elem, bloom(bits, Mb + Growth, Err * Ratio)),
            Sbf#sbf{size = Count + 1, b = [Grown | Filters]}
    end.

%% Adds precomputed hashes to a plain filter. The element count is only
%% bumped when at least one bit was not set yet.
hash_add(Hashes, #bloom{mb = Mb, a = Slices, size = Count} = Filter) ->
    Mask = (1 bsl Mb) - 1,
    {Step, Start} = make_indexes(Mask, Hashes),
    case all_set(Mask, Step, Start, Slices) of
        false ->
            Filter#bloom{size = Count + 1,
                         a = set_bits(Mask, Step, Start, Slices, [])};
        true ->
            Filter
    end.

%% Sets the probed bit in every slice, advancing the index by Step for
%% each slice. Acc holds already-updated slices in reverse order.
set_bits(Mask, Step, Index, Slices, Acc) ->
    SetOne = fun(Slice, Idx) ->
                     {bitarray_set(Idx, Slice), (Idx + Step) band Mask}
             end,
    {Updated, _NextIndex} = lists:mapfoldl(SetOne, Index, Slices),
    lists:reverse(Acc, Updated).

%% New bitarray with room for N bits, all cleared, ?W bits per cell.
bitarray_new(N) ->
    Cells = ((N - 1) div ?W) + 1,
    array:new(Cells, {default, 0}).

%% Returns the bitarray with bit I set.
bitarray_set(I, A) ->
    Cell = I div ?W,
    Bit = 1 bsl (I rem ?W),
    array:set(Cell, array:get(Cell, A) bor Bit, A).

%% True when bit I of the bitarray is set.
bitarray_get(I, A) ->
    Cell = array:get(I div ?W, A),
    Bit = 1 bsl (I rem ?W),
    (Cell band Bit) =/= 0.

--------------------------------------------------------------------------------
/src/riak_core_sysmon_minder.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(riak_core_sysmon_minder).

-behaviour(gen_server).

%% API
-export([start_link/0]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-record(state, {}).

%%%===================================================================
%%% API
%%%===================================================================

%%--------------------------------------------------------------------
%% @doc
%% Starts the minder, registered locally under the module name.
%%
%% @spec start_link() -> {ok, Pid} | ignore | {error, Error}
%% @end
%%--------------------------------------------------------------------
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).

%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Installs the system_monitor event handler and starts with an empty
%% state.
%%
%% @spec init(Args) -> {ok, State} |
%%                     {ok, State, Timeout} |
%%                     ignore |
%%                     {stop, Reason}
%% @end
%%--------------------------------------------------------------------
init([]) ->
    %% The handler is added from this process (rather than from a
    %% supervisor or the app-starting process) so that somebody is
    %% around to receive the notification in the very unlikely event
    %% that riak_core_sysmon_handler crashes and is removed from the
    %% riak_sysmon_handler gen_event server. If nobody long-lived added
    %% it, nobody would act on the crash notification.
    riak_core_sysmon_handler:add_handler(),
    InitialState = #state{},
    {ok, InitialState}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Handling call messages. Every request is answered with ok and the
%% state is left untouched.
%%
%% @spec handle_call(Request, From, State) ->
%%                                   {reply, Reply, State} |
%%                                   {reply, Reply, State, Timeout} |
%%                                   {noreply, State} |
%%                                   {noreply, State, Timeout} |
%%                                   {stop, Reason, Reply, State} |
%%                                   {stop, Reason, State}
%% @end
%%--------------------------------------------------------------------
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Handling cast messages. All casts are ignored.
%%
%% @spec handle_cast(Msg, State) -> {noreply, State} |
%%                                  {noreply, State, Timeout} |
%%                                  {stop, Reason, State}
%% @end
%%--------------------------------------------------------------------
handle_cast(_Msg, State) ->
    {noreply, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Handling all non call/cast messages. A gen_event_EXIT notice for
%% riak_core_sysmon_handler makes the minder re-install the handler
%% after a short pause; everything else is ignored.
%%
%% @spec handle_info(Info, State) -> {noreply, State} |
%%                                   {noreply, State, Timeout} |
%%                                   {stop, Reason, State}
%% @end
%%--------------------------------------------------------------------
handle_info({gen_event_EXIT, riak_core_sysmon_handler, _}, State) ->
    %% SASL will create an error message, no need for us to duplicate it.
    %%
    %% Our handler should never crash, but it did indeed crash. If a
    %% pathological condition somewhere is generating lots of unforeseen
    %% things that crash core's custom handler, jumping straight back
    %% into the exploding volcano could make things worse, so wait a
    %% little bit first. Besides, the system_monitor data is nice but
    %% not critical: there is no need to make things worse if things are
    %% indeed bad, and missing a few seconds of system_monitor events
    %% will not end the world.
    BackoffMs = 2 * 1000,
    timer:sleep(BackoffMs),
    riak_core_sysmon_handler:add_handler(),
    {noreply, State};
handle_info(_Info, State) ->
    {noreply, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called by gen_server when the process is about to terminate. There
%% is nothing to clean up here; the return value is ignored.
%%
%% @spec terminate(Reason, State) -> void()
%% @end
%%--------------------------------------------------------------------
terminate(_Reason, _State) ->
    ok.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Convert process state when code is changed. The state is carried
%% over unchanged.
%%
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================
--------------------------------------------------------------------------------
/src/riak_core_config.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc A module to provide access to riak_core configuration information.
%% @type riak_core_bucketprops() = [{Propkey :: atom(), Propval :: term()}]

-module(riak_core_config).

-author('Kelly McLaughlin ').

-export([http_ip_and_port/0, ring_state_dir/0, ring_creation_size/0,
         default_bucket_props/0, cluster_name/0, gossip_interval/0,
         target_n_val/0]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% ===================================================================
%% Public API
%% ===================================================================

%% @spec http_ip_and_port() -> {string(), integer()} | error
%% @doc Get the HTTP IP address and port number environment variables.
%%      The first entry of the `http' list wins; an empty list yields
%%      `error'. When `http' is not set at all, the pre-0.14 `web_ip' /
%%      `web_port' pair is consulted instead.
http_ip_and_port() ->
    case get_riak_core_env(http) of
        [{WebIp, WebPort} | _] ->
            {WebIp, WebPort};
        [] ->
            error;
        undefined ->
            %% Fallback to pre-0.14 HTTP config.
            %% TODO: Remove in 0.16
            legacy_http_ip_and_port()
    end.

%% @private
%% @doc Read the pre-0.14 `web_ip' / `web_port' settings. Both must be
%%      set; a successful lookup logs a reminder to migrate the config.
legacy_http_ip_and_port() ->
    case {get_riak_core_env(web_ip), get_riak_core_env(web_port)} of
        {undefined, _} ->
            error;
        {_, undefined} ->
            error;
        {LegacyIp, LegacyPort} ->
            error_logger:info_msg("Found HTTP config for riak_core using pre-0.14 config "
                                  "values; please update the config file to use new HTTP "
                                  "binding configuration values.\n"),
            {LegacyIp, LegacyPort}
    end.

%% @spec ring_state_dir() -> string() | undefined
%% @doc Get the ring_state_dir environment variable.
ring_state_dir() ->
    get_riak_core_env(ring_state_dir).

%% @spec ring_creation_size() -> integer() | undefined
%% @doc Get the ring_creation_size environment variable.
ring_creation_size() ->
    get_riak_core_env(ring_creation_size).

%% @spec cluster_name() -> string() | undefined
%% @doc Get the cluster_name environment variable.
cluster_name() ->
    get_riak_core_env(cluster_name).

%% @spec gossip_interval() -> integer() | undefined
%% @doc Get the gossip_interval environment variable.
gossip_interval() ->
    get_riak_core_env(gossip_interval).

%% @spec target_n_val() -> integer() | undefined
%% @doc Get the target_n_val environment variable.
target_n_val() ->
    get_riak_core_env(target_n_val).

%% @spec default_bucket_props() -> BucketProps::riak_core_bucketprops() | undefined
%% @doc Get the default_bucket_props environment variable.
default_bucket_props() ->
    get_riak_core_env(default_bucket_props).

%% @private
%% @doc Look up Key in the riak_core application environment.
get_riak_core_env(Key) ->
    app_helper:get_env(riak_core, Key).


%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).

%% Runs every case inside one setup/cleanup pair (application load and
%% unload), in the listed order.
riak_core_config_test_() ->
    Cases = [
             fun http_ip_and_port_test_case/0,
             fun default_bucket_props_test_case/0,
             fun target_n_val_test_case/0,
             fun gossip_interval_test_case/0,
             fun cluster_name_test_case/0,
             fun ring_creation_size_test_case/0,
             fun ring_state_dir_test_case/0,
             fun non_existent_var_test_case/0
            ],
    {setup, fun setup/0, fun cleanup/1, Cases}.

http_ip_and_port_test_case() ->
    %% Nothing configured yet
    ?assertEqual(error, http_ip_and_port()),
    %% Test the pre-0.14 style config
    LegacyIp = "127.0.0.1",
    LegacyPort = 8098,
    application:set_env(riak_core, web_ip, LegacyIp),
    application:set_env(riak_core, web_port, LegacyPort),
    ?assertEqual({"127.0.0.1", 8098}, http_ip_and_port()),
    %% Test the config for 0.14 and later
    application:set_env(riak_core, http, [{"localhost", 9000}]),
    ?assertEqual({"localhost", 9000}, http_ip_and_port()).


default_bucket_props_test_case() ->
    Props = [{allow_mult,false},
             {chash_keyfun,{riak_core_util,chash_std_keyfun}},
             {last_write_wins,false},
             {n_val,3},
             {postcommit,[]},
             {precommit,[]}],
    application:set_env(riak_core, default_bucket_props, Props),
    ?assertEqual(Props, default_bucket_props()).

%% NOTE(review): the expected value is not set here; presumably it comes
%% from the application environment loaded in setup/0 -- confirm.
target_n_val_test_case() ->
    ?assertEqual(4, target_n_val()).

gossip_interval_test_case() ->
    %% Explicitly set the value because other
    %% unit tests change the default.
    Interval = 60000,
    application:set_env(riak_core, gossip_interval, Interval),
    ?assertEqual(Interval, gossip_interval()).

%% NOTE(review): relies on a value that is not set in this test;
%% presumably the loaded application default -- confirm.
cluster_name_test_case() ->
    ?assertEqual("default", cluster_name()).

ring_creation_size_test_case() ->
    %% Explicitly set the value because other
    %% unit tests change the default.
    RingSize = 64,
    application:set_env(riak_core, ring_creation_size, RingSize),
    ?assertEqual(RingSize, ring_creation_size()).

%% NOTE(review): relies on a value that is not set in this test;
%% presumably the loaded application default -- confirm.
ring_state_dir_test_case() ->
    Expected = "data/ring",
    ?assertEqual(Expected, ring_state_dir()).

%% A key that was never configured reads back as undefined.
non_existent_var_test_case() ->
    ?assertEqual(undefined, get_riak_core_env(bogus)).

%% Fixture: load the riak_core application before the cases run.
setup() ->
    application:load(riak_core).

%% Fixture: unload the application again once the cases are done.
cleanup(_SetupResult) ->
    application:unload(riak_core).

-endif.
--------------------------------------------------------------------------------
/src/riak_core_sysmon_handler.erl:
--------------------------------------------------------------------------------
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.

%% @doc A custom event handler to the `riak_sysmon' application's
%% `system_monitor' event manager.
%%
%% This module attempts to discover more information about a process
%% that generates a system_monitor event.

-module(riak_core_sysmon_handler).

-behaviour(gen_event).

%% API
-export([add_handler/0]).
%% Perhaps useful to the outside world.
-export([format_pretty_proc_info/1, format_pretty_proc_info/2,
         get_pretty_proc_info/1, get_pretty_proc_info/2]).

%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).

-record(state, {}).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc Installs this module as a custom handler through
%% riak_sysmon_filter, unless it is already listed among the handlers
%% of the riak_sysmon_handler event manager.
add_handler() ->
    %% Vulnerable to race conditions (installing handler multiple
    %% times), but risk is zero in the common OTP app startup case.
    Installed = gen_event:which_handlers(riak_sysmon_handler),
    case lists:member(?MODULE, Installed) of
        false ->
            riak_sysmon_filter:add_custom_handler(?MODULE, []);
        true ->
            ok
    end.

%%%===================================================================
%%% gen_event callbacks
%%%===================================================================

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when the handler is added to an event manager; starts with
%% an empty state.
%%
%% @spec init(Args) -> {ok, State}
%% @end
%%--------------------------------------------------------------------
init([]) ->
    {ok, #state{}}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called for every event the manager receives through
%% gen_event:notify/2 or gen_event:sync_notify/2. A
%% {monitor, Pid, Type, Info} event is logged together with a
%% description of the process; any other event is logged as-is.
%%
%% @spec handle_event(Event, State) ->
%%                          {ok, State} |
%%                          {swap_handler, Args1, State1, Mod2, Args2} |
%%                          remove_handler
%% @end
%%--------------------------------------------------------------------
handle_event({monitor, Pid, Type, Info}, State) ->
    ProcDescr = format_pretty_proc_info(Pid, almost_current_function),
    LogArgs = [Type, Pid, ProcDescr, Info],
    error_logger:info_msg("monitor ~w ~w ~s ~w\n", LogArgs),
    {ok, State};
handle_event(Event, State) ->
    error_logger:info_msg("Monitor ~p\n", [Event]),
    {ok, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called for requests sent with gen_event:call/3,4. No request is
%% supported; the reply is always not_supported.
%%
%% @spec handle_call(Request, State) ->
%%                   {ok, Reply, State} |
%%                   {swap_handler, Reply, Args1, State1, Mod2, Args2} |
%%                   {remove_handler, Reply}
%% @end
%%--------------------------------------------------------------------
handle_call(_Call, State) ->
    {ok, not_supported, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when the event manager receives any message other than an
%% event or a synchronous request (or a system message). Such messages
%% are logged, except for the test hook that makes the handler exit.
%%
%% @spec handle_info(Info, State) ->
%%                         {ok, State} |
%%                         {swap_handler, Args1, State1, Mod2, Args2} |
%%                         remove_handler
%% @end
%%--------------------------------------------------------------------
handle_info(die_for_testing_purposes_only, _State) ->
    %% Deliberate crash for tests. A much larger exit payload was
    %% tried at some point and is left here for reference:
    %% exit({told_to_die, lists:duplicate(500000, $x)});
    Payload = lists:duplicate(50, $x),
    exit({told_to_die, Payload});
handle_info(Info, State) ->
    error_logger:info_msg("handle_info got ~p\n", [Info]),
    {ok, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when the handler is deleted from an event manager. Nothing
%% needs cleaning up.
%%
%% @spec terminate(Reason, State) -> void()
%% @end
%%--------------------------------------------------------------------
terminate(_Reason, _State) ->
    ok.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Convert process state when code is changed. The state is carried
%% over unchanged.
%%
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Same as format_pretty_proc_info/2 with the function entry tagged
%% current_function.
format_pretty_proc_info(Pid) ->
    format_pretty_proc_info(Pid, current_function).

%% Printable (iolist) description of a process, or "" when nothing is
%% known about it. Any exception raised while gathering the information
%% is turned into a text describing the failure instead of propagating.
format_pretty_proc_info(Pid, Acf) ->
    try get_pretty_proc_info(Pid, Acf) of
        undefined ->
            "";
        ProcInfo ->
            io_lib:format("~w", [ProcInfo])
    catch
        Class:Reason ->
            io_lib:format("Pid ~w, ~W ~W at ~w\n",
                          [Pid, Class, 20, Reason, 20, erlang:get_stacktrace()])
    end.

%% Same as get_pretty_proc_info/2 with the function entry tagged
%% current_function.
get_pretty_proc_info(Pid) ->
    get_pretty_proc_info(Pid, current_function).

%% Collects a short property list about Pid: {name, RegisteredName}
%% when the process is registered, its initial call (as translated by
%% proc_lib where that applies) and {Acf, CurrentFunction}. Returns
%% undefined when process_info/2 has nothing to report.
get_pretty_proc_info(Pid, Acf) ->
    Wanted = [registered_name, initial_call, current_function],
    case process_info(Pid, Wanted) of
        undefined ->
            undefined;
        [] ->
            undefined;
        [{registered_name, RegName}, RawInitialCall, {_, CurrentFun}] ->
            InitialCall =
                case proc_lib:translate_initial_call(Pid) of
                    {proc_lib, init_p, 5} -> % not by proc_lib, see docs
                        RawInitialCall;
                    Translated ->
                        {initial_call, Translated}
                end,
            Details = [InitialCall, {Acf, CurrentFun}],
            case RegName of
                [] -> Details;
                _ -> [{name, RegName} | Details]
            end
    end.
--------------------------------------------------------------------------------
/src/riak_core_apl.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Active Preference Lists
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% Get active preference list - preference list with secondary nodes
%% substituted.
%% -------------------------------------------------------------------
-module(riak_core_apl).
-export([active_owners/1, active_owners/2,
         get_apl/3, get_apl/4, get_apl_ann/4,
         get_primary_apl/3, get_primary_apl/4
        ]).

-export_type([preflist/0, preflist2/0]).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-type index() :: non_neg_integer().
-type n_val() :: non_neg_integer().
-type ring() :: riak_core_ring:riak_core_ring().
%% Plain preference list: partition index and owning node.
-type preflist() :: [{index(), node()}].
%% Annotated preference list: each entry also says whether the node is
%% the primary owner or a fallback standing in for a down primary.
-type preflist2() :: [{{index(), node()}, primary|fallback}].

%% Return preflist of all active primary nodes (with no
%% substitution of fallbacks). Used to simulate a
%% preflist with N=ring_size.
%%
%% The entries are the annotated ones produced by check_up/4, i.e.
%% {{Index, Node}, primary}, hence the preflist2() return type.
-spec active_owners(atom()) -> preflist2().
active_owners(Service) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    active_owners(Ring, riak_core_node_watcher:nodes(Service)).

%% Same as active_owners/1 for an explicit ring and list of up nodes.
-spec active_owners(ring(), [node()]) -> preflist2().
active_owners(Ring, UpNodes) ->
    UpNodes1 = ordsets:from_list(UpNodes),
    Primaries = riak_core_ring:all_owners(Ring),
    {Up, _Pangs} = check_up(Primaries, UpNodes1, [], []),
    %% check_up/4 accumulates by prepending; restore ring order
    lists:reverse(Up).

%% Get the active preflist taking account of which nodes are up
-spec get_apl(binary(), n_val(), atom()) -> preflist().
get_apl(DocIdx, N, Service) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    get_apl(DocIdx, N, Ring, riak_core_node_watcher:nodes(Service)).

%% Get the active preflist taking account of which nodes are up
%% for a given ring/upnodes list. This is get_apl_ann/4 with the
%% primary/fallback annotation stripped.
-spec get_apl(binary(), n_val(), ring(), [node()]) -> preflist().
get_apl(DocIdx, N, Ring, UpNodes) ->
    [{Partition, Node} || {{Partition, Node}, _Type} <-
                              get_apl_ann(DocIdx, N, Ring, UpNodes)].

%% Get the active preflist taking account of which nodes are up
%% for a given ring/upnodes list and annotate each node with type of
%% primary/fallback.
%%
%% The first N entries of the ring preflist are the primaries; the
%% remainder are the fallback candidates. Crashes (lists:split/2) when
%% N exceeds the length of the ring preflist.
-spec get_apl_ann(binary(), n_val(), ring(), [node()]) -> preflist2().
get_apl_ann(DocIdx, N, Ring, UpNodes) ->
    UpNodes1 = ordsets:from_list(UpNodes),
    Preflist = riak_core_ring:preflist(DocIdx, Ring),
    {Primaries, Fallbacks} = lists:split(N, Preflist),
    {Up, Pangs} = check_up(Primaries, UpNodes1, [], []),
    lists:reverse(Up) ++ find_fallbacks(Pangs, Fallbacks, UpNodes1, []).

%% Same as get_apl, but returns only the primaries. The entries keep
%% their {{Index, Node}, primary} annotation.
-spec get_primary_apl(binary(), n_val(), atom()) -> preflist2().
get_primary_apl(DocIdx, N, Service) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    get_primary_apl(DocIdx, N, Ring, riak_core_node_watcher:nodes(Service)).

%% Same as get_apl, but returns only the primaries, for a given
%% ring/upnodes list. The entries keep their {{Index, Node}, primary}
%% annotation.
-spec get_primary_apl(binary(), n_val(), ring(), [node()]) -> preflist2().
get_primary_apl(DocIdx, N, Ring, UpNodes) ->
    UpNodes1 = ordsets:from_list(UpNodes),
    Preflist = riak_core_ring:preflist(DocIdx, Ring),
    {Primaries, _} = lists:split(N, Preflist),
    {Up, _} = check_up(Primaries, UpNodes1, [], []),
    lists:reverse(Up).

%% Split a preference list into up and down lists. Up entries are
%% annotated as primary; down entries ("pangs") are kept as plain
%% {Index, Node} pairs. Both accumulators are built by prepending, so
%% they come back in reverse preflist order.
-spec check_up(preflist(), [node()], preflist2(), preflist()) -> {preflist2(), preflist()}.
check_up([], _UpNodes, Up, Pangs) ->
    {Up, Pangs};
check_up([{Partition,Node}|Rest], UpNodes, Up, Pangs) ->
    case is_up(Node, UpNodes) of
        true ->
            check_up(Rest, UpNodes, [{{Partition, Node}, primary} | Up], Pangs);
        false ->
            check_up(Rest, UpNodes, Up, [{Partition, Node} | Pangs])
    end.

%% Find fallbacks for downed nodes in the preference list.
%%
%% Walks the fallback candidates in order; each candidate whose node is
%% up takes over the partition of the next pang. Stops when either list
%% runs out. The result is accumulated by prepending and is returned
%% as accumulated (it is not reversed here).
-spec find_fallbacks(preflist(), preflist(), [node()], preflist2()) -> preflist2().
find_fallbacks(_Pangs, [], _UpNodes, Found) ->
    Found;
find_fallbacks([], _Candidates, _UpNodes, Found) ->
    Found;
find_fallbacks([{Partition, _DownNode} | MorePangs] = Pangs,
               [{_, CandidateNode} | Candidates], UpNodes, Found) ->
    {PangsLeft, Found1} =
        case is_up(CandidateNode, UpNodes) of
            true ->
                {MorePangs, [{{Partition, CandidateNode}, fallback} | Found]};
            false ->
                {Pangs, Found}
        end,
    find_fallbacks(PangsLeft, Candidates, UpNodes, Found1).

%% Return true if a node is up (UpNodes is used as an ordset)
is_up(Node, UpNodes) ->
    ordsets:is_element(Node, UpNodes).

-ifdef(TEST).

%% A one-partition ring owned by the local node.
smallest_test() ->
    Self = node(),
    Ring = riak_core_ring:fresh(1, Self),
    ?assertEqual([{0,node()}], get_apl(last_in_ring(), 1, Ring, [Self])).

%% Eight partitions spread evenly over four nodes, with various nodes
%% marked as down.
four_node_test() ->
    Nodes = [nodea, nodeb, nodec, noded],
    Ring = perfect_ring(8, Nodes),
    DocIdx = last_in_ring(),
    P1 = 182687704666362864775460604089535377456991567872,
    P2 = 365375409332725729550921208179070754913983135744,
    ?assertEqual([{0,nodea}, {P1,nodeb}, {P2,nodec}],
                 get_apl(DocIdx, 3, Ring, Nodes)),
    %% With a node down
    ?assertEqual([{P1,nodeb}, {P2,nodec}, {0,noded}],
                 get_apl(DocIdx, 3, Ring, [nodeb, nodec, noded])),
    %% With two nodes down
    ?assertEqual([{P2,nodec}, {0,nodec}, {P1,noded}],
                 get_apl(DocIdx, 3, Ring, [nodec, noded])),
    %% With the other two nodes down
    ?assertEqual([{0,nodea}, {P1,nodeb}, {P2,nodea}],
                 get_apl(DocIdx, 3, Ring, [nodea, nodeb])).


%% Create a perfect ring - RingSize must be a multiple of nodes.
%% Partitions are handed out round-robin over Nodes.
perfect_ring(RingSize, Nodes) when RingSize rem length(Nodes) =:= 0 ->
    FreshRing = riak_core_ring:fresh(RingSize, node()),
    Assign =
        fun({Idx, _CurOwner}, {RingAcc, [Next | Waiting]}) ->
                {riak_core_ring:transfer_node(Idx, Next, RingAcc),
                 Waiting ++ [Next]}
        end,
    {PerfectRing, _} = lists:foldl(Assign, {FreshRing, Nodes},
                                   riak_core_ring:all_owners(FreshRing)),
    PerfectRing.

%% 160-bit key with the value 2^160 - 1.
last_in_ring() ->
    <<1461501637330902918203684832716283019655932542975:160/unsigned>>.
-endif.
--------------------------------------------------------------------------------
/src/riak_core_tracer.erl:
--------------------------------------------------------------------------------
%%
%% Copyright (c) 2007-2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_core_tracer).

-behaviour(gen_server).

%% API
-export([start_link/0,
         stop/0,
         reset/0,
         filter/2,
         collect/0, collect/1, collect/2,
         results/0,
         stop_collect/0]).
-export([test_all_events/1]).

-export([all_events/1]).
-export([trigger_sentinel/0]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-define(SERVER, ?MODULE).

%% Server state:
%%   trace     - collected {Timestamp, Event} entries, newest batch first
%%   filters   - funs applied to every raw trace message
%%   mfs       - modules / {M, F} pairs that get a trace pattern
%%   stop_tref - reference of the pending collect_timeout timer
%%   stop_from - caller waiting for stop_collect to complete
%%   tracing   - false | true | stopping
-record(state, {trace=[],
                filters=[],
                mfs=[],
                stop_tref,
                stop_from,
                tracing=false}).

%%===================================================================
%% API
%%===================================================================

start_link() ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

stop() ->
    gen_server:call(?SERVER, stop).

%% Drop all filters, trace patterns and collected traces.
reset() ->
    gen_server:call(?SERVER, reset).

%% Set up a filter on trace messages to the list of [{M, F}]s. The
%% tracer function should return a list of events to log
filter(MFs, Filter) ->
    gen_server:call(?SERVER, {filter, MFs, Filter}).

%% Collect traces.  Duration is in milliseconds (default 100); Nodes are
%% the additional nodes to trace on (default nodes()).
collect() ->
    collect(100).

collect(Duration) ->
    collect(Duration, nodes()).

collect(Duration, Nodes) ->
    gen_server:call(?SERVER, {collect, Duration, Nodes}).

%% Stop collection
stop_collect() ->
    gen_server:call(?SERVER, stop_collect).

%% Return the trace, sorted, with timestamps relative to the first entry
results() ->
    gen_server:call(?SERVER, results).

%% Filter that logs every traced call as {Node, {M, F, A}}.  Any other
%% trace message does not match and is discarded by the caller's catch.
all_events({trace, Pid, call, {M,F,A}}) ->
    [{node(Pid), {M,F,A}}].

test_all_events(Ms) ->
    riak_core_tracer:start_link(),
    riak_core_tracer:reset(),
    riak_core_tracer:filter(Ms, fun all_events/1),
    riak_core_tracer:collect(5000).

%%===================================================================
%% gen_server callbacks
%%===================================================================

init([]) ->
    {ok, #state{}}.

handle_call(reset, _From, State) ->
    cancel_timer(State#state.stop_tref),
    {reply, ok, #state{}};
handle_call({filter, MFs, Filter}, _From, State) ->
    {reply, ok, State#state{mfs=MFs ++ State#state.mfs,
                            filters = [Filter | State#state.filters]}};
handle_call({collect, Duration, Nodes}, _From, State) ->
    cancel_timer(State#state.stop_tref),
    %% timer:send_after/2 returns {ok, TRef}; only the bare TRef is
    %% accepted by timer:cancel/1, so unwrap it before storing.
    {ok, Tref} = timer:send_after(Duration, collect_timeout),
    dbg:stop_clear(),
    dbg:tracer(process, {trace_handler(State#state.filters), self()}),
    dbg:p(all, call),
    [{ok, N} = dbg:n(N) || N <- Nodes],
    dbg:tpl(?MODULE, trigger_sentinel, []),
    add_tracers(State#state.mfs),
    {reply, ok, State#state{trace=[], stop_tref = Tref, tracing = true}};
handle_call(stop_collect, From, State = #state{tracing = true}) ->
    begin_stop(From, State);
handle_call(stop_collect, _From, State) ->
    {reply, State#state.tracing, State};
handle_call({traces, Entries}, _From, State) ->
    {reply, ok, State#state{trace=Entries ++ State#state.trace}};
handle_call(results, _From, State) ->
    Result = case lists:sort(State#state.trace) of
                 [] ->
                     [];
                 [{MinTs, _} | _] = Sorted ->
                     zero_ts(MinTs, Sorted, [])
             end,
    {reply, Result, State};
handle_call(stop, _From, State) ->
    {stop, normal, ok, State}.

%% The sentinel call has come through the trace stream, so everything
%% traced before it has been delivered: tear tracing down and release the
%% caller blocked in stop_collect, if any.
handle_cast(stop_sentinel, State) ->
    dbg:stop_clear(),
    case State#state.stop_from of
        undefined ->
            ok;
        StopFrom ->
            gen_server:reply(StopFrom, ok)
    end,
    {noreply, State#state{stop_from = undefined}};
handle_cast(_Msg, State) ->
    {noreply, State}.

%% Collection period elapsed: stop exactly like stop_collect, but with
%% nobody waiting for a reply.
handle_info(collect_timeout, State = #state{tracing = true}) ->
    begin_stop(undefined, State);
handle_info(collect_timeout, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Trigger the sentinel so that we wait for the trace buffer to flush;
%% the reply to From (if any) is sent when stop_sentinel arrives.
begin_stop(From, State) ->
    ?MODULE:trigger_sentinel(),
    {noreply, State#state{stop_from = From, tracing = stopping}}.

%% Build the dbg handler fun.  Its accumulator is the server pid.  A call
%% to trigger_sentinel/0 is turned into a stop_sentinel cast; every other
%% message is run through the filters and any resulting events are sent
%% to the server stamped with the current time in microseconds.
trace_handler(Filters) ->
    fun ({trace, _, call, {?MODULE, trigger_sentinel, _}}, Server) ->
            gen_server:cast(Server, stop_sentinel),
            Server;
        (Msg, Server) ->
            case run_filters(Filters, Msg) of
                [] ->
                    ok;
                Entries ->
                    {Mega, Secs, Micro} = now(),
                    Ts = 1000000 * (1000000 * Mega + Secs) + Micro,
                    TsEntries = [{Ts, E} || E <- Entries],
                    gen_server:call(Server, {traces, TsEntries})
            end,
            Server
    end.

%% Apply every filter to Msg; a filter that crashes contributes nothing.
run_filters(Filters, Msg) ->
    lists:flatten([case catch F(Msg) of
                       {'EXIT', _} ->
                           [];
                       Events ->
                           Events
                   end || F <- Filters]).

add_tracers([]) ->
    ok;
add_tracers([{M, F} | Rest]) ->
    dbg:tpl(M, F, [{'_',[],[{message,{return_trace}}]}]),
    add_tracers(Rest);
add_tracers([M | Rest]) ->
    dbg:tpl(M,[{'_',[],[{message,{return_trace}}]}]),
    add_tracers(Rest).

%% Cancel the pending collect_timeout timer and discard a timeout that
%% has already been delivered to our mailbox, so it cannot cut a later
%% collection short.
cancel_timer(undefined) ->
    ok;
cancel_timer(Tref) ->
    catch timer:cancel(Tref),
    receive
        collect_timeout ->
            ok
    after
        0 ->
            ok
    end.

%% Shift every timestamp so that Offset becomes zero.
zero_ts(_Offset, [], Acc) ->
    lists:reverse(Acc);
zero_ts(Offset, [{Ts,Trace}|Rest], Acc) ->
    zero_ts(Offset, Rest, [{Ts - Offset, Trace} | Acc]).

%% Does nothing by itself; a trace pattern on this function marks the end
%% of the trace stream (see trace_handler/1).
trigger_sentinel() ->
    ok.

--------------------------------------------------------------------------------
/src/riak_core_handoff_sender.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_handoff_sender: send a partition's data via TCP-based handoff
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc send a partition's data via TCP-based handoff

-module(riak_core_handoff_sender).
-export([start_link/3, get_handoff_ssl_options/0]).
-include_lib("riak_core_vnode.hrl").
-include_lib("riak_core_handoff.hrl").
-define(ACK_COUNT, 1000).

%% Spawn a linked process that hands Partition of Module off to
%% TargetNode.  The calling process is recorded as the parent that is
%% told about the outcome, and the SSL options are read in the caller
%% before the sender is spawned.
start_link(TargetNode, Module, Partition) ->
    Parent = self(),
    SslOpts = get_handoff_ssl_options(),
    Sender = spawn_link(fun() ->
                                start_fold(TargetNode, Module, Partition,
                                           Parent, SslOpts)
                        end),
    {ok, Sender}.

%% Connect to the handoff listener on TargetNode (SSL when SslOpts is
%% non-empty, plain TCP otherwise), announce the vnode module and the
%% partition, then fold over the local vnode sending every item.  The
%% outcome is reported to ParentPid as a gen_fsm event:
%% handoff_complete, {handoff_error, fold_error, Reason} or
%% {handoff_error, Class, Reason} for anything that crashed.
%%
%% NOTE(review): the four binary constructions in this block were
%% reconstructed from the PT_MSG_* macros and the surrounding protocol
%% comments - confirm against riak_core_handoff_receiver.
start_fold(TargetNode, Module, Partition, ParentPid, SslOpts) ->
     try
         error_logger:info_msg("Starting handoff of partition ~p ~p to ~p~n",
                               [Module, Partition, TargetNode]),
         [_Name,Host] = string:tokens(atom_to_list(TargetNode), "@"),
         {ok, Port} = get_handoff_port(TargetNode),
         SockOpts = [binary, {packet, 4}, {header,1}, {active, false}],
         {Socket, TcpMod} =
             if SslOpts /= [] ->
                     {ok, Skt} = ssl:connect(Host, Port, SslOpts ++ SockOpts,
                                             15000),
                     {Skt, ssl};
                true ->
                     {ok, Skt} = gen_tcp:connect(Host, Port, SockOpts, 15000),
                     {Skt, gen_tcp}
             end,

         %% Piggyback the sync command from previous releases to send
         %% the vnode type across. If talking to older nodes they'll
         %% just do a sync, newer nodes will decode the module name.
         %% After 0.12.0 the calls can be switched to use PT_MSG_SYNC
         %% and PT_MSG_CONFIGURE
         VMaster = list_to_atom(atom_to_list(Module) ++ "_master"),
         ModBin = atom_to_binary(Module, utf8),
         Msg = <<?PT_MSG_OLDSYNC:8,ModBin/binary>>,
         ok = TcpMod:send(Socket, Msg),
         {ok,[?PT_MSG_OLDSYNC|<<"sync">>]} = TcpMod:recv(Socket, 0),
         M = <<?PT_MSG_INIT:8,Partition:160/integer>>,
         ok = TcpMod:send(Socket, M),
         StartFoldTime = now(),
         {Socket,ParentPid,Module,TcpMod,_Ack,SentCount,ErrStatus} =
             riak_core_vnode_master:sync_command({Partition, node()},
                                                 ?FOLD_REQ{
                                                    foldfun=fun visit_item/3,
                                                    acc0={Socket,ParentPid,Module,TcpMod,0,0,ok}},
                                                 VMaster, infinity),
         EndFoldTime = now(),
         case ErrStatus of
             ok ->
                 error_logger:info_msg("Handoff of partition ~p ~p to ~p "
                                       "completed: sent ~p objects in ~.2f "
                                       "seconds\n",
                                       [Module, Partition, TargetNode,
                                        SentCount,
                                        timer:now_diff(
                                          EndFoldTime,
                                          StartFoldTime) / 1000000]),
                 gen_fsm:send_event(ParentPid, handoff_complete);
             {error, ErrReason} ->
                 error_logger:error_msg("Handoff of partition ~p ~p to ~p "
                                        "FAILED: (~p) after sending ~p objects "
                                        "in ~.2f seconds\n",
                                        [Module, Partition, TargetNode,
                                         ErrReason, SentCount,
                                         timer:now_diff(
                                           EndFoldTime,
                                           StartFoldTime) / 1000000]),
                 gen_fsm:send_event(ParentPid, {handoff_error,
                                                fold_error, ErrReason})
         end
     catch
         Err:Reason ->
             error_logger:error_msg("Handoff sender ~p ~p failed ~p:~p\n",
                                    [Module, Partition, Err,Reason]),
             gen_fsm:send_event(ParentPid, {handoff_error, Err, Reason})
     end.

%% Fold function handed to the vnode.  The accumulator is
%% {Socket, ParentPid, Module, TcpMod, AckCounter, TotalSent, ErrStatus}.
%%
%% When a tcp error occurs, the ErrStatus argument is set to {error, Reason}.
%% Since we can't abort the fold, this clause is just a no-op.
visit_item(_K, _V, {Socket, ParentPid, Module, TcpMod, Ack, Total,
                    {error, Reason}}) ->
    {Socket, ParentPid, Module, TcpMod, Ack, Total, {error, Reason}};
%% Every ?ACK_COUNT items: send a sync, wait for the receiver to echo it,
%% then send the current item with the counter reset to zero.
visit_item(K, V, {Socket, ParentPid, Module, TcpMod, ?ACK_COUNT, Total, _Err}) ->
    M = <<?PT_MSG_OLDSYNC:8,"sync">>,
    case TcpMod:send(Socket, M) of
        ok ->
            case TcpMod:recv(Socket, 0) of
                {ok,[?PT_MSG_OLDSYNC|<<"sync">>]} ->
                    visit_item(K, V, {Socket, ParentPid, Module, TcpMod, 0, Total, ok});
                {error, Reason} ->
                    {Socket, ParentPid, Module, TcpMod, 0, Total, {error, Reason}}
            end;
        {error, Reason} ->
            {Socket, ParentPid, Module, TcpMod, 0, Total, {error, Reason}}
    end;
%% Normal case: encode the item with the vnode module and send it.
visit_item(K, V, {Socket, ParentPid, Module, TcpMod, Ack, Total, _ErrStatus}) ->
    BinObj = Module:encode_handoff_item(K, V),
    M = <<?PT_MSG_OBJ:8,BinObj/binary>>,
    case TcpMod:send(Socket, M) of
        ok ->
            {Socket, ParentPid, Module, TcpMod, Ack+1, Total+1, ok};
        {error, Reason} ->
            {Socket, ParentPid, Module, TcpMod, Ack, Total, {error, Reason}}
    end.

%% Ask the handoff listener on Node for its port.  If the call to
%% riak_core_handoff_listener exits, the listener name used by the
%% previous release is tried instead.
get_handoff_port(Node) when is_atom(Node) ->
    case catch(gen_server2:call({riak_core_handoff_listener, Node}, handoff_port, infinity)) of
        {'EXIT', _} ->
            %% Check old location from previous release
            gen_server2:call({riak_kv_handoff_listener, Node}, handoff_port, infinity);
        Other -> Other
    end.

%% Return the handoff SSL options configured under the riak_core
%% application key handoff_ssl_options, or [] when none are configured.
%% Every configured certfile/keyfile/cacertfile/dhfile must be readable;
%% if one is not, the problem is logged and [] is returned so that
%% handoff falls back to plain TCP.
get_handoff_ssl_options() ->
    case app_helper:get_env(riak_core, handoff_ssl_options, []) of
        [] ->
            [];
        Props ->
            try
                %% We'll check if the file(s) exist but won't check
                %% file contents' sanity.  The match is written so that a
                %% failure raises {badmatch, {Property, Result}}, which
                %% the first catch clause below reports.
                lists:foreach(
                  fun(ToCheck) ->
                          case proplists:get_value(ToCheck, Props) of
                              undefined ->
                                  ok;
                              Path ->
                                  {_, {ok, _}} =
                                      {ToCheck, file:read_file(Path)}
                          end
                  end,
                  [certfile, keyfile, cacertfile, dhfile]),
                %% Props are OK
                Props
            catch
                error:{badmatch, {FailProp, BadMat}} ->
                    error_logger:error_msg("riak_core handoff_ssl_options "
                                           "config error: property ~p: ~p. "
                                           "Disabling handoff SSL\n",
                                           [FailProp, BadMat]),
                    [];
                X:Y ->
                    error_logger:error_msg("riak_core handoff_ssl_options "
                                           "failure {~p, ~p} processing config "
                                           "~p. Disabling handoff SSL\n",
                                           [X, Y, Props]),
                    []
            end
    end.

--------------------------------------------------------------------------------
/src/priority_queue.erl:
--------------------------------------------------------------------------------
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License at
%% http://www.mozilla.org/MPL/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%% License for the specific language governing rights and limitations
%% under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developers of the Original Code are LShift Ltd,
%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
15 | %% 16 | %% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, 17 | %% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd 18 | %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial 19 | %% Technologies LLC, and Rabbit Technologies Ltd. 20 | %% 21 | %% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift 22 | %% Ltd. Portions created by Cohesive Financial Technologies LLC are 23 | %% Copyright (C) 2007-2009 Cohesive Financial Technologies 24 | %% LLC. Portions created by Rabbit Technologies Ltd are Copyright 25 | %% (C) 2007-2009 Rabbit Technologies Ltd. 26 | %% 27 | %% All Rights Reserved. 28 | %% 29 | %% Contributor(s): ______________________________________. 30 | %% 31 | 32 | %% Priority queues have essentially the same interface as ordinary 33 | %% queues, except that a) there is an in/3 that takes a priority, and 34 | %% b) we have only implemented the core API we need. 35 | %% 36 | %% Priorities should be integers - the higher the value the higher the 37 | %% priority - but we don't actually check that. 38 | %% 39 | %% in/2 inserts items with priority 0. 40 | %% 41 | %% We optimise the case where a priority queue is being used just like 42 | %% an ordinary queue. When that is the case we represent the priority 43 | %% queue as an ordinary queue. We could just call into the 'queue' 44 | %% module for that, but for efficiency we implement the relevant 45 | %% functions directly in here, thus saving on inter-module calls and 46 | %% eliminating a level of boxing. 47 | %% 48 | %% When the queue contains items with non-zero priorities, it is 49 | %% represented as a sorted kv list with the inverted Priority as the 50 | %% key and an ordinary queue as the value. Here again we use our own 51 | %% ordinary queue implemention for efficiency, often making recursive 52 | %% calls into the same function knowing that ordinary queues represent 53 | %% a base case. 54 | 55 | 56 | -module(priority_queue). 

-export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3,
         out/1, out/2, pout/1, join/2]).

%%----------------------------------------------------------------------------

-ifdef(use_specs).

-type(priority() :: integer()).
-type(squeue() :: {queue, [any()], [any()]}).
-type(pqueue() :: squeue() | {pqueue, [{priority(), squeue()}]}).

-spec(new/0 :: () -> pqueue()).
-spec(is_queue/1 :: (any()) -> bool()).
-spec(is_empty/1 :: (pqueue()) -> bool()).
-spec(len/1 :: (pqueue()) -> non_neg_integer()).
-spec(to_list/1 :: (pqueue()) -> [{priority(), any()}]).
-spec(in/2 :: (any(), pqueue()) -> pqueue()).
-spec(in/3 :: (any(), priority(), pqueue()) -> pqueue()).
-spec(out/1 :: (pqueue()) -> {(empty | {value, any()}), pqueue()}).
-spec(out/2 :: (priority(), pqueue()) -> {(empty | {value, any()}), pqueue()}).
-spec(pout/1 :: (pqueue()) -> {(empty | {value, any(), priority()}), pqueue()}).
-spec(join/2 :: (pqueue(), pqueue()) -> pqueue()).

-endif.

%%----------------------------------------------------------------------------

%% An empty queue.  {queue, In, Out} is a plain FIFO queue: In holds the
%% newest items first, Out holds the oldest items first.
new() ->
    {queue, [], []}.

%% True for a plain queue or for a {pqueue, Levels} whose levels all carry
%% an integer key and a valid queue.
is_queue({queue, In, Out}) when is_list(In), is_list(Out) ->
    true;
is_queue({pqueue, Levels}) when is_list(Levels) ->
    ValidLevel = fun ({Key, Queue}) ->
                         is_integer(Key) andalso is_queue(Queue)
                 end,
    lists:all(ValidLevel, Levels);
is_queue(_) ->
    false.

is_empty({queue, [], []}) ->
    true;
is_empty(_) ->
    false.

%% Total number of items across all priority levels.
len({queue, In, Out}) when is_list(In), is_list(Out) ->
    length(In) + length(Out);
len({pqueue, Levels}) ->
    lists:sum([len(Queue) || {_, Queue} <- Levels]).

%% Items as {Priority, Value} pairs, in the order out/1 would return them.
%% Level keys are stored negated, hence the sign flip.
to_list({queue, In, Out}) when is_list(In), is_list(Out) ->
    lists:map(fun (Value) -> {0, Value} end, Out ++ lists:reverse(In));
to_list({pqueue, Levels}) ->
    [{-Key, Value} || {Key, Queue} <- Levels, {0, Value} <- to_list(Queue)].

%% Insert with the default priority 0.
in(Item, Q) ->
    in(Item, 0, Q).

%% Insert Item with the given priority.  Priority-0 inserts into a plain
%% queue stay plain; anything else promotes the queue to a pqueue whose
%% levels are kept sorted by negated priority.
in(Item, 0, {queue, [_] = In, []}) ->
    {queue, [Item], In};
in(Item, 0, {queue, In, Out}) when is_list(In), is_list(Out) ->
    {queue, [Item|In], Out};
in(Item, Priority, {queue, [], []}) ->
    in(Item, Priority, {pqueue, []});
in(Item, Priority, {queue, _, _} = Plain) ->
    in(Item, Priority, {pqueue, [{0, Plain}]});
in(Item, Priority, {pqueue, Levels}) ->
    Key = -Priority,
    NewLevels =
        case lists:keyfind(Key, 1, Levels) of
            false ->
                lists:keysort(1, [{Key, {queue, [Item], []}} | Levels]);
            {_, Queue} ->
                lists:keyreplace(Key, 1, Levels, {Key, in(Item, Queue)})
        end,
    {pqueue, NewLevels}.

%% Remove the next item: highest priority first, FIFO within a priority.
out({queue, [], []} = Q) ->
    {empty, Q};
out({queue, [Value], []}) ->
    {{value, Value}, {queue, [], []}};
out({queue, [Newest|In], []}) ->
    [Value|Out] = lists:reverse(In, []),
    {{value, Value}, {queue, [Newest], Out}};
out({queue, In, [Value]}) when is_list(In) ->
    {{value, Value}, r2f(In)};
out({queue, In, [Value|Out]}) when is_list(In) ->
    {{value, Value}, {queue, In, Out}};
out({pqueue, [{Key, Queue} | Levels]}) ->
    {Result, Remaining} = out(Queue),
    {Result, rebuild(Key, Remaining, Levels)}.

%% Like out/1, but only yields an item whose priority is at least
%% Priority; otherwise the queue is returned untouched with 'empty'.
out(_Priority, {queue, [], []} = Q) ->
    {empty, Q};
out(Priority, {queue, _, _} = Q) when Priority =< 0 ->
    out(Q);
out(_Priority, {queue, _, _} = Q) ->
    {empty, Q};
out(Priority, {pqueue, [{Key, _} | _]} = Q) when Priority =< (-Key) ->
    out(Q);
out(_Priority, {pqueue, [_|_]} = Q) ->
    {empty, Q}.

%% Like out/1, but the priority of the removed item is returned as well.
pout({queue, [], []} = Q) ->
    {empty, Q};
pout({queue, _, _} = Q) ->
    {{value, Value}, Remaining} = out(Q),
    {{value, Value, 0}, Remaining};
pout({pqueue, [{Key, Queue} | Levels]}) ->
    {{value, Value}, Remaining} = out(Queue),
    {{value, Value, -Key}, rebuild(Key, Remaining, Levels)}.

%% Append queue B to queue A, level by level.
join(A, {queue, [], []}) ->
    A;
join({queue, [], []}, B) ->
    B;
join({queue, AIn, AOut}, {queue, BIn, BOut}) ->
    {queue, BIn, AOut ++ lists:reverse(AIn, BOut)};
join(A = {queue, _, _}, {pqueue, BLevels}) ->
    {pqueue, splice_zero(BLevels, A, fun (Zero) -> join(A, Zero) end)};
join({pqueue, ALevels}, B = {queue, _, _}) ->
    {pqueue, splice_zero(ALevels, B, fun (Zero) -> join(Zero, B) end)};
join({pqueue, ALevels}, {pqueue, BLevels}) ->
    {pqueue, merge(ALevels, BLevels, [])}.

%% Put what is left of the head level back in front of the remaining
%% levels, collapsing to a plain queue when only priority 0 (or nothing)
%% remains.
rebuild(Key, Remaining, Levels) ->
    case {is_empty(Remaining), Levels} of
        {false, _}              -> {pqueue, [{Key, Remaining} | Levels]};
        {true, []}              -> {queue, [], []};
        {true, [{0, OnlyQueue}]} -> OnlyQueue;
        {true, [_|_]}           -> {pqueue, Levels}
    end.

%% Insert the plain queue Plain as priority-0 level of Levels.  When a
%% priority-0 level already exists, JoinZero decides how the two combine.
splice_zero(Levels, Plain, JoinZero) ->
    {Higher, Rest} = lists:splitwith(fun ({Key, _}) -> Key < 0 end, Levels),
    ZeroAndLower = case Rest of
                       [{0, Zero} | Lower] -> [{0, JoinZero(Zero)} | Lower];
                       _                   -> [{0, Plain} | Rest]
                   end,
    Higher ++ ZeroAndLower.

%% Merge two sorted level lists, joining queues that share a key.
merge([], BLevels, Acc) ->
    lists:reverse(Acc, BLevels);
merge(ALevels, [], Acc) ->
    lists:reverse(Acc, ALevels);
merge([{Key, A}|ARest], [{Key, B}|BRest], Acc) ->
    merge(ARest, BRest, [{Key, join(A, B)} | Acc]);
merge([{AKey, A}|ARest], BLevels = [{BKey, _}|_], Acc) when AKey < BKey ->
    merge(ARest, BLevels, [{AKey, A} | Acc]);
merge(ALevels = [{_, _}|_], [{BKey, B}|BRest], Acc) ->
    merge(ALevels, BRest, [{BKey, B} | Acc]).

%% Turn an In list (newest first) into a queue once Out has run dry.
r2f([])          -> {queue, [], []};
r2f([_] = In)    -> {queue, [], In};
r2f([X,Y])       -> {queue, [X], [Y]};
r2f([X,Y|Older]) -> {queue, [X,Y], lists:reverse(Older, [])}.

--------------------------------------------------------------------------------
/src/riak_core_vnode_master.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_vnode_master: dispatch to vnodes
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc dispatch to vnodes

-module(riak_core_vnode_master).
-include_lib("riak_core_vnode.hrl").
-behaviour(gen_server).
-export([start_link/1, start_link/2, get_vnode_pid/2,
         start_vnode/2, command/3, command/4, sync_command/3,
         sync_command/4,
         sync_spawn_command/3, make_request/3,
         all_nodes/1, reg_name/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).
-record(idxrec, {idx, pid, monref}).
-record(state, {idxtab, sup_name, vnode_mod, legacy}).

-define(DEFAULT_TIMEOUT, 5000).

%% Build an atom from the vnode module name followed by Suffix.
make_name(VNodeMod,Suffix) ->
    Base = atom_to_list(VNodeMod),
    list_to_atom(Base ++ Suffix).

%% Registered name of the master for VNodeMod: the module name with
%% "_master" appended.
reg_name(VNodeMod) ->
    make_name(VNodeMod, "_master").

%% Start a master for VNodeMod without a legacy rewrite module.
start_link(VNodeMod) ->
    start_link(VNodeMod, undefined).

%% Start the master for VNodeMod, registered locally under
%% reg_name(VNodeMod).  LegacyMod, when not 'undefined', is given the
%% chance to rewrite requests the master does not recognise.
start_link(VNodeMod, LegacyMod) ->
    Name = reg_name(VNodeMod),
    InitArgs = [VNodeMod,LegacyMod,Name],
    gen_server:start_link({local, Name}, ?MODULE, InitArgs, []).

%% Ask the local master (asynchronously) to make sure the vnode for
%% Index is running.
start_vnode(Index, VNodeMod) ->
    gen_server:cast(reg_name(VNodeMod), {Index, start_vnode}).

%% Ask the local master for the vnode of Index; waits without a timeout.
get_vnode_pid(Index, VNodeMod) ->
    gen_server:call(reg_name(VNodeMod), {Index, get_vnode}, infinity).

%% Same as command/4 with the sender set to 'ignore'.
command(Preflist, Msg, VMaster) ->
    command(Preflist, Msg, ignore, VMaster).

%% Send the command to the preflist given with responses going to Sender.
%% A preflist entry whose second element is a pid is sent straight to
%% that process as a gen_fsm event; any other entry is cast to the
%% master registered as VMaster on that node.
command([], _Msg, _Sender, _VMaster) ->
    ok;
command([{Index, Dest}|Rest], Msg, Sender, VMaster) ->
    Request = make_request(Msg, Sender, Index),
    case is_pid(Dest) of
        true ->
            gen_fsm:send_event(Dest, Request);
        false ->
            gen_server:cast({VMaster, Dest}, Request)
    end,
    command(Rest, Msg, Sender, VMaster);

%% Send the command to an individual Index/Node combination
command({Index,Node}, Msg, Sender, VMaster) ->
    Request = make_request(Msg, Sender, Index),
    gen_server:cast({VMaster, Node}, Request).

%% Send a synchronous command to an individual Index/Node combination.
%% Will not return until the vnode has returned
sync_command(IndexNode, Msg, VMaster) ->
    sync_command(IndexNode, Msg, VMaster, ?DEFAULT_TIMEOUT).

sync_command({Index,Node}, Msg, VMaster, Timeout) ->
    %% Issue the call to the master, it will update the Sender with
    %% the From for handle_call so that the {reply} return gets
    %% sent here.
    Placeholder = {server, undefined, undefined},
    Request = make_request(Msg, Placeholder, Index),
    gen_server:call({VMaster, Node}, Request, Timeout).


%% Send a synchronous spawned command to an individual Index/Node combination.
%% Will not return until the vnode has returned, but the vnode_master will
%% continue to handle requests.
sync_spawn_command({Index,Node}, Msg, VMaster) ->
    Placeholder = {server, undefined, undefined},
    Request = make_request(Msg, Placeholder, Index),
    gen_server:call({VMaster, Node}, {spawn, Request}, infinity).


%% Make a request record - exported for use by legacy modules
-spec make_request(vnode_req(), sender(), partition()) -> #riak_vnode_req_v1{}.
make_request(Request, Sender, Index) ->
    #riak_vnode_req_v1{index=Index, sender=Sender, request=Request}.

%% Request a list of Pids for all vnodes
all_nodes(VNodeMod) ->
    gen_server:call(reg_name(VNodeMod), all_nodes, infinity).

%% @private
init([VNodeMod, LegacyMod, RegName]) ->
    %% Get the current list of vnodes running in the supervisor. We use this
    %% to rebuild our ETS table for routing messages to the appropriate
    %% vnode.
    Children = supervisor:which_children(riak_core_vnode_sup),
    VnodePids = [Pid || {_, Pid, worker, _} <- Children],
    %% Records are keyed on their second element, the partition index.
    IdxTable = ets:new(RegName, [{keypos, 2}]),

    %% In case this the vnode master is being restarted, scan the existing
    %% vnode children and work out which module and index they are responsible
    %% for. During startup it is possible that these vnodes may be shutting
    %% down as we check them if there are several types of vnodes active.
    ModIndexOf = fun(Pid) ->
                         try
                             [{Pid, riak_core_vnode:get_mod_index(Pid)}]
                         catch
                             _:_Err ->
                                 []
                         end
                 end,
    PidIdxs = lists:flatten([ModIndexOf(Pid) || Pid <- VnodePids]),

    %% Populate the ETS table with processes running this VNodeMod (filtered
    %% in the list comprehension), monitoring each one as it is recorded.
    Monitored = fun(Pid, Idx) ->
                        #idxrec{idx = Idx,
                                pid = Pid,
                                monref = erlang:monitor(process, Pid)}
                end,
    IdxRecs = [Monitored(Pid, Idx)
               || {Pid, {Mod, Idx}} <- PidIdxs, Mod =:= VNodeMod],
    true = ets:insert_new(IdxTable, IdxRecs),
    {ok, #state{idxtab=IdxTable,
                vnode_mod=VNodeMod,
                legacy=LegacyMod}}.

%% Casts: start a vnode, forward a vnode request to its vnode (starting
%% it on demand), or - when a legacy module is configured - let that
%% module rewrite anything else into a vnode request.
handle_cast({Partition, start_vnode}, State) ->
    _Pid = get_vnode(Partition, State),
    {noreply, State};
handle_cast(Req=?VNODE_REQ{index=Idx}, State) ->
    gen_fsm:send_event(get_vnode(Idx, State), Req),
    {noreply, State};
handle_cast(Other, State=#state{legacy=Legacy}) when Legacy =/= undefined ->
    case catch Legacy:rewrite_cast(Other) of
        {ok, ?VNODE_REQ{}=Rewritten} ->
            handle_cast(Rewritten, State);
        _ ->
            %% Not something the legacy module understands: drop it.
            {noreply, State}
    end.

%% Calls.  Vnode requests are forwarded with the caller's From filled
%% into the sender, so the reply reaches the caller without passing
%% through the master again.
handle_call(Req=?VNODE_REQ{index=Idx, sender={server, undefined, undefined}}, From, State) ->
    Pid = get_vnode(Idx, State),
    Forwarded = Req?VNODE_REQ{sender={server, undefined, From}},
    gen_fsm:send_event(Pid, Forwarded),
    {noreply, State};
handle_call({spawn,
             Req=?VNODE_REQ{index=Idx, sender={server, undefined, undefined}}}, From, State) ->
    Pid = get_vnode(Idx, State),
    Forwarded = Req?VNODE_REQ{sender={server, undefined, From}},
    %% The send itself happens in a linked helper process.
    spawn_link(
      fun() -> gen_fsm:send_all_state_event(Pid, Forwarded) end),
    {noreply, State};
handle_call(all_nodes, _From, State) ->
    Matches = ets:match(State#state.idxtab, {idxrec, '_', '$1', '_'}),
    {reply, lists:flatten(Matches), State};
handle_call({Partition, get_vnode}, _From, State) ->
    {reply, {ok, get_vnode(Partition, State)}, State};
handle_call(Other, From, State=#state{legacy=Legacy}) when Legacy =/= undefined ->
    case catch Legacy:rewrite_call(Other, From) of
        {ok, ?VNODE_REQ{}=Rewritten} ->
            handle_call(Rewritten, From, State);
        _ ->
            %% Not rewritable: no reply is sent to the caller.
            {noreply, State}
    end.

%% A monitored vnode went down: forget its routing entry.
handle_info({'DOWN', MonRef, process, _P, _I}, State) ->
    delmon(MonRef, State),
    {noreply, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% @private
code_change(_OldVsn, State, _Extra) -> {ok, State}.

%% @private
%% Look up the vnode pid recorded for Idx, or no_match.
idx2vnode(Idx, #state{idxtab=Tab}) ->
    case ets:match(Tab, {idxrec, Idx, '$1', '_'}) of
        [] -> no_match;
        [[VNodePid]] -> VNodePid
    end.

%% @private
%% Remove the routing entry that carries monitor reference MonRef.
delmon(MonRef, #state{idxtab=Tab}) ->
    ets:match_delete(Tab, {idxrec, '_', '_', MonRef}).

%% @private
%% Store a routing record in the index table.
add_vnode_rec(Rec, #state{idxtab=Tab}) -> ets:insert(Tab, Rec).

%% @private
%% Return the pid of the vnode for Idx.  When none is recorded, one is
%% started through riak_core_vnode_sup, monitored and added to the index
%% table before its pid is returned.
get_vnode(Idx, State=#state{vnode_mod=Mod}) ->
    case idx2vnode(Idx, State) of
        no_match ->
            {ok, NewPid} = riak_core_vnode_sup:start_vnode(Mod, Idx),
            Ref = erlang:monitor(process, NewPid),
            Rec = #idxrec{idx=Idx,pid=NewPid,monref=Ref},
            add_vnode_rec(Rec, State),
            NewPid;
        ExistingPid ->
            ExistingPid
    end.

--------------------------------------------------------------------------------
/src/riak_core_util.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Various functions that are useful throughout Riak.
-module(riak_core_util).

-export([moment/0,
         make_tmp_dir/0,
         compare_dates/2,
         reload_all/1,
         integer_to_list/2,
         unique_id_62/0,
         str_to_node/1,
         chash_key/1,
         chash_std_keyfun/1,
         chash_bucketonly_keyfun/1,
         mkclientid/1,
         start_app_deps/1,
         rpc_every_member/4]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% R14 Compatibility
-compile({no_auto_import,[integer_to_list/2]}).

%% ===================================================================
%% Public API
%% ===================================================================

%% @spec moment() -> integer()
%% @doc Get the current "moment". Current implementation is the
%%      number of seconds from year 0 to now, universal time, in
%%      the gregorian calendar.
moment() -> calendar:datetime_to_gregorian_seconds(calendar:universal_time()).

%% @spec compare_dates(string() | tuple(), string() | tuple()) -> boolean()
%% @doc Compare two RFC1123 date strings or two now() tuples (or one
%%      of each). Return true if date A is later than date B.
compare_dates(A={_,_,_}, B={_,_,_}) ->
    %% assume 3-tuples are now() times
    A > B;
compare_dates(A, B) when is_list(A) ->
    %% assume lists are rfc1123 date strings
    compare_dates(rfc1123_to_now(A), B);
compare_dates(A, B) when is_list(B) ->
    compare_dates(A, rfc1123_to_now(B)).

%% 719528 days from Jan 1, 0 to Jan 1, 1970
%% *86400 seconds/day
-define(SEC_TO_EPOCH, 62167219200).

%% Convert an RFC1123 date string into a now()-style
%% {MegaSecs, Secs, 0} tuple counted from the Unix epoch.
rfc1123_to_now(String) when is_list(String) ->
    GSec = calendar:datetime_to_gregorian_seconds(
             httpd_util:convert_request_date(String)),
    ESec = GSec-?SEC_TO_EPOCH,
    Sec = ESec rem 1000000,
    MSec = ESec div 1000000,
    {MSec, Sec, 0}.

%% @spec make_tmp_dir() -> string()
%% @doc Create a unique directory in /tmp. Returns the path
%%      to the new directory.
make_tmp_dir() ->
    TmpId = io_lib:format("riptemp.~p",
                          [erlang:phash2({random:uniform(),self()})]),
    TempDir = filename:join("/tmp", TmpId),
    %% Let make_dir itself detect a name collision: checking for the
    %% directory first and creating it afterwards leaves a window in
    %% which another process can create the same path.
    case file:make_dir(TempDir) of
        ok ->
            TempDir;
        {error, eexist} ->
            make_tmp_dir();
        {error, Reason} ->
            %% Same failure the original `ok = file:make_dir(...)' raised.
            erlang:error({badmatch, {error, Reason}})
    end.

%% @spec integer_to_list(Integer :: integer(), Base :: integer()) ->
%%          string()
%% @doc Convert an integer to its string representation in the given
%%      base.  Bases 2-62 are supported.  Digits are drawn from 0-9, then
%%      A-Z, then a-z.  Any other argument raises badarg.
integer_to_list(I, 10) ->
    erlang:integer_to_list(I);
integer_to_list(I, Base)
  when is_integer(I), is_integer(Base), Base >= 2, Base =< 1+$Z-$A+10+1+$z-$a ->
    case I < 0 of
        true ->
            [$- | integer_to_list(-I, Base, [])];
        false ->
            integer_to_list(I, Base, [])
    end;
integer_to_list(I, Base) ->
    erlang:error(badarg, [I, Base]).

%% @spec integer_to_list(integer(), integer(), string()) -> string()
%% Accumulate digits from least to most significant; prepending each one
%% leaves the result in reading order without a final reverse.
integer_to_list(I0, Base, Acc0) ->
    Acc1 = [digit_char(I0 rem Base) | Acc0],
    case I0 div Base of
        0 ->
            Acc1;
        I1 ->
            integer_to_list(I1, Base, Acc1)
    end.

%% Map a digit value (0..61) to its character: 0-9, A-Z, a-z.
digit_char(D) when D >= 36 -> D - 36 + $a;
digit_char(D) when D >= 10 -> D - 10 + $A;
digit_char(D) -> D + $0.

%% @spec unique_id_62() -> string()
%% @doc Create a random identifying integer, returning its string
%%      representation in base 62.
unique_id_62() ->
    Rand = crypto:sha(term_to_binary({make_ref(), now()})),
    %% a SHA-1 digest is 160 bits; read all of it as one unsigned integer
    <<I:160/integer>> = Rand,
    integer_to_list(I, 62).

%% @spec reload_all(Module :: atom()) ->
%%         [{purge_response(), load_file_response()}]
%% @type purge_response() = boolean()
%% @type load_file_response() = {module, Module :: atom()}|
%%                              {error, term()}
%% @doc Ask each member node of the riak ring to reload the given
%%      Module.  Return is a list of the results of code:purge/1
%%      and code:load_file/1 on each node.
reload_all(Module) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    [{rpc:call(Node, code, purge, [Module]),
      rpc:call(Node, code, load_file, [Module])} ||
        Node <- riak_core_ring:all_members(Ring)].

%% @spec mkclientid(RemoteNode :: term()) -> ClientID :: binary()
%% @doc Create a unique-enough id for vclock clients.  The id is the
%%      erlang:phash2/1 hash of the current universal time, the local
%%      node, RemoteNode and the microsecond part of now(), packed into
%%      a 32-bit binary.
mkclientid(RemoteNode) ->
    {{Y,Mo,D},{H,Mi,S}} = erlang:universaltime(),
    {_,_,NowPart} = now(),
    Id = erlang:phash2([Y,Mo,D,H,Mi,S,node(),RemoteNode,NowPart]),
    <<Id:32>>.

%% @spec chash_key(BKey :: riak_object:bkey()) -> chash:index()
%% @doc Create a binary used for determining replica placement.
%%      The hashing fun is the {Module, Function} stored under
%%      chash_keyfun in the bucket's properties.
chash_key({Bucket, Key}) ->
    BucketProps = riak_core_bucket:get_bucket(Bucket),
    {chash_keyfun, {M, F}} = proplists:lookup(chash_keyfun, BucketProps),
    M:F({Bucket, Key}).

%% @spec chash_std_keyfun(BKey :: riak_object:bkey()) -> chash:index()
%% @doc Default object/ring hashing fun, direct passthrough of bkey.
chash_std_keyfun({Bucket, Key}) ->
    chash:key_of({Bucket, Key}).

%% @spec chash_bucketonly_keyfun(BKey :: riak_object:bkey()) -> chash:index()
%% @doc Object/ring hashing fun that ignores Key, only uses Bucket.
chash_bucketonly_keyfun({Bucket, _Key}) ->
    chash:key_of(Bucket).

%% Convert a node name given as an atom or string into a node atom.
%% A name without an "@host" part gets the local node's hostname
%% appended, when the local node has one.
%% NOTE(review): list_to_atom/1 creates new atoms, so this should only be
%% fed trusted input - confirm against callers.
str_to_node(Node) when is_atom(Node) ->
    str_to_node(atom_to_list(Node));
str_to_node(NodeStr) ->
    case string:tokens(NodeStr, "@") of
        [NodeName] ->
            %% Node name only; no host name. If the local node has a hostname,
            %% append it
            case node_hostname() of
                [] ->
                    list_to_atom(NodeName);
                Hostname ->
                    list_to_atom(NodeName ++ "@" ++ Hostname)
            end;
        _ ->
            list_to_atom(NodeStr)
    end.

%% Hostname part of the local node name, or [] when the name does not
%% split into exactly a name and a host around "@".
node_hostname() ->
    case string:tokens(atom_to_list(node()), "@") of
        [_NodeName, Hostname] ->
            Hostname;
        _ ->
            []
    end.

%% @spec start_app_deps(App :: atom()) -> ok
%% @doc Start dependent applications of App.
start_app_deps(App) ->
    {ok, Deps} = application:get_key(App, applications),
    lists:foreach(fun(Dep) -> ensure_started(Dep) end, Deps),
    ok.


%% @spec ensure_started(Application :: atom()) -> ok
%% @doc Start the named application if not already started.
%%      Any other start failure crashes the caller.
ensure_started(App) ->
    StartResult = application:start(App),
    case StartResult of
        {error, {already_started, App}} ->
            ok;
        ok ->
            ok
    end.

%% @spec rpc_every_member(atom(), atom(), [term()], integer()|infinity)
%%          -> {Results::[term()], BadNodes::[node()]}
%% @doc Make an RPC call to the given module and function on each
%%      member of the cluster.  See rpc:multicall/5 for a description
%%      of the return value.
rpc_every_member(Module, Function, Args, Timeout) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    Members = riak_core_ring:all_members(Ring),
    rpc:multicall(Members, Module, Function, Args, Timeout).

%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).

moment_test() ->
    Earlier = riak_core_util:moment(),
    Later = riak_core_util:moment(),
    ?assert(Later >= Earlier).

%% NOTE(review): the comparison below is returned, not asserted, so this
%% test cannot fail on duplicate ids.  The ids are erlang:phash2/1 values,
%% so duplicates among 10001 of them look plausible - confirm before
%% turning this into an assertion.
clientid_uniqueness_test() ->
    Ids = [mkclientid('somenode@somehost') || _I <- lists:seq(0, 10000)],
    Distinct = sets:to_list(sets:from_list(Ids)),
    length(Ids) =:= length(Distinct).

-endif.


--------------------------------------------------------------------------------
/src/chash.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% chash: basic consistent hashing
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
6 | %% 7 | %% This file is provided to you under the Apache License, 8 | %% Version 2.0 (the "License"); you may not use this file 9 | %% except in compliance with the License. You may obtain 10 | %% a copy of the License at 11 | %% 12 | %% http://www.apache.org/licenses/LICENSE-2.0 13 | %% 14 | %% Unless required by applicable law or agreed to in writing, 15 | %% software distributed under the License is distributed on an 16 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | %% KIND, either express or implied. See the License for the 18 | %% specific language governing permissions and limitations 19 | %% under the License. 20 | %% 21 | %% ------------------------------------------------------------------- 22 | 23 | %% @doc A consistent hashing implementation. The space described by the ring 24 | %% coincides with SHA-1 hashes, and so any two keys producing the same 25 | %% SHA-1 hash are considered identical within the ring. 26 | %% 27 | %% @reference Karger, D.; Lehman, E.; Leighton, T.; Panigrahy, R.; Levine, M.; 28 | %% Lewin, D. (1997). "Consistent hashing and random trees". Proceedings of the 29 | %% twenty-ninth annual ACM symposium on Theory of computing: 654~663. ACM Press 30 | %% New York, NY, USA 31 | 32 | -module(chash). 33 | -author('Justin Sheehy '). 34 | -author('Andy Gross '). 35 | 36 | -export([fresh/2,update/3,lookup/2,members/1,size/1,nodes/1, 37 | successors/2,successors/3, 38 | predecessors/2,predecessors/3, 39 | contains_name/2,key_of/1, 40 | merge_rings/2]). 41 | 42 | -define(RINGTOP, trunc(math:pow(2,160)-1)). % SHA-1 space 43 | -include_lib("eunit/include/eunit.hrl"). 44 | 45 | % @type chash() = {NumPartitions, [NodeEntry]} 46 | % NumPartitions = integer() 47 | % NodeEntry = {IndexAsInt, Node} 48 | % IndexAsInt = integer() 49 | % Node = chash_node(). 50 | % It is not recommended that code outside this module make use 51 | % of the structure of a chash. 52 | 53 | % @type index() = binary(). 
% Indices into the ring, used as keys for object location, are binary
% representations of 160-bit integers.

% @type chash_node() = term().
% A Node is the unique identifier for the owner of a given partition.
% An Erlang Pid works well here, but the chash module allows it to
% be any term.

% @doc Create a brand new ring.  The size and seednode are specified;
%      initially all partitions are owned by the seednode.  If NumPartitions
%      is not much larger than the intended eventual number of
%      participating nodes, then performance will suffer.
% @spec fresh(NumPartitions :: integer(), SeedNode :: chash_node()) -> chash()
fresh(NumPartitions, SeedNode) ->
    Step = ?RINGTOP div NumPartitions,
    Indices = lists:seq(0, (?RINGTOP-1), Step),
    {NumPartitions, [{Idx, SeedNode} || Idx <- Indices]}.

% @doc Find the Node that owns the partition identified by IndexAsInt.
%      Crashes with badmatch when the index is not in the ring.
% @spec lookup(IndexAsInt :: integer(), CHash :: chash()) -> chash_node()
lookup(IndexAsInt, {_NumPartitions, Nodes}) ->
    {IndexAsInt, Owner} = proplists:lookup(IndexAsInt, Nodes),
    Owner.

% @doc Return true if named Node owns any partitions in the ring, else false.
% @spec contains_name(Name :: chash_node(), CHash :: chash()) -> bool()
contains_name(Name, {_NumPartitions, Nodes}) ->
    lists:any(fun({_, Owner}) -> Owner == Name;
                 (_) -> false
              end,
              Nodes).

% @doc Make the partition beginning at IndexAsInt owned by Name'd node.
%      The ring is returned unchanged when no entry has that index.
% @spec update(IndexAsInt :: integer(), Name :: chash_node(), CHash :: chash())
%                -> chash()
update(IndexAsInt, Name, {NumPartitions, Nodes}) ->
    {NumPartitions,
     lists:keyreplace(IndexAsInt, 1, Nodes, {IndexAsInt, Name})}.

% @doc Given an object key, return all NodeEntries in order starting at Index.
% @spec successors(Index :: index(), CHash :: chash()) -> [NodeEntry]
successors(Index, CHash) ->
    {NumPartitions, _Nodes} = CHash,
    successors(Index, CHash, NumPartitions).
% @doc Given an object key, return the next N NodeEntries in order
%      starting at Index.  N is capped at the ring's partition count.
% @spec successors(Index :: index(), CHash :: chash(), N :: integer())
%                                                             -> [NodeEntry]
successors(Index, CHash, N) ->
    Num = max_n(N, CHash),
    {Res, _} = lists:split(Num, ordered_from(Index, CHash)),
    Res.

% @doc Given an object key, return all NodeEntries in reverse order
%      starting at Index.
% @spec predecessors(Index :: index(), CHash :: chash()) -> [NodeEntry]
predecessors(Index, CHash) ->
    {NumPartitions, _Nodes} = CHash,
    predecessors(Index, CHash, NumPartitions).
% @doc Given an object key, return the next N NodeEntries in reverse order
%      starting at Index.  N is capped at the ring's partition count.
% @spec predecessors(Index :: index(), CHash :: chash(), N :: integer())
%                                                             -> [NodeEntry]
predecessors(Index, CHash, N) ->
    Num = max_n(N, CHash),
    {Res, _} = lists:split(Num, lists:reverse(ordered_from(Index,CHash))),
    Res.

% @doc Return either N or the number of partitions in the ring, whichever
%      is lesser.
% @spec max_n(N :: integer(), CHash :: chash()) -> integer()
max_n(N, {NumPartitions, _Nodes}) ->
    erlang:min(N, NumPartitions).

% @doc Given an object key, return all NodeEntries in order starting at Index.
%      The ring is rotated so that the entries after the partition
%      containing Index come first and the entries up to and including
%      that partition come last.
% @spec ordered_from(Index :: index(), CHash :: chash()) -> [NodeEntry]
ordered_from(Index, {NumPartitions, Nodes}) ->
    % an index() is the binary form of a 160-bit integer
    <<IndexAsInt:160/integer>> = Index,
    Inc = ?RINGTOP div NumPartitions,
    {A, B} = lists:split((IndexAsInt div Inc)+1, Nodes),
    B ++ A.

% @doc Given any term used to name an object, produce that object's key
%      into the ring.  Two names with the same SHA-1 hash value are
%      considered the same name.
% @spec key_of(ObjectName :: term()) -> index()
key_of(ObjectName) ->
    Encoded = term_to_binary(ObjectName),
    crypto:sha(Encoded).

% @doc Return all Nodes that own any partitions in the ring.
%      The result is sorted and free of duplicates.
% @spec members(CHash :: chash()) -> [Node]
members({_NumPartitions, Nodes}) ->
    Owners = [Owner || {_Idx, Owner} <- Nodes],
    lists:usort(Owners).

% @doc Return the entire set of NodeEntries in the ring.
% @spec nodes(CHash :: chash()) -> [NodeEntry]
nodes({_NumPartitions, Nodes}) ->
    Nodes.

% @doc Return a randomized merge of two rings.
%      If multiple nodes are actively claiming nodes in the same
%      time period, churn will occur.  Be prepared to live with it.
%      Both rings must have the same partition count; entries whose
%      indices do not line up pairwise are left out of the result.
% @spec merge_rings(CHashA :: chash(), CHashB :: chash()) -> chash()
merge_rings({NumPartitions, NodesA}, {NumPartitions, NodesB}) ->
    Paired = lists:zip(NodesA, NodesB),
    {NumPartitions, [{I, randomnode(A, B)} || {{I, A}, {I, B}} <- Paired]}.

% @spec randomnode(NodeA :: chash_node(), NodeB :: chash_node()) -> chash_node()
randomnode(Same, Same) ->
    Same;
randomnode(NodeA, NodeB) ->
    case random:uniform(2) of
        1 -> NodeA;
        2 -> NodeB
    end.

% @doc Return the number of partitions in the ring.
%      This counts the node entries rather than reading the stored
%      partition number.
% @spec size(CHash :: chash()) -> integer()
size({_NumPartitions, Nodes}) ->
    length(Nodes).

update_test() ->
    OldNode = 'old@host',
    NewNode = 'new@host',

    % Create a fresh ring...
    CHash = chash:fresh(5, OldNode),
    IndexAt = fun(Pos, {_, Entries}) ->
                      {Index, _} = lists:nth(Pos, Entries),
                      Index
              end,

    % Test update...
    % NOTE(review): six entries are expected for a ring created with 5
    % partitions - presumably a consequence of how fresh/2 steps through
    % the ring; confirm.
    First = IndexAt(1, CHash),
    Third = IndexAt(3, CHash),
    {5, [{_, NewNode}, {_, OldNode}, {_, OldNode},
         {_, OldNode}, {_, OldNode}, {_, OldNode}]} =
        update(First, NewNode, CHash),
    {5, [{_, OldNode}, {_, OldNode}, {_, NewNode},
         {_, OldNode}, {_, OldNode}, {_, OldNode}]} =
        update(Third, NewNode, CHash).

contains_test() ->
    Ring = chash:fresh(8, the_node),
    ?assertEqual(true, contains_name(the_node,Ring)),
    ?assertEqual(false, contains_name(some_other_node,Ring)).

max_n_test() ->
    Ring = chash:fresh(8, the_node),
    ?assertEqual(1, max_n(1,Ring)),
    ?assertEqual(8, max_n(11,Ring)).

simple_size_test() ->
    Ring = chash:fresh(8,the_node),
    ?assertEqual(8, length(chash:nodes(Ring))).

successors_length_test() ->
    Ring = chash:fresh(8,the_node),
    ?assertEqual(8, length(chash:successors(chash:key_of(0), Ring))).

inverse_pred_test() ->
    Ring = chash:fresh(8,the_node),
    Key = chash:key_of(4),
    Succ = [I || {I,_} <- chash:successors(Key,Ring)],
    Pred = [I || {I,_} <- chash:predecessors(Key,Ring)],
    ?assertEqual(Succ,lists:reverse(Pred)).

merge_test() ->
    RingA = chash:fresh(8,node_one),
    RingB = chash:update(0,node_one,chash:fresh(8,node_two)),
    Merged = chash:merge_rings(RingA,RingB),
    ?assertEqual(node_one,chash:lookup(0,Merged)).

--------------------------------------------------------------------------------
/test/core_vnode_eqc.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% core_vnode_eqc: QuickCheck tests for riak_core_vnode code
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc  QuickCheck tests for riak_core_vnode code

%% Things to test...
%% riak_core_vnode_master:command gets delivered to the right node
%% riak_core_vnode_master:sync_command works

-module(core_vnode_eqc).
-ifdef(EQC).
-include_lib("eqc/include/eqc.hrl").
-include_lib("eqc/include/eqc_fsm.hrl").
-include_lib("eunit/include/eunit.hrl").
-include_lib("riak_core_vnode.hrl").
-compile([export_all]).

-record(qcst, {started,
               counters, % Dict of counters for each index
               indices,
               crash_reasons}).

%% EUnit entry point: run the property with 100 generated test cases.
simple_test() ->
    simple_test(100).

%% Run the property with N generated test cases and require it to hold.
simple_test(N) ->
    Property = numtests(N, prop_simple()),
    ?assertEqual(true, quickcheck(Property)).

%% Property: every generated command sequence, run against freshly
%% started servers, finishes with result ok.  History, final state and
%% result are printed when it does not.
prop_simple() ->
    ?FORALL(Cmds, commands(?MODULE, {stopped, initial_state_data()}),
            aggregate(command_names(Cmds),
                      begin
                          start_servers(),
                          {History, FinalState, Result} =
                              run_commands(?MODULE, Cmds),
                          stop_servers(),
                          ?WHENFAIL(
                             begin
                                 io:format("History: ~p\n", [History]),
                                 io:format("State: ~p\n", [FinalState]),
                                 io:format("Result: ~p\n", [Result])
                             end,
                             Result =:= ok)
                      end)).

%% Generate one of the indices that have been started
active_index(#qcst{started = Started}) ->
    elements(Started).

%% Generate a preflist element
active_preflist1(S) ->
    {active_index(S), node()}.

%% Generate a preflist - making sure the partitions are unique
active_preflist(S) ->
    ?SUCHTHAT(Entries, list(active_preflist1(S)),
              lists:sort(Entries) == lists:usort(Entries)).

%% Name of the initial FSM state
initial_state() ->
    stopped.

%% Generate one of the ring's indices
index(S) ->
    oneof(S#qcst.indices).

%% Build the initial model state: install a fresh 8-partition ring owned
%% by this node as the global ring and remember its indices.
initial_state_data() ->
    Ring = riak_core_ring:fresh(8, node()),
    riak_core_ring_manager:set_ring_global(Ring),
    Indices = [Idx || {Idx, _Owner} <- riak_core_ring:all_owners(Ring)],
    #qcst{started = [],
          counters = orddict:new(),
          crash_reasons = orddict:new(),
          indices = Indices}.

%% Mark the vnode as started
next_state_data(_From, _To,
                #qcst{started = Started,
                      counters = Counters,
                      crash_reasons = Reasons} = S,
                _R,
                {call, ?MODULE, start_vnode, [Index]}) ->
    S#qcst{started = [Index | Started],
           counters = orddict:store(Index, 0, Counters),
           crash_reasons = orddict:store(Index, undefined, Reasons)};
next_state_data(_From, _To,
                #qcst{counters = Counters, crash_reasons = Reasons} = S,
                _R,
                {call, mock_vnode, stop, [{Index, _Node}]}) ->
    %% If a node is stopped, reset the counter ready for next
    %% time it is called which should start it
    S#qcst{counters = orddict:store(Index, 0, Counters),
           crash_reasons = orddict:store(Index, undefined, Reasons)};
%% Update the counters for the index if a command that changes them
next_state_data(_From, _To, #qcst{counters = Counters} = S, _R,
                {call, _Mod, Func, [Preflist]})
  when Func =:= neverreply; Func =:= returnreply; Func =:= latereply ->
    Bump = fun({Idx, _Node}, Acc) -> orddict:update_counter(Idx, 1, Acc) end,
    S#qcst{counters = lists:foldl(Bump, Counters, Preflist)};
%% Record the crash reason for a crashed index (the index itself)
next_state_data(_From, _To, #qcst{crash_reasons = Reasons} = S, _R,
                {call, mock_vnode, crash, [{Index, _Node}]}) ->
    S#qcst{crash_reasons = orddict:store(Index, Index, Reasons)};
next_state_data(_From, _To, S, _R, _C) ->
    S.
%

%% Transitions out of the stopped state: starting a vnode moves to running.
stopped(S) ->
    [{running, {call,?MODULE,start_vnode,[index(S)]}}].

%% Transitions available while running; every one stays in the same state.
running(S) ->
    Calls = [{call,?MODULE,start_vnode,[index(S)]},
             {call,mock_vnode,get_index,[active_preflist1(S)]},
             {call,mock_vnode,get_counter,[active_preflist1(S)]},
             {call,mock_vnode,crash,[active_preflist1(S)]},
             {call,mock_vnode,get_crash_reason,[active_preflist1(S)]},
             {call,mock_vnode,neverreply,[active_preflist(S)]},
             {call,?MODULE,returnreply,[active_preflist(S)]},
             {call,?MODULE,latereply,[active_preflist(S)]},
             {call,?MODULE,restart_master,[]},
             {call,mock_vnode,stop,[active_preflist1(S)]},
             {call,riak_core_vnode_master,all_nodes,[mock_vnode]}],
    [{history, Call} || Call <- Calls].

%% A vnode may only be started once; commands aimed at a preflist need
%% every index in it to have been started.
precondition(_From, _To, #qcst{started = Started},
             {call,?MODULE,start_vnode,[Index]}) ->
    not lists:member(Index, Started);
precondition(_From, _To, #qcst{started = Started},
             {call,_Mod,Func,[Preflist]})
  when Func =:= get_index; Func =:= get_counter; Func =:= neverreply;
       Func =:= returnreply; Func =:= latereply; Func =:= crash;
       Func =:= get_crash_reason ->
    preflist_is_active(Preflist, Started);
precondition(_From, _To, _S, _C) ->
    true.

postcondition(_From, _To, _S,
              {call,mock_vnode,get_index,[{Index,_Node}]},
              {ok,ReplyIndex}) ->
    Index =:= ReplyIndex;
postcondition(_From, _To, #qcst{crash_reasons = Reasons},
              {call,mock_vnode,get_crash_reason,[{Index,_Node}]},
              {ok, Reason}) ->
    %% there is the potential for a race here if get_crash_reason is called
    %% before the EXIT signal is sent to the vnode, but it didn't appear
    %% even with 1k tests - just a note in case a heisenbug rears its head
    %% on some future, less deterministic day.
    orddict:fetch(Index, Reasons) =:= Reason;
postcondition(_From, _To, #qcst{counters = Counters},
              {call,mock_vnode,get_counter,[{Index,_Node}]},
              {ok,ReplyCount}) ->
    orddict:fetch(Index, Counters) =:= ReplyCount;
%% NOTE(review): this clause matches an empty argument list, while the
%% generated neverreply/returnreply/latereply calls carry one preflist
%% argument - confirm whether it is meant to match them.
postcondition(_From, _To, _S,
              {call,_Mod,Func,[]}, Result)
  when Func =:= neverreply; Func =:= returnreply; Func =:= latereply ->
    Result =:= ok;
postcondition(_From, _To, _S,
              {call,riak_core_vnode_master,all_nodes,[mock_vnode]},
              Result) ->
    Children = supervisor:which_children(riak_core_vnode_sup),
    Pids = [Pid || {_,Pid,_,_} <- Children],
    lists:sort(Result) =:= lists:sort(Pids);
postcondition(_From, _To, _S, _C, _R) ->
    true.

%% Pre/post condition helpers

%% True when the single preflist entry, or every entry of a preflist,
%% refers to a started index.
preflist_is_active({Index,_Node}, Started) ->
    lists:member(Index, Started);
preflist_is_active(Preflist, Started) ->
    IsStarted = fun({Index,_Node}) -> lists:member(Index, Started) end,
    lists:all(IsStarted, Preflist).


%% Local versions of commands
start_vnode(I) ->
    ok = mock_vnode:start_vnode(I).

%% Issue returnreply and wait for one reply per preflist entry.
returnreply(Preflist) ->
    {ok, Ref} = mock_vnode:returnreply(Preflist),
    check_receive(length(Preflist), returnreply, Ref).

%% Issue latereply and wait for one reply per preflist entry.
latereply(Preflist) ->
    {ok, Ref} = mock_vnode:latereply(Preflist),
    check_receive(length(Preflist), latereply, Ref).


%% Wait for Remaining messages of the form {Ref, Msg}.  A different
%% payload under the same Ref, or a second without a matching message,
%% ends the wait with an error tuple.
check_receive(0, _Msg, _Ref) ->
    ok;
check_receive(Remaining, Msg, Ref) ->
    receive
        {Ref, Msg} ->
            check_receive(Remaining-1, Msg, Ref);
        {Ref, Unexpected} ->
            {error, {bad_msg, Msg, Unexpected}}
    after
        1000 ->
            {error, timeout}
    end.

%% Server start/stop infrastructure

%% Stop anything left running, then start the vnode supervisor and the
%% mock_vnode master.
start_servers() ->
    stop_servers(),
    {ok, _Sup} = riak_core_vnode_sup:start_link(),
    {ok, _VMaster} = riak_core_vnode_master:start_link(mock_vnode).

stop_servers() ->
    %% Make sure VMaster is killed before sup as start_vnode is a cast
    %% and there may be a pending request to start the vnode.
    lists:foreach(fun(Name) -> stop_pid(whereis(Name)) end,
                  [mock_vnode_master, riak_core_vnode_sup]).

restart_master() ->
    %% Call get status to make sure the riak_core_vnode_master
    %% has processed any commands that were cast to it.  Otherwise
    %% commands like neverreply are not cast on to the vnode and the
    %% counters are not updated correctly.
    sys:get_status(mock_vnode_master),
    stop_pid(whereis(mock_vnode_master)),
    {ok, _VMaster} = riak_core_vnode_master:start_link(mock_vnode).

%% Shut a process down and wait for it to exit.  undefined (an
%% unregistered name) is accepted and ignored.
stop_pid(undefined) ->
    ok;
stop_pid(Pid) ->
    %% unlink first so the shutdown does not propagate to the caller
    unlink(Pid),
    exit(Pid, shutdown),
    ok = wait_for_pid(Pid).

%% Wait up to five seconds for Pid to go down.
wait_for_pid(Pid) ->
    MonitorRef = erlang:monitor(process, Pid),
    receive
        {'DOWN', MonitorRef, process, _, _} ->
            ok
    after
        5000 ->
            {error, didnotexit}
    end.

-endif.


--------------------------------------------------------------------------------
/src/riak_core_gossip.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.
%% See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc riak_core_gossip takes care of the mechanics of shuttling a ring
%% from one node to another upon request by other Riak processes.
%%
%% Additionally, it occasionally checks to make sure the current node has its
%% fair share of partitions, and also sends a copy of the ring to some other
%% random node, ensuring that all nodes eventually synchronize on the same
%% understanding of the Riak cluster. This interval is configurable, but
%% defaults to once per minute.

-module(riak_core_gossip).

-behaviour(gen_server).

-export([start_link/0, stop/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).
-export ([distribute_ring/1, send_ring/1, send_ring/2, remove_from_cluster/1]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% ===================================================================
%% Public API
%% ===================================================================

%% distribute_ring/1 -
%% Ask the local gossip server to distribute a ring to all members of
%% that ring.
distribute_ring(Ring) ->
    LocalServer = {?MODULE, node()},
    gen_server:cast(LocalServer, {distribute_ring, Ring}).

%% send_ring/1 -
%% Send the current node's ring to some other node.
send_ring(ToNode) ->
    send_ring(node(), ToNode).

%% send_ring/2 -
%% Send the ring from one node to another node.
%% Does nothing if the two nodes are the same.
send_ring(Same, Same) ->
    ok;
send_ring(FromNode, ToNode) ->
    Sender = {?MODULE, FromNode},
    gen_server:cast(Sender, {send_ring_to, ToNode}).

%% Start the gossip server, registered locally under the module name.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% NOTE(review): no handle_cast/2 clause matches `stop`, so this cast
%% falls through to the catch-all clause and leaves the server running -
%% confirm whether that is intended.
stop() ->
    gen_server:cast(?MODULE, stop).


%% ===================================================================
%% gen_server behaviour
%% ===================================================================

%% @private
%% The server state is a single boolean; it is set to true when a
%% reconcile produces a new ring and to false on every gossip round.
init(_State) ->
    schedule_next_gossip(),
    {ok, true}.


%% @private
handle_call(_, _From, State) ->
    {reply, ok, State}.


%% @private
handle_cast({send_ring_to, Node}, RingChanged) ->
    {ok, LocalRing} = riak_core_ring_manager:get_my_ring(),
    gen_server:cast({?MODULE, Node}, {reconcile_ring, LocalRing}),
    {noreply, RingChanged};

handle_cast({distribute_ring, Ring}, RingChanged) ->
    Members = riak_core_ring:all_members(Ring),
    gen_server:abcast(Members, ?MODULE, {reconcile_ring, Ring}),
    {noreply, RingChanged};

handle_cast({reconcile_ring, OtherRing}, RingChanged) ->
    % Compare the two rings, see if there is anything that
    % must be done to make them equal...
    {ok, LocalRing} = riak_core_ring_manager:get_my_ring(),
    case riak_core_ring:reconcile(OtherRing, LocalRing) of
        {new_ring, Reconciled} ->
            % Rebalance the new ring and save it
            Balanced = claim_until_balanced(Reconciled),
            riak_core_ring_manager:set_my_ring(Balanced),

            % Finally, push it out to another node - expect at least two nodes now
            Peer = riak_core_ring:random_other_node(Balanced),
            send_ring(node(), Peer),
            {noreply, true};

        {no_change, _} ->
            {noreply, RingChanged}
    end;

handle_cast(gossip_ring, _RingChanged) ->
    % First, schedule the next round of gossip...
    schedule_next_gossip(),

    % Gossip the ring to some random other node...
    {ok, LocalRing} = riak_core_ring_manager:get_my_ring(),
    gossip_to(riak_core_ring:random_other_node(LocalRing)),
    {noreply, false};

handle_cast(_, State) ->
    {noreply, State}.

%% @private
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%% ===================================================================
%% Internal functions
%% ===================================================================

%% Send the local ring to the chosen peer, if there is one.
gossip_to(no_node) ->
    % must be single node cluster
    ok;
gossip_to(Peer) ->
    send_ring(node(), Peer).

%% Arrange for a gossip_ring cast after a random delay of at most the
%% configured gossip_interval.
schedule_next_gossip() ->
    MaxInterval = app_helper:get_env(riak_core, gossip_interval),
    Delay = random:uniform(MaxInterval),
    timer:apply_after(Delay, gen_server, cast, [?MODULE, gossip_ring]).

%% Keep applying the configured choose_claim_fun until the configured
%% wants_claim_fun answers no for the resulting ring.
claim_until_balanced(Ring) ->
    {WMod, WFun} = app_helper:get_env(riak_core, wants_claim_fun),
    case WMod:WFun(Ring) of
        {yes, _NumToClaim} ->
            {CMod, CFun} = app_helper:get_env(riak_core, choose_claim_fun),
            claim_until_balanced(CMod:CFun(Ring));
        no ->
            Ring
    end.


%% Hand the partitions owned by ExitingNode to the other members and
%% publish the resulting ring.
remove_from_cluster(ExitingNode) ->
    % Set the remote node to stop claiming.
    % Ignore return of rpc as this should succeed even if node is offline
    rpc:call(ExitingNode, application, set_env,
             [riak_core, wants_claim_fun, {riak_core_claim, never_wants_claim}]),

    % Get a list of indices owned by the ExitingNode...
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    AllOwners = riak_core_ring:all_owners(Ring),

    % Transfer indexes to other nodes...
    ExitRing =
        case attempt_simple_transfer(Ring, AllOwners, ExitingNode) of
            {ok, NewRing} ->
                NewRing;
            target_n_fail ->
                rediagonalize(Ring, AllOwners, ExitingNode)
        end,

    % Send the new ring to all nodes except the exiting node
    distribute_ring(ExitRing),

    % Set the new ring on the exiting node. This will trigger
    % it to begin handoff and cleanly leave the cluster.
    rpc:call(ExitingNode, riak_core_ring_manager, set_my_ring, [ExitRing]).

%% re-diagonalize
%% first hand off all claims to *any* one else,
%% just so rebalance doesn't include exiting node
rediagonalize(Ring, AllOwners, ExitingNode) ->
    Members = riak_core_ring:all_members(Ring),
    Other = hd(lists:delete(ExitingNode, Members)),
    Leaving = [Idx || {Idx, Owner} <- AllOwners, Owner == ExitingNode],
    TempRing = lists:foldl(
                 fun(Idx, Acc) ->
                         riak_core_ring:transfer_node(Idx, Other, Acc)
                 end,
                 Ring,
                 Leaving),
    riak_core_claim:claim_rebalance_n(TempRing, Other).


%% Try to reassign each partition of ExitingNode to a remaining member.
%% Every remaining member starts with a "last seen" position of -TargetN.
attempt_simple_transfer(Ring, Owners, ExitingNode) ->
    TargetN = app_helper:get_env(riak_core, target_n_val),
    Remaining = [Member || Member <- riak_core_ring:all_members(Ring),
                           Member /= ExitingNode],
    LastSeen = [{Member, -TargetN} || Member <- Remaining],
    attempt_simple_transfer(Ring, Owners,
                            TargetN,
                            ExitingNode, 0,
                            LastSeen).
%% Walk the owner list in ring order.  Idx is the position of the current
%% entry and Last holds, per remaining node, the position at which that
%% node was most recently seen or assigned.  Returns {ok, Ring} once the
%% list is exhausted, or target_n_fail when some partition of the exiting
%% node has no acceptable new owner.
attempt_simple_transfer(Ring, [{Partition, Exit}|Rest], TargetN, Exit, Idx, Last) ->
    %% handoff
    Candidates = [Node || {Node, SeenAt} <- Last, Idx-SeenAt >= TargetN],
    case Candidates of
        [] ->
            target_n_fail;
        _ ->
            %% these nodes don't violate target_n in the reverse direction
            RestLen = length(Rest),
            StepsToNext =
                fun(Node) ->
                        length(lists:takewhile(
                                 fun({_, Owner}) -> Node /= Owner end,
                                 Rest))
                end,
            FitsForward =
                fun(Node) ->
                        Next = StepsToNext(Node),
                        (Next+1 >= TargetN) orelse (Next == RestLen)
                end,
            case [Node || Node <- Candidates, FitsForward(Node)] of
                [] ->
                    target_n_fail;
                Qualifiers ->
                    %% these nodes don't violate target_n forward
                    Pick = random:uniform(length(Qualifiers)),
                    Chosen = lists:nth(Pick, Qualifiers),
                    %% choose one, and do the rest of the ring
                    NextRing = riak_core_ring:transfer_node(Partition, Chosen, Ring),
                    NextLast = lists:keyreplace(Chosen, 1, Last, {Chosen, Idx}),
                    attempt_simple_transfer(NextRing, Rest, TargetN, Exit,
                                            Idx+1, NextLast)
            end
    end;
attempt_simple_transfer(Ring, [{_, Owner}|Rest], TargetN, Exit, Idx, Last) ->
    %% just keep track of seeing this node
    NextLast = lists:keyreplace(Owner, 1, Last, {Owner, Idx}),
    attempt_simple_transfer(Ring, Rest, TargetN, Exit, Idx+1, NextLast);
attempt_simple_transfer(Ring, [], _, _, _, _) ->
    {ok, Ring}.

--------------------------------------------------------------------------------
/src/vclock.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
%%
%% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
%% in a distributed system". Communications of the ACM 21 (7): 558-565.
%%
%% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
%% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
%% pp. 215-226

-module(vclock).

-author('Justin Sheehy ').
-author('Andy Gross ').

-export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
         increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-export_type([vclock/0, timestamp/0, vclock_node/0]).

% A vclock is a list with one entry per node that has incremented it.
-opaque vclock() :: [vc_entry()].
% Each entry pairs a node with its counter and the timestamp of its last
% increment. The timestamp is carried so that a client may inspect it.
-type   vc_entry() :: {vclock_node(), {counter(), timestamp()}}.

% Nodes can have any term() as a name, but they must differ from each other.
-type   vclock_node() :: term().
-type   counter() :: integer().
-type   timestamp() :: integer().

% @doc Create a brand new vclock. A fresh vclock is the empty list.
-spec fresh() -> vclock().
fresh() ->
    [].

% @doc Return true if Va is a direct descendant of Vb, else false -- remember, a vclock is its own descendant!
% Va descends from Vb when, for every node present in Vb, Va holds an entry
% for that node whose counter is not smaller. Timestamps are ignored.
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_, []) ->
    % all vclocks descend from the empty vclock
    true;
descends(Va, [{NodeB, {CtrB, _T}}|RestB]) ->
    case proplists:get_value(NodeB, Va) of
        undefined ->
            % Va has never seen NodeB, so it cannot descend from Vb
            false;
        {CtrA, _TSA} when CtrA < CtrB ->
            false;
        {_CtrA, _TSA} ->
            descends(Va, RestB)
    end.

% @doc Combine all VClocks in the input list into their least possible
% common descendant.
% For a node present in several clocks the entry with the highest counter
% wins. When the counters are equal the larger timestamp is kept, so the
% result does not depend on the order of the input clocks.
% A single-element input list is returned as is (it is not sorted); the
% result of merging two or more clocks is sorted by node.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([])             -> [];
merge([SingleVclock]) -> SingleVclock;
merge([First|Rest])   -> merge(Rest, lists:keysort(1, First)).

% Fold every remaining clock into the (node-sorted) accumulator NClock.
merge([], NClock) -> NClock;
merge([AClock|VClocks],NClock) ->
    merge(VClocks, merge(lists:keysort(1, AClock), NClock, [])).

% Merge two node-sorted clocks; AccClock collects the result in reverse.
merge([], [], AccClock) -> lists:reverse(AccClock);
merge([], Left, AccClock) -> lists:reverse(AccClock, Left);
merge(Left, [], AccClock) -> lists:reverse(AccClock, Left);
merge(V=[{Node1,{Ctr1,TS1}=CT1}=NCT1|VClock],
      N=[{Node2,{Ctr2,TS2}=CT2}=NCT2|NClock], AccClock) ->
    if Node1 < Node2 ->
            merge(VClock, N, [NCT1|AccClock]);
       Node1 > Node2 ->
            merge(V, NClock, [NCT2|AccClock]);
       true ->
            % same node in both clocks: highest counter wins, and a tie on
            % the counter is broken by the most recent timestamp
            ({_Ctr,_TS} = CT) = if Ctr1 > Ctr2 -> CT1;
                                   Ctr1 < Ctr2 -> CT2;
                                   true        -> {Ctr1, erlang:max(TS1,TS2)}
                                end,
            merge(VClock, NClock, [{Node1,CT}|AccClock])
    end.

% @doc Get the counter value in VClock set from Node.
% Returns undefined when Node has no entry in VClock.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter() | undefined.
get_counter(Node, VClock) ->
    case proplists:get_value(Node, VClock) of
        {Ctr, _TS} -> Ctr;
        undefined  -> undefined
    end.

% @doc Get the timestamp value in a VClock set from Node.
% Returns undefined when Node has no entry in VClock.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    case proplists:get_value(Node, VClock) of
        undefined  -> undefined;
        {_Ctr, TS} -> TS
    end.

% @doc Increment VClock at Node, stamping the entry with the current time.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Now = timestamp(),
    increment(Node, Now, VClock).

% @doc Increment VClock at Node, stamping the entry with IncTs.
% The updated entry is placed at the head of the returned clock. A node
% without an entry starts at counter 1.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        false ->
            [{Node, {1, IncTs}} | VClock];
        {value, {_N, {Ctr, _T}}, Others} ->
            [{Node, {Ctr + 1, IncTs}} | Others]
    end.


% @doc Return the list of all nodes that have ever incremented VClock.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Ctr, _TS}} <- VClock].

% @doc Return a timestamp for a vector clock: the current universal time
% expressed in gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    UniversalTime = erlang:universaltime(),
    calendar:datetime_to_gregorian_seconds(UniversalTime).

% @doc Compares two VClocks for equality.
% Both clocks are turned into sets of entries; they are equal when neither
% set has an entry the other lacks. Not very fast.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA,VB) ->
    SetA = sets:from_list(VA),
    SetB = sets:from_list(VB),
    (sets:size(sets:subtract(SetA, SetB)) =:= 0)
        andalso (sets:size(sets:subtract(SetB, SetA)) =:= 0).

% @doc Possibly shrink the size of a vclock, depending on current age and size.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
% Entries are ordered oldest first and then dropped from the head according
% to the small_vclock / young_vclock / big_vclock / old_vclock values read
% from BucketProps.
prune(V,Now,BucketProps) ->
    %% This sort needs to be deterministic, to avoid spurious merge conflicts
    %% later. We achieve this by using the node ID as secondary key.
    OldestFirst = fun({N1,{_,T1}},{N2,{_,T2}}) -> {T1,N1} < {T2,N2} end,
    prune_vclock1(lists:sort(OldestFirst, V), Now, BucketProps).

% @private
% V must be sorted oldest entry first. The clock is returned unchanged when it
% has at most small_vclock entries, or when its oldest entry is younger than
% young_vclock; otherwise the big/old checks decide whether to drop the head.
prune_vclock1(V,Now,BProps) ->
    SmallLimit = proplists:get_value(small_vclock,BProps),
    case length(V) > SmallLimit of
        false ->
            V;
        true ->
            {_,{_,HeadTime}} = hd(V),
            YoungLimit = proplists:get_value(young_vclock,BProps),
            case (Now - HeadTime) >= YoungLimit of
                false -> V;
                true  -> prune_vclock1(V,Now,BProps,HeadTime)
            end
    end.

% @private
% has a precondition that V is longer than small and older than young.
% Drops the oldest entry (and re-checks the remainder) when the clock has more
% than big_vclock entries, or when the oldest entry is older than old_vclock.
prune_vclock1(V,Now,BProps,HeadTime) ->
    DropHead =
        (length(V) > proplists:get_value(big_vclock,BProps))
        orelse ((Now - HeadTime) > proplists:get_value(old_vclock,BProps)),
    case DropHead of
        true  -> prune_vclock1(tl(V),Now,BProps);
        false -> V
    end.

%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).

% @doc Serves as both a trivial test and some example code.
example_test() ->
    Empty = vclock:fresh(),
    % two independent clocks, each incremented by a different node
    A1 = vclock:increment(a, Empty),
    B1 = vclock:increment(b, Empty),
    true = vclock:descends(A1,Empty),
    true = vclock:descends(B1,Empty),
    false = vclock:descends(A1,B1),
    % merging yields a clock that descends from both of its inputs
    A2 = vclock:increment(a, A1),
    Merged = vclock:merge([A2, B1]),
    C1 = vclock:increment(c, Merged),
    true = vclock:descends(C1, A2),
    true = vclock:descends(C1, B1),
    false = vclock:descends(B1, C1),
    false = vclock:descends(B1, A1),
    ok.

prune_small_test() ->
    % vclock with fewer entries than small_vclock will be untouched
    Now = riak_core_util:moment(),
    OldTime = Now - 32000000,
    SmallVC = [{list_to_binary(integer_to_list(N)), {N, OldTime}}
               || N <- [1, 2, 3]],
    Pruned = prune(SmallVC, Now, [{small_vclock,4}]),
    ?assertEqual(lists:sort(SmallVC), lists:sort(Pruned)).

prune_young_test() ->
    % vclock with all entries younger than young_vclock will be untouched
    Now = riak_core_util:moment(),
    NewTime = Now - 1,
    VC = [{list_to_binary(integer_to_list(N)), {N, NewTime}}
          || N <- [1, 2, 3]],
    Pruned = prune(VC, Now, [{small_vclock,1},{young_vclock,1000}]),
    ?assertEqual(lists:sort(VC), lists:sort(Pruned)).

prune_big_test() ->
    % vclock not preserved by small or young will be pruned down to
    % no larger than big_vclock entries
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    VC = [{list_to_binary(integer_to_list(N)), {N, NewTime}}
          || N <- [1, 2, 3]],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,100000}],
    ?assertEqual(2, length(prune(VC, Now, Props))).

prune_old_test() ->
    % vclock not preserved by small or young will be pruned down to
    % no larger than big_vclock and no entries more than old_vclock ago
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    OldTime = Now - 100000,
    VC = [{<<"1">>, {1, NewTime}},
          {<<"2">>, {2, OldTime}},
          {<<"3">>, {3, OldTime}}],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(1, length(prune(VC, Now, Props))).

prune_order_test() ->
    % vclock with two nodes of the same timestamp will be pruned down
    % to the same node, whatever the order of the entries in the input
    Now = riak_core_util:moment(),
    OldTime = Now - 100000,
    Forward = [{<<"1">>, {1, OldTime}},
               {<<"2">>, {2, OldTime}}],
    Backward = lists:reverse(Forward),
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(prune(Forward, Now, Props), prune(Backward, Now, Props)).

accessor_test() ->
    VC = [{<<"1">>, {1, 1}},
          {<<"2">>, {2, 2}}],
    % entries that are present
    ?assertEqual(1, get_counter(<<"1">>, VC)),
    ?assertEqual(1, get_timestamp(<<"1">>, VC)),
    ?assertEqual(2, get_counter(<<"2">>, VC)),
    ?assertEqual(2, get_timestamp(<<"2">>, VC)),
    % a node that never incremented the clock
    ?assertEqual(undefined, get_counter(<<"3">>, VC)),
    ?assertEqual(undefined, get_timestamp(<<"3">>, VC)),
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(VC)).

merge_test() ->
    VC1 = [{<<"1">>, {1, 1}},
           {<<"2">>, {2, 2}},
           {<<"4">>, {4, 4}}],
    VC2 = [{<<"3">>, {3, 3}},
           {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>,{1,1}},{<<"2">>,{2,2}},{<<"3">>,{3,3}},{<<"4">>,{4,4}}],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([VC1, VC2])).

-endif.

--------------------------------------------------------------------------------
/src/riak_core_ring_manager.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% riak_core: Core Riak Application
%%
%% Copyright (c) 2007-2010 Basho Technologies, Inc.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc the local view of the cluster's ring configuration

-module(riak_core_ring_manager).
-include_lib("eunit/include/eunit.hrl").
%% Key under which the current ring is stored in mochiglobal.
-define(RING_KEY, riak_ring).
-behaviour(gen_server2).

-export([start_link/0,
         start_link/1,
         get_my_ring/0,
         refresh_my_ring/0,
         set_my_ring/1,
         write_ringfile/0,
         prune_ringfiles/0,
         read_ringfile/1,
         find_latest_ringfile/0,
         do_write_ringfile/1,
         ring_trans/2,
         stop/0]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-ifdef(TEST).
-export([set_ring_global/1]).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% ===================================================================
%% Public API
%% ===================================================================

%% Start the ring manager in live mode, registered locally under the
%% module name.
start_link() ->
    gen_server2:start_link({local, ?MODULE}, ?MODULE, [live], []).


%% Testing entry point: same registration, but the server is initialised
%% in test mode.
start_link(test) ->
    gen_server2:start_link({local, ?MODULE}, ?MODULE, [test], []).


%% @spec get_my_ring() -> {ok, riak_core_ring:riak_core_ring()} | {error, Reason}
%% Reads the ring straight from mochiglobal, without going through the
%% server process. Returns {error, no_ring} when nothing is stored yet.
get_my_ring() ->
    case mochiglobal:get(?RING_KEY) of
        Ring when is_tuple(Ring) -> {ok, Ring};
        undefined -> {error, no_ring}
    end.

%% @spec refresh_my_ring() -> ok
%% Synchronous request to the ring manager; waits without a timeout.
refresh_my_ring() ->
    gen_server2:call(?MODULE, refresh_my_ring, infinity).

%% @spec set_my_ring(riak_core_ring:riak_core_ring()) -> ok
%% Synchronous request to the ring manager; waits without a timeout.
set_my_ring(Ring) ->
    gen_server2:call(?MODULE, {set_my_ring, Ring}, infinity).


%% @spec write_ringfile() -> ok
%% Asynchronous: the ring manager writes the file after this call returns.
write_ringfile() ->
    gen_server2:cast(?MODULE, write_ringfile).

%% Run Fun(Ring, Args) inside the ring manager process and return the
%% manager's reply. Waits without a timeout.
ring_trans(Fun, Args) ->
    gen_server2:call(?MODULE, {ring_trans, Fun, Args}, infinity).


%% Write Ring to a new timestamped file
%% "<ring_state_dir>/riak_core_ring.<cluster_name>.<YYYYMMDDhhmmss>".
%% Returns nop (and writes nothing) when ring_state_dir is "", ok otherwise.
%% Crashes if the directory cannot be created or the file cannot be written.
do_write_ringfile(Ring) ->
    case app_helper:get_env(riak_core, ring_state_dir) of
        "" -> nop;
        Dir ->
            {{Year, Month, Day},{Hour, Minute, Second}} =
                calendar:universal_time(),
            TS = io_lib:format(".~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B",
                               [Year, Month, Day, Hour, Minute, Second]),
            Cluster = app_helper:get_env(riak_core, cluster_name),
            FN = Dir ++ "/riak_core_ring." ++ Cluster ++ TS,
            ok = filelib:ensure_dir(FN),
            ok = file:write_file(FN, term_to_binary(Ring))
    end.

%% @spec find_latest_ringfile() -> {ok, string()} | {error, Reason}
%% Returns the path of the newest ring file of this cluster found in
%% ring_state_dir, {error, not_found} when there is none, or the error
%% reported by file:list_dir/1.
find_latest_ringfile() ->
    Dir = app_helper:get_env(riak_core, ring_state_dir),
    case file:list_dir(Dir) of
        {ok, Filenames} ->
            Cluster = app_helper:get_env(riak_core, cluster_name),
            IsDigit = fun(C) -> C >= $0 andalso C =< $9 end,
            %% Only names of the form riak_core_ring.<Cluster>.<digits> are
            %% considered; any other file in the directory (a backup copy,
            %% an editor temp file, ...) is skipped instead of crashing
            %% list_to_integer/1.
            Timestamps = [list_to_integer(TS) ||
                             {"riak_core_ring", C1, TS} <-
                                 [list_to_tuple(string:tokens(FN, ".")) ||
                                     FN <- Filenames],
                             C1 =:= Cluster,
                             lists:all(IsDigit, TS)],
            case lists:reverse(lists:sort(Timestamps)) of
                [Latest | _] ->
                    {ok, Dir ++ "/riak_core_ring." ++ Cluster ++ "." ++
                         integer_to_list(Latest)};
                [] ->
                    {error, not_found}
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% @spec read_ringfile(string()) -> riak_core_ring:riak_core_ring()
%% Crashes if the file cannot be read.
%% NOTE(review): the file content is passed to binary_to_term/1 unchecked;
%% this presumes ring files are only ever written by do_write_ringfile/1.
read_ringfile(RingFile) ->
    {ok, Binary} = file:read_file(RingFile),
    binary_to_term(Binary).

%% @spec prune_ringfiles() -> ok
%% Delete old ring files of this cluster from ring_state_dir, keeping the
%% ones selected by prune_list/1. Does nothing when ring_state_dir is "",
%% when the directory does not exist, or when it holds no ring file of this
%% cluster with a parseable timestamp.
%% Only files named riak_core_ring.<cluster_name>.<timestamp> are candidates
%% for deletion; every other file in the directory is left alone.
prune_ringfiles() ->
    case app_helper:get_env(riak_core, ring_state_dir) of
        "" -> ok;
        Dir ->
            Cluster = app_helper:get_env(riak_core, cluster_name),
            case file:list_dir(Dir) of
                {error,enoent} -> ok;
                {ok, []} -> ok;
                {ok, Filenames} ->
                    %% {Filename, TimestampString} for each ring file of
                    %% this cluster
                    RingFiles = [{FN, TS} ||
                                    FN <- Filenames,
                                    {"riak_core_ring", C1, TS} <-
                                        [list_to_tuple(string:tokens(FN, "."))],
                                    C1 =:= Cluster],
                    %% timestamps as [Y,Mo,D,H,Mi,S], newest first; names
                    %% whose timestamp does not parse are ignored here
                    TSL = lists:reverse(
                            lists:sort(
                              [Parsed ||
                                  {_FN, TS} <- RingFiles,
                                  {ok, Parsed, []} <-
                                      [io_lib:fread("~4d~2d~2d~2d~2d~2d", TS)]])),
                    prune_ringfiles(Dir, RingFiles, TSL)
            end
    end.

%% @private
%% Delete every file of RingFiles whose name contains none of the timestamps
%% kept by prune_list/1.
prune_ringfiles(_Dir, _RingFiles, []) ->
    %% the directory wasn't empty, but there are no (parseable) ring
    %% files of this cluster in it
    ok;
prune_ringfiles(Dir, RingFiles, TSL) ->
    Keep = prune_list(TSL),
    KeepTSs = [lists:flatten(
                 io_lib:format(
                   "~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B",K))
               || K <- Keep],
    DelFNs = [Dir ++ "/" ++ FN || {FN, _TS} <- RingFiles,
                                  lists:all(fun(KeepTS) ->
                                                    string:str(FN,KeepTS)=:=0
                                            end, KeepTSs)],
    %% best effort: the result of each delete is ignored
    [file:delete(DelFN) || DelFN <- DelFNs],
    ok.


%% @private (only used for test instances)
stop() ->
    gen_server2:cast(?MODULE, stop).


%% ===================================================================
%% gen_server callbacks
%% ===================================================================

%% The server state is simply the start mode (live | test).
init([Mode]) ->
    Ring = case Mode of
               live -> riak_core_ring:fresh();
               test -> riak_core_ring:fresh(16,node())
           end,

    %% Set the ring and send initial notification to local observers that
    %% ring has changed.
    %% Do *not* save the ring to disk here.  On startup we deliberately come
    %% up with a ring where the local node owns all partitions so that any
    %% fallback vnodes will be started so they can hand off.
    set_ring_global(Ring),
    riak_core_ring_events:ring_update(Ring),
    {ok, Mode}.


handle_call({set_my_ring, Ring}, _From, State) ->
    prune_write_notify_ring(Ring),
    {reply,ok,State};
handle_call(refresh_my_ring, _From, State) ->
    %% This node is leaving the cluster so create a fresh ring file
    FreshRing = riak_core_ring:fresh(),
    set_ring_global(FreshRing),
    %% Make sure the fresh ring gets written before stopping
    do_write_ringfile(FreshRing),

    %% Handoff is complete and fresh ring is written
    %% so we can safely stop now.
    riak_core:stop("node removal completed, exiting."),

    {reply,ok,State};
handle_call({ring_trans, Fun, Args}, _From, State) ->
    {ok, Ring} = get_my_ring(),
    %% Fun is expected to return {new_ring, NewRing} or ignore; anything
    %% else (including a caught exception) is logged and the ring is left
    %% unchanged.
    case catch Fun(Ring, Args) of
        {new_ring, NewRing} ->
            prune_write_notify_ring(NewRing),
            %% pass the new ring on to one other node, if there is one
            case riak_core_ring:random_other_node(NewRing) of
                no_node ->
                    ignore;
                Node ->
                    riak_core_gossip:send_ring(Node)
            end,
            {reply, {ok, NewRing}, State};
        ignore ->
            {reply, not_changed, State};
        Other ->
            error_logger:error_msg("ring_trans: invalid return value: ~p~n",
                                   [Other]),
            {reply, not_changed, State}
    end.

handle_cast(stop, State) ->
    {stop,normal,State};

%% test instances never write ring files
handle_cast(write_ringfile, test) ->
    {noreply,test};

handle_cast(write_ringfile, State) ->
    {ok, Ring} = get_my_ring(),
    do_write_ringfile(Ring),
    {noreply,State}.


%% Unexpected messages are dropped.
handle_info(_Info, State) ->
    {noreply, State}.


%% @private
terminate(_Reason, _State) ->
    ok.


%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%% ===================================================================
%% Internal functions
%% ===================================================================

%% Given a non-empty list of timestamps (each a list of integer fields),
%% return the sorted, duplicate-free list made of the first timestamp plus,
%% for each of field positions 1..5, the first later entry that differs
%% from it in that field.
prune_list([X|Rest]) ->
    Older = [back(N, X, Rest) || N <- lists:seq(1, 5)],
    lists:usort([X | lists:append(Older)]).

%% Return, as a list of zero or one element, the first entry of the list
%% whose N-th field differs from the N-th field of X.
back(N, X, List) ->
    SameField = fun(H) -> lists:nth(N,X) =:= lists:nth(N,H) end,
    case lists:dropwhile(SameField, List) of
        []            -> [];
        [Differs | _] -> [Differs]
    end.

%% Set the ring in mochiglobal.  Exported during unit testing
%% to make test setup simpler - no need to spin up a riak_core_ring_manager
%% process.
set_ring_global(Ring) ->
    mochiglobal:put(?RING_KEY, Ring).

%% Persist a new ring file, set the global value and notify any listeners.
%% Old ring files are pruned before the new one is written.
prune_write_notify_ring(Ring) ->
    ?MODULE:prune_ringfiles(),
    do_write_ringfile(Ring),
    set_ring_global(Ring),
    riak_core_ring_events:ring_update(Ring).


%% ===================================================================
%% Unit tests
%% ===================================================================
-ifdef(TEST).

back_test() ->
    Ref = [1,2,3],
    Long = [[1,2,3],[4,2,3], [7,8,3], [11,12,13], [1,2,3]],
    Short = [[7,8,9], [1,2,3]],
    Same = [[1,2,3]],
    %% first field
    ?assertEqual([[4,2,3]], back(1, Ref, Long)),
    ?assertEqual([[7,8,9]], back(1, Ref, Short)),
    ?assertEqual([], back(1, Ref, Same)),
    %% second and third field
    ?assertEqual([[7,8,3]], back(2, Ref, Long)),
    ?assertEqual([[11,12,13]], back(3, Ref, Long)).

prune_list_test() ->
    %% {Input, Expected} pairs
    Cases =
        [{[[2011,2,28,16,32,16],[2011,2,28,16,32,36],[2011,2,28,16,30,27],
           [2011,2,28,16,32,16],[2011,2,28,16,32,36]],
          [[2011,2,28,16,30,27],[2011,2,28,16,32,16]]},
         {[[2011,2,28,16,32,36],[2011,2,28,16,31,16],[2011,2,28,16,30,27],
           [2011,2,28,16,32,16],[2011,2,28,16,32,36]],
          [[2011,2,28,16,31,16],[2011,2,28,16,32,36]]}],
    [{TSList1, PrunedList1}, {TSList2, PrunedList2}] = Cases,
    ?assertEqual(PrunedList1, prune_list(TSList1)),
    ?assertEqual(PrunedList2, prune_list(TSList2)).

set_ring_global_test() ->
    %% the ring is stored in mochiglobal under ?RING_KEY
    application:set_env(riak_core,ring_creation_size, 4),
    FreshRing = riak_core_ring:fresh(),
    set_ring_global(FreshRing),
    ?assertEqual(FreshRing, mochiglobal:get(?RING_KEY)).

set_my_ring_test() ->
    %% a ring stored with set_ring_global/1 is what get_my_ring/0 returns
    application:set_env(riak_core,ring_creation_size, 4),
    FreshRing = riak_core_ring:fresh(),
    set_ring_global(FreshRing),
    ?assertEqual({ok, FreshRing}, get_my_ring()).

refresh_my_ring_test() ->
    Env = [{ring_creation_size, 4},
           {ring_state_dir, "/tmp"},
           {cluster_name, "test"}],
    lists:foreach(fun({Key, Value}) ->
                          application:set_env(riak_core, Key, Value)
                  end, Env),
    riak_core_ring_events:start_link(),
    riak_core_ring_manager:start_link(test),
    riak_core_vnode_sup:start_link(),
    riak_core_vnode_master:start_link(riak_core_vnode),
    riak_core_test_util:setup_mockring1(),
    ?assertEqual(ok, riak_core_ring_manager:refresh_my_ring()),
    riak_core_ring_manager:stop(),
    %% Cleanup the ring file created for this test
    {ok, RingFile} = find_latest_ringfile(),
    file:delete(RingFile).

-endif.

--------------------------------------------------------------------------------