├── .github └── workflows │ └── test.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── include └── lbm_kv.hrl ├── rebar.config ├── rebar.lock ├── src ├── lbm_kv.app.src ├── lbm_kv.erl ├── lbm_kv_merge.erl ├── lbm_kv_mon.erl └── lbm_kv_vclock.erl └── test ├── lbm_kv_dist_tests.erl └── lbm_kv_tests.erl /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: push 2 | 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | name: OTP ${{matrix.otp_release}} 7 | strategy: 8 | matrix: 9 | otp_release: 10 | - 22.2 11 | - 21.3 12 | - 20.3 13 | - 19.3.6.8 14 | steps: 15 | - uses: actions/checkout@v2.0.0 16 | - uses: gleam-lang/setup-erlang@v1.1.2 17 | with: 18 | otp-version: ${{matrix.otp_release}} 19 | - run: rebar3 as ci do eunit 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | .rebar 3 | _build 4 | deps 5 | doc 6 | ebin 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | script: 3 | - rebar3 as ci eunit 4 | otp_release: 5 | - 20.1 6 | - 18.3 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | This license applies to all files in this project except for 'lbm_kv_vclock.erl'. 
4 | 5 | Copyright (c) 2014 Lindenbaum GmbH 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | The Apache License, Version 2.0 26 | 27 | This license applies to 'lbm_kv_vclock.erl' solely. 28 | 29 | Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. 30 | 31 | This file is provided to you under the Apache License, Version 2.0 (the 32 | "License"); you may not use this file except in compliance with the License. 33 | You may obtain a copy of the License at 34 | 35 | http://www.apache.org/licenses/LICENSE-2.0 36 | 37 | Unless required by applicable law or agreed to in writing, software distributed 38 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 39 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 40 | specific language governing permissions and limitations under the License. 
41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/lindenbaum/lbm_kv/workflows/.github/workflows/test.yml/badge.svg?branch=master)](https://github.com/lindenbaum/lbm_kv/actions) 2 | [![Old Build Status (TRAVIS)](https://travis-ci.org/lindenbaum/lbm_kv.png?branch=master)](https://travis-ci.org/lindenbaum/lbm_kv) 3 | 4 | lbm_kv 5 | ====== 6 | 7 | A dynamically-distributed, highly-available, partition-tolerant, in-memory 8 | key-value store built with [Mnesia](http://www.erlang.org/doc/apps/mnesia/). 9 | 10 | One of the main goals of this application is to enable developers to enjoy the 11 | pleasures of distributed Mnesia without the need of exploring the complex 12 | background. Therefore, `lbm_kv` provides a primitive API along with code to 13 | handle and work around the dirty details and pitfalls related to distributed 14 | Mnesia. 15 | 16 | Why use it? 17 | ----------- 18 | 19 | Mnesia is a powerful DBMS with support for table replication, transactions, 20 | netsplit detection and much more. _So why use something on top of it?_ 21 | Unfortunately, as with other powerful DBMSs its use is quite complex and making 22 | a Mnesia cluster dynamic requires a lot of research and the use of 23 | _undocumented_ features. `lbm_kv` is here to release you from this pain. 24 | 25 | What does lbm_kv offer? 26 | ----------------------- 27 | 28 | * Mnesia replication management in dynamic Erlang clusters 29 | * automated table merges and netsplit recovery based on 30 | [vector clocks](https://en.wikipedia.org/wiki/Vector_clock) and user-provided 31 | callbacks 32 | * a primitive and (hopefully) intuitive API 33 | * small, documented, fully-typed code-base 34 | * no additional/transitive dependencies introduced 35 | 36 | How does it work? 
37 | ----------------- 38 | 39 | `lbm_kv` is a simple Erlang application that gets dropped into your release. It 40 | is not necessary to know the cluster topology in advance, since `lbm_kv` can 41 | handle dynamic clusters. It listens for new node connections and replicates all 42 | its tables to the new nodes. When connected nodes go down, `lbm_kv` 43 | automatically shrinks the Mnesia cluster to the remaining nodes preserving the 44 | writability to its tables. The user decides when and what tables to create, no 45 | internal tables are created behind the scenes. 46 | 47 | `lbm_kv` is able to merge tables automatically (based on lamport/vector clocks). 48 | This is needed when a netsplit gets resolved or when the same table gets created 49 | on several nodes independently (not a special case for `lbm_kv`). If `lbm_kv` 50 | cannot merge two table entries itself, it will look for a user-defined callback 51 | to help with the merge. This `handle_conflict/3` callback is specified in the 52 | `lbm_kv` behaviour and needs to reside in a module with the same name as the 53 | table to merge values for, e.g. if your table is called `my_table` the callback 54 | to implement would be `my_table:handle_conflict/3`. 55 | 56 | If no appropriate callback is found or the callback throws an exception during 57 | the conflict resolution, `lbm_kv` will deterministically __restart__ one of the 58 | offending nodes using `init:restart/0` (the restarted node will be the one that 59 | tried to perform the merge). 60 | 61 | Please note that a merge cannot delete values (except for the case when the 62 | user callback gets involved). This means that if a mapping gets deleted during 63 | a netsplit, the mapping might get re-established when the netsplit gets 64 | resolved. 65 | 66 | Examples 67 | -------- 68 | 69 | A very simple example application/release can be found 70 | [here](https://github.com/lindenbaum/sequencer_sample). 
71 | -------------------------------------------------------------------------------- /include/lbm_kv.hrl: -------------------------------------------------------------------------------- 1 | %%%============================================================================= 2 | %%% 3 | %%% | o __ _| _ __ |_ _ _ _ (TM) 4 | %%% |_ | | | (_| (/_ | | |_) (_| |_| | | | 5 | %%% 6 | %%% @copyright (C) 2014, Lindenbaum GmbH 7 | %%% 8 | %%% Permission to use, copy, modify, and/or distribute this software for any 9 | %%% purpose with or without fee is hereby granted, provided that the above 10 | %%% copyright notice and this permission notice appear in all copies. 11 | %%% 12 | %%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | %%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | %%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | %%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | %%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | %%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | %%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | %%%============================================================================= 20 | 21 | -ifndef(lbm_kv_hrl_). 22 | -define(lbm_kv_hrl_, 1). 23 | 24 | %% The record defining an `lbm_kv' table entry. 25 | -record(lbm_kv, { 26 | key :: lbm_kv:key() | '_', 27 | val :: lbm_kv:value() | '_', 28 | ver :: lbm_kv:version() | '_'}). 29 | 30 | %% A special define using a `hidden' mnesia feature to set the `cookie' of a 31 | %% table (at creation time only). This is needed to be able to merge schemas 32 | %% of nodes that created the same table independently (while not yet 33 | %% connected). Please note that this bypasses a mnesia-builtin security 34 | %% mechanism that classifies tables with the same name and different cookie as 35 | %% incompatible by default. 
If two nodes have at least one table with the same 36 | %% name and differing cookie a schema merge and thus a mnesia-connection between 37 | %% these nodes will be refused by mnesia. 38 | -define(LBM_KV_COOKIE, {{0,0,0}, lbm_kv}). 39 | 40 | %% The options used in `mnesia:create_table/2'. 41 | -define(LBM_KV_TABLE_OPTS, [{record_name, lbm_kv}, 42 | {attributes, record_info(fields, lbm_kv)}, 43 | {cookie, ?LBM_KV_COOKIE}, 44 | {ram_copies, [node() | nodes()]}]). 45 | 46 | %% Default timeout for RPC calls. 47 | -define(LBM_KV_RPC_TIMEOUT, 2000). 48 | 49 | %% Simple debug macro. 50 | -ifdef(DEBUG). 51 | -define(LBM_KV_DBG(Fmt, Args), io:format(Fmt, Args)). 52 | -else. 53 | -define(LBM_KV_DBG(Fmt, Args), begin _ = Fmt, _ = Args, ok end). 54 | -endif. 55 | 56 | -endif. %% lbm_kv_hrl_ 57 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %%%============================================================================= 2 | %%% 3 | %%% | o __ _| _ __ |_ _ _ _ (TM) 4 | %%% |_ | | | (_| (/_ | | |_) (_| |_| | | | 5 | %%% 6 | %%% Copyright (c) 2012-2018 Lindenbaum GmbH 7 | %%% 8 | %%% Permission to use, copy, modify, and/or distribute this software for any 9 | %%% purpose with or without fee is hereby granted, provided that the above 10 | %%% copyright notice and this permission notice appear in all copies. 11 | %%% 12 | %%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | %%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | %%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | %%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | %%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | %%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | %%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | %%%============================================================================= 20 | 21 | {profiles, 22 | [ 23 | {test, 24 | [ 25 | {deps, [{proper, "1.2.0"}]}, 26 | {proper_opts, [long_result, verbose]} 27 | ]}, 28 | {ci, 29 | [ 30 | {erl_opts, [debug_info, {d, 'DEBUG'}, {d, 'EXCLUDE_FLAKY', true}]}, 31 | {deps, [{proper, "1.2.0"}]}, 32 | {proper_opts, [long_result, verbose]} 33 | ]} 34 | ]}. 35 | 36 | {plugins, 37 | [ 38 | rebar3_proper 39 | ]}. 40 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /src/lbm_kv.app.src: -------------------------------------------------------------------------------- 1 | %%%============================================================================= 2 | %%% 3 | %%% | o __ _| _ __ |_ _ _ _ (TM) 4 | %%% |_ | | | (_| (/_ | | |_) (_| |_| | | | 5 | %%% 6 | %%% @copyright (C) 2014-2016, Lindenbaum GmbH 7 | %%% 8 | %%% Permission to use, copy, modify, and/or distribute this software for any 9 | %%% purpose with or without fee is hereby granted, provided that the above 10 | %%% copyright notice and this permission notice appear in all copies. 11 | %%% 12 | %%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | %%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | %%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | %%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | %%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | %%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | %%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | %%%============================================================================= 20 | {application, 21 | lbm_kv, 22 | [ 23 | {description, "A simple, distributed, Mnesia-based key-value-store."}, 24 | {vsn, "0.0.3"}, 25 | {registered, 26 | [ 27 | lbm_kv_mon 28 | ]}, 29 | {applications, 30 | [ 31 | kernel, 32 | stdlib, 33 | sasl, 34 | mnesia 35 | ]}, 36 | {mod, {lbm_kv, []}}, 37 | {env, []}, 38 | {maintainers, ["Tobias Schlager", "Sven Heyll", "Timo Koepke"]}, 39 | {licenses, ["MIT"]}, 40 | {links, [{"Github", "https://github.com/lindenbaum/lbm_kv"}]} 41 | ]}. 42 | -------------------------------------------------------------------------------- /src/lbm_kv.erl: -------------------------------------------------------------------------------- 1 | %%%============================================================================= 2 | %%% 3 | %%% | o __ _| _ __ |_ _ _ _ (TM) 4 | %%% |_ | | | (_| (/_ | | |_) (_| |_| | | | 5 | %%% 6 | %%% @copyright (C) 2014, Lindenbaum GmbH 7 | %%% 8 | %%% Permission to use, copy, modify, and/or distribute this software for any 9 | %%% purpose with or without fee is hereby granted, provided that the above 10 | %%% copyright notice and this permission notice appear in all copies. 11 | %%% 12 | %%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | %%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | %%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | %%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | %%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | %%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | %%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | %%% 20 | %%% @doc 21 | %%% Provides simple, Mnesia-based, distributed key value tables. When started, 22 | %%% this application distributes Mnesia (and all `lbm_kv' tables) over all 23 | %%% dynamically connected nodes. The Mnesia cluster can grow and shrink 24 | %%% dynamically. 25 | %%% 26 | %%% All tables created have key/value semantics (after all, it's still Mnesia). 27 | %%% A new key-value-table can be created using {@link create/1}. The table will 28 | %%% automatically be replicated to other nodes as new node connections are 29 | %%% detected. Every connected node has read and write access to all Mnesia 30 | %%% tables. If desired, it is possible to use the default Mnesia API to 31 | %%% manipulate `lbm_kv' tables. However, `lbm_kv' uses vector clocks that need 32 | %%% to be updated on every write to be able to use automatic netsplit recovery! 33 | %%% Use the `#lbm_kv{}' record from the `lbm_kv.hrl' header file to match 34 | %%% `lbm_kv' table entries. 35 | %%% 36 | %%% Every `lbm_kv' table uses vector clocks to keep track of its entries. 37 | %%% In case of new node connections or netsplits, `lbm_kv' will use these to 38 | %%% merge the islands itself without interaction. However, if there are diverged 39 | %%% entries `lbm_kv' will look for a user defined callback to resolve the 40 | %%% conflict. If no such callback can be found one of the conflicting nodes will 41 | %%% be restarted! 
%%%
%%% To be able to use `lbm_kv' none of the connected nodes is allowed to have
%%% `disk_copies' of its `schema' table, because Mnesia will fail to merge
%%% schemas on disk nodes (which means that it is likely they can't
%%% participate). If you need `disk_copies', you're on your own here. Do not
%%% mess with table replication and mnesia configuration changes yourself!
%%% There's a lot of black magic happening inside Mnesia and `lbm_kv' will do
%%% the necessary tricks and workarounds for you. At best you should avoid
%%% having tables created from outside `lbm_kv'. At least do not create tables
%%% with conflicting names.
%%% @end
%%%=============================================================================

-module(lbm_kv).

%% This module is both the application callback module and the top-level
%% supervisor of the application.
-behaviour(application).
-behaviour(supervisor).

%% API
-export([create/1,
         put/3,
         put/2,
         del/2,
         get/2,
         get/3,
         match_key/2,
         match_key/3,
         match/3,
         match/4,
         update/2,
         update/3,
         info/0]).

%% Application callbacks
-export([start/2, stop/1]).

%% supervisor callbacks
-export([init/1]).

-type table() :: atom().
%% The name of an `lbm_kv' table. It is handed to Mnesia unchanged as the
%% table name.
-type key() :: term().
%% Unfortunately, Mnesia is quite picky when it comes to allowed types for
%% keys, e.g. all special atoms of `match_specs' are not allowed and lead to
%% undefined behaviour when used.
-type value() :: term().
%% Unfortunately, Mnesia is quite picky when it comes to allowed types for
%% values, e.g. all special atoms of `match_specs' are not allowed and lead to
%% undefined behaviour when used.
-type version() :: lbm_kv_vclock:vclock().
%% A type describing the version of a table entry (a vector clock).

-type update_fun() :: fun((key(), {value, value()} | undefined) ->
                                 {value, value()} | term()).
%% The definition for a function passed to {@link update/2,3}. If there is no
%% mapping associated with a key the atom `undefined' is passed to the function,
%% otherwise the value will be provided as the tuple `{value, Value}'. To add
%% a not yet existing or change an existing mapping the function must return a
%% tuple of the same form. Any other return value will delete the mapping
%% (if any).

-export_type([table/0, key/0, value/0, version/0, update_fun/0]).

%% Provides the `#lbm_kv{}' record and the `LBM_KV_*' macros.
-include("lbm_kv.hrl").

%%%=============================================================================
%%% Behaviour
%%%=============================================================================

-callback handle_conflict(key(), Local :: value(), Remote :: value()) ->
    {value, value()} | delete | term().
%% An optional callback that will be called on the node performing a table
%% merge (usually an arbitrary node) whenever an entry of a table cannot be
%% merged automatically. The callback must be implemented in a module with the
%% same name as the respective table name, e.g. to handle conflicts for values
%% in the table `my_table' the module/function `my_table:handle_conflict/3' has
%% to be implemented.
%%
%% The function can resolve conflicts in several ways. It can provide a (new)
%% value for `Key' by returning `{value, Val}', it can delete all associations
%% for `Key' on all nodes by returning `delete' or it can ignore the
%% inconsistency by returning anything else or by crashing. When ignoring an
%% inconsistency the values for key will depend on the location of retrieval
%% until a new value gets written for `Key'.
%% NOTE(review): the project README states that an exception thrown by the
%% callback leads to a node restart instead of being ignored -- confirm which
%% of the two descriptions matches the merge implementation.
%%
%% If an appropriate callback is not provided, the default conflict resolution
%% strategy is to __restart__ one of the conflicting node islands!

%%%=============================================================================
%%% API
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @doc
%% Creates a key value table that is kept as RAM copy (see the table options
%% macro `LBM_KV_TABLE_OPTS'). An already existing table is not an error, the
%% call then simply waits for that table, so it is safe to call this function
%% repeatedly.
%%
%% The function only returns `ok' after the table has become available on the
%% local node.
%% @end
%%------------------------------------------------------------------------------
-spec create(table()) -> ok | {error, term()}.
create(Table) ->
    case mnesia:create_table(Table, ?LBM_KV_TABLE_OPTS) of
        Result when Result =:= {atomic, ok};
                    Result =:= {aborted, {already_exists, Table}} ->
            %% freshly created or created earlier, either way wait for it
            await_table(Table);
        {aborted, Reason} ->
            {error, Reason}
    end.

%%------------------------------------------------------------------------------
%% @doc
%% Stores a single key value pair in a table. An existing mapping for the key
%% gets replaced, the previous mappings are returned.
%% @end
%%------------------------------------------------------------------------------
-spec put(table(), key(), value()) ->
                 {ok, [{key(), value()}]} | {error, term()}.
put(Table, Key, Value) ->
    Pair = {Key, Value},
    %% must be a qualified call, an unqualified put/2 clashes with the BIF
    ?MODULE:put(Table, [Pair]).

%%------------------------------------------------------------------------------
%% @doc
%% Stores a list of key value pairs in a table within one transaction.
%% Existing mappings get replaced, the previous mappings are returned. An
%% empty list is answered directly without starting a transaction.
%% @end
%%------------------------------------------------------------------------------
-spec put(table(), [{key(), value()}]) ->
                 {ok, [{key(), value()}]} | {error, term()}.
put(_Table, []) ->
    {ok, []};
put(Table, KeyValues) when is_list(KeyValues) ->
    WriteTx = fun() ->
                      Previous = r_and_w(Table, KeyValues),
                      strip_l(Previous)
              end,
    do(WriteTx).

%%------------------------------------------------------------------------------
%% @doc
%% Removes the mappings of a key (or of every key in a list of keys) from a
%% table within one transaction and returns the removed mappings. An empty
%% list is answered directly without starting a transaction.
%% @end
%%------------------------------------------------------------------------------
-spec del(table(), key() | [key()]) ->
                 {ok, [{key(), value()}]} | {error, term()}.
del(_Table, []) ->
    {ok, []};
del(Table, KeyOrKeys) ->
    DeleteTx = fun() ->
                       Removed = r_and_d(Table, KeyOrKeys),
                       strip_l(Removed)
               end,
    do(DeleteTx).

%%------------------------------------------------------------------------------
%% @doc
%% Shortcut for {@link get/3} using `transaction' as `Type'.
%% @end
%%------------------------------------------------------------------------------
-spec get(table(), key()) -> {ok, [{key(), value()}]} | {error, term()}.
get(Table, Key) ->
    get(Table, Key, transaction).

%%------------------------------------------------------------------------------
%% @doc
%% Looks up the entry stored for a key. With `dirty' the lookup is a Mnesia
%% dirty read (faster, but without isolation/atomicity), with `transaction'
%% the lookup takes a read lock inside a transaction.
%% @end
%%------------------------------------------------------------------------------
-spec get(table(), key(), dirty | transaction) ->
                 {ok, [{key(), value()}]} | {error, term()}.
get(Table, Key, dirty) ->
    dirty_r(Table, Key);
get(Table, Key, transaction) ->
    ReadTx = fun() -> strip_l(r(Table, Key, read)) end,
    do(ReadTx).

%%------------------------------------------------------------------------------
%% @doc
%% Similar to {@link match_key/3} with `Type' set to transaction.
%% @end
%%------------------------------------------------------------------------------
-spec match_key(table(), key()) -> {ok, [{key(), value()}]} | {error, term()}.
match_key(Table, KeySpec) -> match_key(Table, KeySpec, transaction).

%%------------------------------------------------------------------------------
%% @doc
%% Retrieves the entries that match the given key spec from a table. Specifying
%% `dirty' will issue a faster dirty select operation (no isolation/atomicity).
%% The value is not constrained (it is matched with the wildcard `_').
%% @end
%%------------------------------------------------------------------------------
-spec match_key(table(), key(), dirty | transaction) ->
                       {ok, [{key(), value()}]} | {error, term()}.
match_key(Table, KeySpec, Type) -> match(Table, KeySpec, '_', Type).

%%------------------------------------------------------------------------------
%% @doc
%% Similar to {@link match/4} with `Type' set to transaction.
%% @end
%%------------------------------------------------------------------------------
-spec match(table(), key(), value()) ->
                   {ok, [{key(), value()}]} | {error, term()}.
match(Table, KeySpec, ValueSpec) ->
    match(Table, KeySpec, ValueSpec, transaction).

%%------------------------------------------------------------------------------
%% @doc
%% Retrieves the entries that match the given key and value spec from a table.
%% Specifying `dirty' will issue a faster dirty select operation (no
%% isolation/atomicity), `transaction' selects with a read lock inside a
%% transaction.
%% @end
%%------------------------------------------------------------------------------
-spec match(table(), key(), value(), dirty | transaction) ->
                   {ok, [{key(), value()}]} | {error, term()}.
match(Table, KeySpec, ValueSpec, dirty) ->
    dirty_m(Table, KeySpec, ValueSpec);
match(Table, KeySpec, ValueSpec, transaction) ->
    do(fun() -> strip_l(m(Table, KeySpec, ValueSpec, read)) end).

%%------------------------------------------------------------------------------
%% @doc
%% Updates all mappings of a table. This function can be used to modify or
%% delete arbitrary mappings.
%%
%% `Fun' will be invoked consecutively for all table entries and will be invoked
%% exactly once per contained key. To modify a mapping simply return
%% `{value, NewVal}', to preserve the current mapping just return a tuple with
%% the old value. All other return values will cause the current mapping to be
%% deleted.
%%
%% The transaction result is always wrapped into a two-element tuple
%% `{ok, Result}'. `Result' is built by the same helper that {@link update/3}
%% uses, so it presumably has the form `{OldMappings, NewMappings}' as well.
%% NOTE(review): confirm the exact shape against `strip_t/1'.
%% @end
%%------------------------------------------------------------------------------
-spec update(table(), update_fun()) ->
                    {ok, {[{key(), value()}], [{key(), value()}]}} |
                    {error, term()}.
update(Table, Fun) when is_function(Fun) ->
    do(fun() -> strip_t(u(Table, Fun)) end).

%%------------------------------------------------------------------------------
%% @doc
%% Updates the mapping associated with `Key'. This function can be used to
%% modify, add or delete a mapping to `Key'.
%%
%% `Fun' will be called regardless whether a mapping currently exists or not.
%% The argument passed to the function is either `{value, Value}' or `undefined'
%% if there is no mapping at the moment.
%%
%% To add or change a mapping return `{value, NewValue}'. Returning this with
%% the old value will simply preserve the mapping. Returning anything else will
%% remove the mapping from the table. {@link update/3} returns a list with the
%% previous mappings as well as a list of current mappings for `Key'.
%% The form is `{ok, {OldMappings, NewMappings}}'
%% @end
%%------------------------------------------------------------------------------
-spec update(table(), key(), update_fun()) ->
                    {ok, {[{key(), value()}], [{key(), value()}]}} |
                    {error, term()}.
update(Table, Key, Fun) when is_function(Fun) ->
    UpdateTx = fun() ->
                       Outcome = u(Table, Fun, Key),
                       strip_t(Outcome)
               end,
    do(UpdateTx).

%%------------------------------------------------------------------------------
%% @doc
%% Writes information about the current `lbm_kv' state to stdout (delegates to
%% `lbm_kv_mon:info/0').
%% @end
%%------------------------------------------------------------------------------
-spec info() -> ok.
info() ->
    lbm_kv_mon:info().

%%%=============================================================================
%%% Application callbacks
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Starts the top-level supervisor, which is implemented by this very module.
%%------------------------------------------------------------------------------
start(_StartType, _StartArgs) ->
    supervisor:start_link(?MODULE, []).

%%------------------------------------------------------------------------------
%% @private
%% Nothing to clean up on application stop.
%%------------------------------------------------------------------------------
stop(_State) ->
    ok.

%%%=============================================================================
%%% supervisor callbacks
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% The supervisor has a single child, the `lbm_kv_mon' worker.
%%------------------------------------------------------------------------------
init([]) ->
    RestartStrategy = {one_for_one, 5, 1},
    Children = [spec(lbm_kv_mon, [])],
    {ok, {RestartStrategy, Children}}.

%%%=============================================================================
%%% internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Builds the child specification of a permanent worker that is started via
%% `Module:start_link' with the given argument list.
%%------------------------------------------------------------------------------
spec(Module, Args) ->
    StartMFA = {Module, start_link, Args},
    {Module, StartMFA, permanent, 1000, worker, [Module]}.

%%------------------------------------------------------------------------------
%% @private
%% Suspends the caller until `Table' can be accessed from this node. The
%% maximum waiting time is read from the application environment key
%% `wait_timeout' (default 10000).
%%------------------------------------------------------------------------------
await_table(Table) ->
    Tables = [Table],
    WaitMillis = application:get_env(?MODULE, wait_timeout, 10000),
    case mnesia:wait_for_tables(Tables, WaitMillis) of
        {timeout, Tables} ->
            {error, timeout};
        Other ->
            %% either `ok' or an error, both are handed to the caller as is
            Other
    end.

%%------------------------------------------------------------------------------
%% @private
%% Runs `Fun' as mnesia transaction. An aborted transaction may be repeated,
%% the number of retries and the pause between two attempts are taken from the
%% application environment (`retries', default 0 and `retry_timeout', default
%% 500). Aborts may happen while replica nodes come and go, thus a later
%% attempt can succeed thanks to the automatic replica management. With the
%% defaults no retry is made.
%%------------------------------------------------------------------------------
do(Fun) ->
    RetryTimeout = application:get_env(?MODULE, retry_timeout, 500),
    Retries = application:get_env(?MODULE, retries, 0),
    do(Fun, RetryTimeout, Retries).
%%------------------------------------------------------------------------------
%% @private
%% Runs `Fun' as mnesia transaction and maps the outcome to `{ok, Result}' or
%% `{error, Reason}'. As long as `Retries' is greater than zero an aborted
%% transaction is repeated after sleeping `Timeout' milliseconds. The abort
%% reason of the last attempt is the one reported to the caller.
%%------------------------------------------------------------------------------
do(Fun, Timeout, Retries) ->
    case mnesia:transaction(Fun) of
        {atomic, Result} ->
            {ok, Result};
        {aborted, _} when Retries > 0 ->
            ok = timer:sleep(Timeout),
            do(Fun, Timeout, Retries - 1);
        {aborted, Reason} ->
            {error, Reason}
    end.

%%------------------------------------------------------------------------------
%% @private
%% Get every entry associated with `Key' from `Tab' in a dirty manner, no
%% transaction required. The result has the same `{ok, Mappings}' form as the
%% transactional read.
%%------------------------------------------------------------------------------
-spec dirty_r(table(), key()) -> {ok, [{key(), value()}]} | {error, term()}.
dirty_r(Tab, Key) -> dirty(Tab, Key, dirty_read).

%%------------------------------------------------------------------------------
%% @private
%% Select every entry matching the given key and value spec from `Tab' in a
%% dirty manner, no transaction required. The result has the same
%% `{ok, Mappings}' form as the transactional select.
%%------------------------------------------------------------------------------
-spec dirty_m(table(), key(), value()) ->
                     {ok, [{key(), value()}]} | {error, term()}.
dirty_m(Tab, KeySpec, ValueSpec) ->
    dirty(Tab, m_spec(KeySpec, ValueSpec), dirty_select).

%%------------------------------------------------------------------------------
%% @private
%% Calls the dirty mnesia function `Function' with `Tab' and `Arg', which is a
%% key for `dirty_read' and a match specification for `dirty_select'. The
%% returned records are stripped down to key/value pairs and wrapped into
%% `{ok, Mappings}'. An `{aborted, Reason}' exit is converted into
%% `{error, Reason}', any other exception is not caught.
%%------------------------------------------------------------------------------
-spec dirty(table(), key() | [tuple()], dirty_read | dirty_select) ->
                   {ok, [{key(), value()}]} | {error, term()}.
dirty(Tab, Arg, Function) ->
    try mnesia:Function(Tab, Arg) of
        Records -> {ok, strip_l(Records)}
    catch
        exit:{aborted, Reason} -> {error, Reason}
    end.

%%------------------------------------------------------------------------------
%% @private
%% Read `Key' from `Tab', only allowed within transaction context.
%%------------------------------------------------------------------------------
-spec r(table(), key(), read | write) -> [#lbm_kv{}].
r(Tab, Key, Lock) -> mnesia:read(Tab, Key, Lock).

%%------------------------------------------------------------------------------
%% @private
%% Read every entry matching the given key and value specs from `Tab', only
%% allowed within transaction context.
%%------------------------------------------------------------------------------
-spec m(table(), key(), value(), read | write) -> [#lbm_kv{}].
m(Tab, KeySpec, ValueSpec, Lock) ->
    mnesia:select(Tab, m_spec(KeySpec, ValueSpec), Lock).

%%------------------------------------------------------------------------------
%% @private
%% Builds a match specification that returns whole records (`$_') whose key and
%% value fields match `KeySpec' and `ValueSpec'; all other fields are wildcards.
%%------------------------------------------------------------------------------
-spec m_spec(key(), value()) -> [tuple()].
m_spec(KeySpec, ValueSpec) ->
    [{#lbm_kv{key = KeySpec, val = ValueSpec, _ = '_'}, [], ['$_']}].

%%------------------------------------------------------------------------------
%% @private
%% Establish mapping `Key' to `Val' in `Tab', only allowed within transaction
%% context. The stored version is `Ver' incremented for the local node.
%%------------------------------------------------------------------------------
-spec w(table(), key(), value(), version()) -> ok.
w(Tab, Key, Val, Ver) ->
    NewVer = lbm_kv_vclock:increment(node(), Ver),
    mnesia:write(Tab, #lbm_kv{key = Key, val = Val, ver = NewVer}, write).

%%------------------------------------------------------------------------------
%% @private
%% Delete mapping `Key' to `Val' from `Tab' (if any), only allowed within
%% transaction context. `mnesia:delete/3' only supports write-type locks, hence
%% `read' is not a valid lock kind here.
%%------------------------------------------------------------------------------
-spec d(table(), key(), write | sticky_write) -> ok.
d(Tab, Key, Lock) -> mnesia:delete(Tab, Key, Lock).
%%------------------------------------------------------------------------------
%% @private
%% Reads all mappings for `Key' (or for every key in the list `Keys') from
%% `Tab', deletes them and returns what was stored before. Only allowed within
%% transaction context.
%%------------------------------------------------------------------------------
-spec r_and_d(table(), key() | [key()]) -> [#lbm_kv{}].
r_and_d(Tab, Keys) when is_list(Keys) ->
    PerKey = [r_and_d(Tab, Key) || Key <- Keys],
    lists:append(PerKey);
r_and_d(Tab, Key) ->
    Previous = r(Tab, Key, write),
    ok = d(Tab, Key, write),
    Previous.

%%------------------------------------------------------------------------------
%% @private
%% List variant of {@link r_and_w/3}: processes every `{Key, Value}' pair of the
%% input and returns the concatenated previous mappings.
%%------------------------------------------------------------------------------
-spec r_and_w(table(), [{key(), value()}]) -> [#lbm_kv{}].
r_and_w(Tab, KeyValues) ->
    PerPair = [r_and_w(Tab, Key, Val) || {Key, Val} <- KeyValues],
    lists:append(PerPair).

%%------------------------------------------------------------------------------
%% @private
%% Reads all mappings for `Key' in `Tab', makes `Key' map to `Val' and returns
%% the previous mappings. Nothing is written when the stored value already
%% equals `Val'. Only allowed within transaction context.
%%------------------------------------------------------------------------------
-spec r_and_w(table(), key(), value()) -> [#lbm_kv{}].
r_and_w(Tab, Key, Val) ->
    Existing = r(Tab, Key, write),
    ok = case Existing of
             [#lbm_kv{key = Key, val = Val} | _] ->
                 ok; %% value unchanged, skip the write
             [#lbm_kv{key = Key, ver = Ver} | _] ->
                 w(Tab, Key, Val, Ver);
             [] ->
                 w(Tab, Key, Val, lbm_kv_vclock:fresh())
         end,
    Existing.
%%------------------------------------------------------------------------------
%% @private
%% Applies `Fun' to every key currently found in `Tab' and returns the
%% concatenated old and new mappings. Only allowed within transaction context.
%%------------------------------------------------------------------------------
-spec u(table(), update_fun()) -> {[#lbm_kv{}], [#lbm_kv{}]}.
u(Tab, Fun) ->
    PerKey = [u(Tab, Fun, Key) || Key <- mnesia:all_keys(Tab)],
    {OldLists, NewLists} = lists:unzip(PerKey),
    {lists:append(OldLists), lists:append(NewLists)}.

%%------------------------------------------------------------------------------
%% @private
%% Updates the mapping for `Key' according to the result of `Fun' and returns
%% `{OldRecords, NewRecords}'. `Fun' receives `{value, Val}' for an existing
%% entry and `undefined' otherwise; any result other than `{value, _}' removes
%% the entry. Only allowed within transaction context.
%%------------------------------------------------------------------------------
-spec u(table(), update_fun(), key()) -> {[#lbm_kv{}], [#lbm_kv{}]}.
u(Tab, Fun, Key) ->
    Before = r(Tab, Key, write),
    case Before of
        [#lbm_kv{key = Key, val = Current, ver = Ver} | _] ->
            case Fun(Key, {value, Current}) of
                {value, Current} ->
                    ok; %% value unchanged, skip the write
                {value, Changed} ->
                    ok = d(Tab, Key, write),
                    ok = w(Tab, Key, Changed, Ver);
                _ ->
                    ok = d(Tab, Key, write)
            end;
        [] ->
            case Fun(Key, undefined) of
                {value, Initial} ->
                    ok = w(Tab, Key, Initial, lbm_kv_vclock:fresh());
                _ ->
                    ok %% nothing stored, nothing to delete
            end
    end,
    {Before, r(Tab, Key, read)}.

%%------------------------------------------------------------------------------
%% @private
%% Converts a list of `#lbm_kv{}' records into `{Key, Value}' pairs.
%%------------------------------------------------------------------------------
-spec strip_l([#lbm_kv{}]) -> [{key(), value()}].
strip_l(Records) ->
    [{Key, Val} || #lbm_kv{key = Key, val = Val} <- Records].
%%------------------------------------------------------------------------------
%% @private
%% Applies {@link strip_l/1} to both elements of a pair of record lists.
%%------------------------------------------------------------------------------
-spec strip_t({[#lbm_kv{}], [#lbm_kv{}]}) ->
                     {[{key(), value()}], [{key(), value()}]}.
strip_t({Old, New}) ->
    StrippedOld = strip_l(Old),
    StrippedNew = strip_l(New),
    {StrippedOld, StrippedNew}.

--------------------------------------------------------------------------------
/src/lbm_kv_merge.erl:
--------------------------------------------------------------------------------
%%%=============================================================================
%%%
%%% | o __ _| _ __ |_ _ _ _ (TM)
%%% |_ | | | (_| (/_ | | |_) (_| |_| | | |
%%%
%%% @copyright (C) 2014, Lindenbaum GmbH
%%%
%%% Permission to use, copy, modify, and/or distribute this software for any
%%% purpose with or without fee is hereby granted, provided that the above
%%% copyright notice and this permission notice appear in all copies.
%%%
%%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%%
%%% @doc
%%% This module implements the `lbm_kv' table merge strategy. Currently this
%%% strategy is based on vector clocks provided in {@link lbm_kv_vclock}. If
%%% the algorithm encounters diverged entries for a specific key, it tries to
%%% call a user defined callback for the respective table. As last resort one
%%% of the nodes with conflicting tables will be restarted.
%%%
%%% For more information about user defined callbacks, refer to the {@lbm_kv}
%%% behaviour description.
%%%
%%% This code is inspired by the work put in the `unsplit' project by Ulf Wiger,
%%% the man deserves some credit!
%%%
%%% @see https://github.com/uwiger/unsplit
%%% @end
%%%=============================================================================

-module(lbm_kv_merge).

%% Internal API
-export([tables/2]).

%% Remoting API
-export([handle_actions/1]).

-include("lbm_kv.hrl").

%%%=============================================================================
%%% Internal API
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% This function runs inside the {@link mnesia_schema:merge_schema/1}
%% transaction locking all tables to merge. However, since the merged schema
%% must first be committed to be able to make ACID compliant writes, all table
%% merge actions must be dirty operations.
%%
%% It is sufficient to merge from an arbitrary node from the passed island. The
%% other island should already be consistent. Although dirty, merge actions will
%% be replicated to the other nodes of the island.
%%
%% Only the first node of the given list is used. Merging stops at the first
%% table whose merge does not return `ok' and that result is returned.
%%------------------------------------------------------------------------------
-spec tables([lbm_kv:table()], [node()]) -> ok | {error, term()}.
tables(_Tables, []) ->
    ok;
tables(Tables, [Node | _]) ->
    ?LBM_KV_DBG("Merging with ~s:~n", [Node]),
    tables(Tables, Node, ok).
tables(Remaining, Node, Result) ->
    case {Result, Remaining} of
        {ok, [Table | Rest]} ->
            ?LBM_KV_DBG(" * ~w~n", [Table]),
            tables(Rest, Node, merge_table(Node, Table));
        _ ->
            %% either all tables are merged or the last merge failed
            Result
    end.
%%------------------------------------------------------------------------------
%% @private
%% Internal remoting API: executes a list of merge actions, each one being a
%% `{MnesiaFunction, Args}' tuple, on the node this function is called on.
%%------------------------------------------------------------------------------
-spec handle_actions([{atom(), [term()]}]) -> ok.
handle_actions(Actions) ->
    lists:foreach(fun(Action) -> handle_action(Action) end, Actions).

%%%=============================================================================
%%% Internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Merges the values found in `Table' from the local and `Remote' node. The
%% remote actions are shipped first; local actions are only applied when that
%% succeeded.
%%------------------------------------------------------------------------------
merge_table(Remote, Table) ->
    Local = node(),
    Keys = get_all_keys([Local, Remote], Table),
    case merge_entries(Keys, Local, Remote, Table, {[], []}) of
        {ok, {ForLocal, ForRemote}} ->
            case rpc_merge(Remote, ForRemote) of
                ok ->
                    handle_actions(ForLocal);
                RpcError ->
                    RpcError
            end;
        MergeError ->
            MergeError
    end.

%%------------------------------------------------------------------------------
%% @private
%% Returns the local and remote merge actions for a table.
%%------------------------------------------------------------------------------
merge_entries([], _, _, _, Acc) ->
    {ok, Acc};
merge_entries([Key | Rest], Local, Remote, Table, Acc = {LAcc, RAcc}) ->
    %% First decide how the accumulator changes for this key, then either
    %% continue with the remaining keys or stop at the first error.
    Step = case merge_entry(Local, Remote, Table, Key) of
               {all, Action} ->
                   ?LBM_KV_DBG(" - ~w => {all,~w}~n", [Key, Action]),
                   {continue, {[Action | LAcc], [Action | RAcc]}};
               {local, Action} ->
                   ?LBM_KV_DBG(" - ~w => {local,~w}~n", [Key, Action]),
                   {continue, {[Action | LAcc], RAcc}};
               {remote, Action} ->
                   ?LBM_KV_DBG(" - ~w => {remote,~w}~n", [Key, Action]),
                   {continue, {LAcc, [Action | RAcc]}};
               noop ->
                   ?LBM_KV_DBG(" - ~w => noop~n", [Key]),
                   {continue, Acc};
               Error = {error, _} ->
                   {stop, Error}
           end,
    case Step of
        {continue, NewAcc} ->
            merge_entries(Rest, Local, Remote, Table, NewAcc);
        {stop, Failure} ->
            Failure
    end.

%%------------------------------------------------------------------------------
%% @private
%% Return the merge action for `Key' in `Table'. All dirty mnesia operations
%% are allowed as merge actions. The returned action must be of the form
%% `noop' or `{all | local | remote, {DirtyMnesiaFunction, FunctionArgs}}'.
%%------------------------------------------------------------------------------
merge_entry(Local, Remote, Table, Key) ->
    case {get_records(Local, Table, Key), get_records(Remote, Table, Key)} of
        %% Failed reads must be checked before the equality shortcut below,
        %% otherwise two identical `{error, Reason}' results would be mistaken
        %% for consistent entries and silently reported as `noop'.
        {{error, Reason}, _} ->
            {error, {Local, Reason}};
        {_, {error, Reason}} ->
            {error, {Remote, Reason}};
        {Records, Records} ->
            %% both nodes hold exactly the same records
            noop;
        {[Record], []} ->
            %% entry only exists locally
            {remote, {dirty_write, [Table, Record]}};
        {[], [Record]} ->
            %% entry only exists remotely
            {local, {dirty_write, [Table, Record]}};
        {[#lbm_kv{val = V}], [Record = #lbm_kv{val = V}]} ->
            %% same value but different record (version), take the remote one
            {local, {dirty_write, [Table, Record]}};
        {[L = #lbm_kv{ver = LVer}], [R = #lbm_kv{ver = RVer}]} ->
            case lbm_kv_vclock:descends(LVer, RVer) of
                true ->
                    {remote, {dirty_write, [Table, L]}};
                false ->
                    case lbm_kv_vclock:descends(RVer, LVer) of
                        true -> {local, {dirty_write, [Table, R]}};
                        false -> user_callback(Table, Key, L, R)
                    end
            end;
        {[LRecord], [RRecord]} -> %% merging non-lbm_kv table
            user_callback(Table, Key, LRecord, RRecord)
    end.

%%------------------------------------------------------------------------------
%% @private
%% Call a user provided function to handle a conflicting entry. This can happen
%% on an arbitrary node (the one that connects the nodes and merges the
%% schemas).
%%
%% For more information refer to the {@lbm_kv} behaviour description.
%%
%% Why is this function written as it is (no pattern matching on #lbm_kv{})?
%% This hidden feature could (in the future) be used to call the user-provided
%% callback to merge non-lbm_kv tables ;)
%%------------------------------------------------------------------------------
user_callback(Table, Key, LRecord, RRecord) when is_atom(Table) ->
    Diverged = {error, {diverged, Table, Key}},
    %% The callback can only be used if the table name is a loadable module
    %% exporting `handle_conflict/3'.
    HasHandler = code:ensure_loaded(Table) =:= {module, Table}
        andalso erlang:function_exported(Table, handle_conflict, 3),
    case HasHandler of
        false ->
            Diverged;
        true ->
            LVal = get_value(LRecord),
            RVal = get_value(RRecord),
            try {Table:handle_conflict(Key, LVal, RVal), LRecord} of
                {{value, LVal}, _} ->
                    %% local value wins, push it to the remote side
                    {remote, {dirty_write, [Table, LRecord]}};
                {{value, RVal}, _} ->
                    %% remote value wins, store it locally
                    {local, {dirty_write, [Table, RRecord]}};
                {{value, Val}, #lbm_kv{ver = OldVer}} ->
                    %% a new value was chosen, write it everywhere
                    Ver = lbm_kv_vclock:increment(node(), OldVer),
                    Record = #lbm_kv{key = Key, val = Val, ver = Ver},
                    {all, {dirty_write, [Table, Record]}};
                {{value, Record}, _} ->
                    {all, {dirty_write, [Table, Record]}};
                {delete, _} ->
                    {all, {dirty_delete, [Table, Key]}};
                _ ->
                    noop
            catch
                Class:Exception ->
                    error_logger:error_msg(
                      "~w:handle_conflict/3 raised ~w on key ~w: ~w",
                      [Table, Class, Key, Exception]),
                    Diverged
            end
    end;
user_callback(Table, Key, _, _) ->
    {error, {diverged, Table, Key}}.

%%------------------------------------------------------------------------------
%% @private
%% Extracts the value of an `#lbm_kv{}' record; any other term is returned
%% as is.
%%------------------------------------------------------------------------------
get_value(#lbm_kv{val = Val}) ->
    Val;
get_value(Other) ->
    Other.

%%------------------------------------------------------------------------------
%% @private
%% Returns the record for `Key' on `Node'.
%%------------------------------------------------------------------------------
get_records(Node, Table, Key) ->
    rpc_mnesia(Node, dirty_read, [Table, Key]).

%%------------------------------------------------------------------------------
%% @private
%% Returns the sorted, duplicate-free list of keys of `Table' found on `Nodes'.
%%------------------------------------------------------------------------------
get_all_keys(Nodes, Table) ->
    AllKeys = [Key || Node <- Nodes,
                      Key <- rpc_mnesia(Node, dirty_all_keys, [Table])],
    lists:usort(AllKeys).

%%------------------------------------------------------------------------------
%% @private
%% Calls `mnesia:Function' with `Args' on `Node' via RPC. The timeout is taken
%% from the `rpc_timeout' application environment. The `rpc' module knows when
%% a call is local and optimizes that.
%%------------------------------------------------------------------------------
rpc_mnesia(Node, Function, Args) ->
    RpcTimeout = application:get_env(lbm_kv, rpc_timeout, ?LBM_KV_RPC_TIMEOUT),
    Reply = rpc:call(Node, mnesia, Function, Args, RpcTimeout),
    check_rpc(Reply).

%%------------------------------------------------------------------------------
%% @private
%% Make subsequent RPC calls to this module on `Node' handing over merge
%% actions in batches of a configurable size. This is done to limit the size
%% of terms sent over the distributed erlang connection. Not doing so might
%% result in connection loss due to inter-node heartbeats timing out.
%%------------------------------------------------------------------------------
rpc_merge(Node, Actions) ->
    BatchSize = application:get_env(lbm_kv, batch_size, 10),
    RpcTimeout = application:get_env(lbm_kv, rpc_timeout, ?LBM_KV_RPC_TIMEOUT),
    %% the per-call timeout grows with the configured batch size
    BatchTimeout = RpcTimeout + BatchSize * 200,
    rpc_merge(Node, Actions, BatchSize, BatchTimeout).
%%------------------------------------------------------------------------------
%% @private
%% Sends `Actions' to `Node' in batches of `BatchSize' elements, one RPC per
%% batch, stopping at the first failing call. A batch size of `0' would never
%% consume any action (and thus loop forever), so it is treated as `1'.
%%------------------------------------------------------------------------------
rpc_merge(_Node, [], _BatchSize, _Timeout) ->
    ok;
rpc_merge(Node, Actions, 0, Timeout) ->
    rpc_merge(Node, Actions, 1, Timeout);
rpc_merge(Node, Actions, BatchSize, Timeout) ->
    {Current, Remaining} = split(BatchSize, Actions),
    case rpc:call(Node, ?MODULE, handle_actions, [Current], Timeout) of
        ok -> rpc_merge(Node, Remaining, BatchSize, Timeout);
        Result -> check_rpc(Result)
    end.

%%------------------------------------------------------------------------------
%% @private
%% Similar to {@link lists:split/2}. However, it is not an error when `N'
%% exceeds the length of the list; in that case the whole list is returned as
%% the first element.
%%------------------------------------------------------------------------------
split(N, List) ->
    try
        lists:split(N, List)
    catch
        error:badarg -> {List, []}
    end.

%%------------------------------------------------------------------------------
%% @private
%% Executes a single merge action by applying `mnesia:Function' to `Args'.
%%------------------------------------------------------------------------------
handle_action({Function, Args}) -> erlang:apply(mnesia, Function, Args).

%%------------------------------------------------------------------------------
%% @private
%% Converts a `{badrpc, Reason}' RPC result into `{error, Reason}'; any other
%% result is passed through unchanged.
%%------------------------------------------------------------------------------
check_rpc({badrpc, Reason}) -> {error, Reason};
check_rpc(Result) -> Result.
281 | -------------------------------------------------------------------------------- /src/lbm_kv_mon.erl: -------------------------------------------------------------------------------- 1 | %%%============================================================================= 2 | %%% 3 | %%% | o __ _| _ __ |_ _ _ _ (TM) 4 | %%% |_ | | | (_| (/_ | | |_) (_| |_| | | | 5 | %%% 6 | %%% @copyright (C) 2014, Lindenbaum GmbH 7 | %%% 8 | %%% Permission to use, copy, modify, and/or distribute this software for any 9 | %%% purpose with or without fee is hereby granted, provided that the above 10 | %%% copyright notice and this permission notice appear in all copies. 11 | %%% 12 | %%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | %%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | %%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | %%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | %%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | %%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | %%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | %%% 20 | %%% @doc 21 | %%% A registered server that manages dynamic expansion and reduction of the 22 | %%% Mnesia cluster. The server also subscribes for Mnesia system events. In 23 | %%% case a DB inconsistency is detected (split brain) the server tries to 24 | %%% resolve the conflict using the vector clocks of the `lbm_kv' tables as well 25 | %%% as user-provided resolve callbacks. If conflict resolution fails, one of the 26 | %%% conflicting nodes will be restarted! 27 | %%% @end 28 | %%%============================================================================= 29 | 30 | -module(lbm_kv_mon). 31 | 32 | -behaviour(gen_server). 33 | 34 | %% Internal API 35 | -export([start_link/0, 36 | info/0]). 

%% gen_server callbacks
-export([init/1,
         handle_cast/2,
         handle_call/3,
         handle_info/2,
         code_change/3,
         terminate/2]).

-include("lbm_kv.hrl").

-define(DUMP, "mnesia_core.dump").
-define(ERR(Fmt, Args), error_logger:error_msg(Fmt, Args)).
-define(INFO(Fmt, Args), error_logger:info_msg(Fmt, Args)).

%%%=============================================================================
%%% Internal API
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Simply start the server (registered).
%%------------------------------------------------------------------------------
-spec start_link() -> {ok, pid()} | {error, term()}.
start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%%------------------------------------------------------------------------------
%% @private
%% Print all `lbm_kv' tables visible to this node.
%%------------------------------------------------------------------------------
-spec info() -> ok.
info() ->
    Tables = relevant_tables(node()),
    io:format("~w lbm_kv tables visible to ~s~n", [length(Tables), node()]),
    lists:foreach(fun(Table) -> io:format(" * ~s~n", [Table]) end, Tables),
    io:format("~n").

%%%=============================================================================
%%% gen_server callbacks
%%%=============================================================================

-record(state, {}).

%%------------------------------------------------------------------------------
%% @private
%% Subscribes to mnesia system events and node monitoring and schedules a
%% `nodeup' message for every node that is already connected.
%%------------------------------------------------------------------------------
init([]) ->
    {ok, _} = mnesia:subscribe(system),
    %% Subscribe to node events __before__ taking the snapshot of connected
    %% nodes. Otherwise a node connecting in between would neither be part of
    %% the snapshot nor produce a `nodeup' message. A node may now be announced
    %% twice; the second expansion then reports the node as already connected.
    ok = net_kernel:monitor_nodes(true),
    lists:foreach(fun(Node) -> self() ! {nodeup, Node} end, nodes()),
    {ok, #state{}}.

%%------------------------------------------------------------------------------
%% @private
%% No calls are supported, every request is answered with `undef'.
%%------------------------------------------------------------------------------
handle_call(_Request, _From, State) -> {reply, undef, State}.

%%------------------------------------------------------------------------------
%% @private
%% No casts are supported, every request is ignored.
%%------------------------------------------------------------------------------
handle_cast(_Request, State) -> {noreply, State}.

%%------------------------------------------------------------------------------
%% @private
%% Dispatches mnesia system events and node up/down notifications. Cluster
%% expansion and reduction run inside a `global' transaction.
%%------------------------------------------------------------------------------
handle_info({mnesia_system_event, Event}, State) ->
    handle_mnesia_event(Event, State),
    {noreply, State};
handle_info({nodeup, Node}, State) ->
    ok = global:trans({?MODULE, self()}, fun() -> expand(Node) end),
    {noreply, State};
handle_info({nodedown, Node}, State) ->
    ok = global:trans({?MODULE, self()}, fun() -> reduce(Node) end),
    {noreply, State};
handle_info(_Info, State) ->
    {noreply, State}.

%%------------------------------------------------------------------------------
%% @private
%%------------------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) -> {ok, State}.

%%------------------------------------------------------------------------------
%% @private
%%------------------------------------------------------------------------------
terminate(_Reason, _State) -> ok.

%%%=============================================================================
%%% Internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Handle Mnesia system events. A fatal event is logged, its core is written to
%% the dump file and the unresolved-conflict handling is triggered. Info events
%% are logged, everything else is ignored.
%%------------------------------------------------------------------------------
handle_mnesia_event(Event, _State) ->
    case Event of
        {mnesia_fatal, Format, Args, BinaryCore} ->
            ?ERR("Fatal condition: " ++ Format, Args),
            file:write_file(?DUMP, BinaryCore),
            handle_unresolved_conflict();
        {mnesia_info, Format, Args} ->
            ?INFO(Format, Args);
        _ ->
            ok
    end.

%%------------------------------------------------------------------------------
%% @private
%% Expand the mnesia cluster to `Node'. This will add table copies of local
%% tables to `Node' as well as table copies of remote tables on the local node.
%% This is the central function handling dynamic expansion of mnesia on new
%% nodes as well as recovery from partitioned networks.
%%
%% A word about table replication:
%% When using RAM copies, Mnesia is able to `merge' schema tables of different
%% nodes, as long as one of the schemas to merge is clean (no tables with the
%% same name on both nodes). By default tables are assigned a `cookie' value
%% that differs even for tables with the same name and makes them incompatible
%% by default. `lbm_kv' avoids this by assigning each of its tables a custom
%% cookie (which is always the same). However, even using this trick, special
%% magic is needed to merge these tables. Each of these tables must be
%% configured as RAM copy on the remote node __before__ merging the schemas.
%%
%% Thanks again to Ulf Wiger (`unsplit') for pointing us in the right direction.
%%------------------------------------------------------------------------------
expand(Node) ->
    expand(is_running(Node), Node).
expand(false, _) ->
    ok; %% Do not expand to the remote node if mnesia is not running.
expand(true, Node) ->
    Self = node(),
    LocalTables = relevant_tables(Self),
    RemoteTables = relevant_tables(Node),
    ?LBM_KV_DBG("Relevant tables on ~s: ~w~n", [Self, LocalTables]),
    ?LBM_KV_DBG("Relevant tables on ~s: ~w~n", [Node, RemoteTables]),

    LocalOnlyTables = LocalTables -- RemoteTables,
    RemoteOnlyTables = RemoteTables -- LocalTables,
    ?LBM_KV_DBG("Local-only tables on ~s: ~w~n", [Self, LocalOnlyTables]),
    ?LBM_KV_DBG("Local-only tables on ~s: ~w~n", [Node, RemoteOnlyTables]),

    SameSchema = get_cookie(Self, schema) =:= get_cookie(Node, schema),
    SeenRunning = lists:member(Node, get_running_nodes(Self)),
    case {SameSchema, SeenRunning} of
        {true, true} ->
            %% Schemas are already merged and we see the node as running,
            %% someone else did the work. However, this also means that the
            %% respective remote node must have this node in its
            %% `running_db_nodes'.
            true = lists:member(Self, get_running_nodes(Node)),

            %% Now distribute tables only found on one of the nodes.
            add_table_copies(Self, Node, LocalOnlyTables),
            add_table_copies(Node, Self, RemoteOnlyTables),

            ?INFO("Already connected to ~s~n", [Node]);
        {false, false} ->
            %% The newly connected node has a different schema. That means
            %% the schemas must be merged now.
            ?INFO("Expanding to ~s...~n", [Node]),
            connect_nodes(Node, LocalTables, LocalOnlyTables, RemoteOnlyTables);
        {true, false} ->
            %% Both nodes have the same schema, but none of the nodes has its
            %% respective counterpart in its `running_db_nodes', this must be
            %% a reconnect after a netsplit.
            ?INFO("Recovering from network partition (~s)...~n", [Node]),
            connect_nodes(Node, LocalTables, LocalOnlyTables, RemoteOnlyTables)
    end.

%%------------------------------------------------------------------------------
%% @private
%% Connects the local mnesia instance with the one on `Node', merging the
%% tables found on both nodes and afterwards replicating the tables found on
%% only one of them. A failing connect triggers the unresolved-conflict
%% handling.
%%------------------------------------------------------------------------------
connect_nodes(Node, LocalTables, LocalOnlyTables, RemoteOnlyTables) ->
    Self = node(),

    %% All duplicate tables must be interconnected (with mutual RAM copies)
    %% __before__ connecting the mnesia instances.
    SharedTables = LocalTables -- LocalOnlyTables,
    add_table_copies(Self, Node, SharedTables),
    add_table_copies(Node, Self, SharedTables),

    %% Connect both nodes and merge values of duplicate tables.
    MergeFun = connect_nodes_user_fun(SharedTables, Node),
    case mnesia_controller:connect_nodes([Node], MergeFun) of
        {ok, Connected} ->
            %% The merge is only successful if Node is now part of the
            %% mnesia cluster
            true = lists:member(Node, Connected),

            %% Now distribute tables only found on one of the nodes.
            add_table_copies(Self, Node, LocalOnlyTables),
            add_table_copies(Node, Self, RemoteOnlyTables),

            ?INFO("Successfully connected to ~s~n", [Node]);
        Failure ->
            ?ERR("Failed to connect to ~s: ~w~n", [Node, Failure]),

            %% last resort
            handle_unresolved_conflict()
    end.

%%------------------------------------------------------------------------------
%% @private
%% Returns the custom user function handed to mnesia when connecting nodes. It
%% merges the schemas of two node islands as well as the respective values in
%% conflicting tables.
%%------------------------------------------------------------------------------
connect_nodes_user_fun(TablesToMerge, Node) ->
    fun(SchemaMergeFun) ->
            case SchemaMergeFun(TablesToMerge) of
                {merged, OldFriends, NewFriends} = Merged ->
                    ?LBM_KV_DBG("Schemas successfully merged~n", []),
                    ?LBM_KV_DBG("NewFriends: ~w~n", [NewFriends]),

                    %% Sorry, but we must be part of `db_nodes' ourselves or I
                    %% lose my mind (see mnesia_schema:do_merge_schema/1).
                    true = lists:member(node(), OldFriends),

                    %% What does this mean?
                    %% Unfortunately, this is also not really clear to me. My
                    %% assumption:
                    %% `NewFriends' seems to be the list of nodes the local node
                    %% successfully merged its schema with (without the local
                    %% node itself). The local node is usually part of the
                    %% `OldFriends' list. This seems to be equivalent to the
                    %% current `db_nodes'. In case of asymmetric netsplits there
                    %% may be a non-empty intersection between the two lists.
                    %% The merge function may be called multiple times in case
                    %% additional nodes become visible by merged schemas or
                    %% through merge retries.
                    %%
                    %% What do we do here?
                    %% We try to make sure to always merge with a sane candidate
                    %% from another (new) island mnesia connects us to.
                    MergeWith =
                        case lists:member(Node, OldFriends) of
                            false ->
                                %% If we did not already merge with `Node', it
                                %% __must__ be a member of `NewFriends'.
                                true = lists:member(Node, NewFriends),
                                [Node];
                            true ->
                                %% Otherwise `Node' is already part of
                                %% `db_nodes' and thus a member of
                                %% `OldFriends'. In this case we simply choose
                                %% a representative of `NewFriends' that is not
                                %% already an old friend.
                                NewFriends -- OldFriends
                        end,

                    case lbm_kv_merge:tables(TablesToMerge, MergeWith) of
                        ok -> Merged;
                        {error, Reason} -> mnesia:abort(Reason)
                    end;
                NotMerged ->
                    NotMerged
            end
    end.

%%------------------------------------------------------------------------------
%% @private
%% Remove `Node' from the distributed mnesia cluster.
%%------------------------------------------------------------------------------
reduce(Node) ->
    IsDbNode = lists:member(Node, get_db_nodes(node())),
    case IsDbNode of
        false ->
            %% The disconnected node is not part of the seen `db_nodes' anymore,
            %% someone else did the work or the node did never participate.
            ok;
        true ->
            %% Remove the disconnected node from the global schema. This will
            %% remove all ram copies copied onto this node (for all tables).
            case mnesia:del_table_copy(schema, Node) of
                {atomic, ok} ->
                    ?INFO("Successfully disconnected from ~s~n", [Node]);
                Error = {aborted, _} ->
                    ?INFO("Failed to remove schema from ~s: ~w~n", [Node, Error])
            end
    end.

%%------------------------------------------------------------------------------
%% @private
%% Returns whether mnesia is running on `Node'.
%%------------------------------------------------------------------------------
is_running(Node) ->
    yes =:= rpc_mnesia(Node, system_info, [is_running]).

%%------------------------------------------------------------------------------
%% @private
%% Add RAM copies of all `Tables' at `FromNode' to `ToNode'. Crashes, if a table
%% cannot be replicated.
323 | %%------------------------------------------------------------------------------ 324 | add_table_copies(FromNode, ToNode, Tables) -> 325 | [ok = add_table_copy(FromNode, ToNode, T) || T <- Tables]. 326 | 327 | %%------------------------------------------------------------------------------ 328 | %% @private 329 | %% Add a RAM copy of `Table' at `FromNode' to `ToNode'. 330 | %%------------------------------------------------------------------------------ 331 | add_table_copy(FromNode, ToNode, Table) -> 332 | case rpc_mnesia(FromNode, add_table_copy, [Table, ToNode, ram_copies]) of 333 | {atomic, ok} -> 334 | ok; 335 | {aborted, {already_exists, Table, ToNode}} -> 336 | ok; 337 | {aborted, Reason} -> 338 | {error, Reason}; 339 | Error -> 340 | Error 341 | end. 342 | 343 | %%------------------------------------------------------------------------------ 344 | %% @private 345 | %% Returns the tables on `Node' that are managed by `lbm_kv'. 346 | %%------------------------------------------------------------------------------ 347 | relevant_tables(Node) -> 348 | Tables = rpc_mnesia(Node, system_info, [tables]), 349 | [T || T <- Tables, is_relevant(Node, T)]. 350 | 351 | %%------------------------------------------------------------------------------ 352 | %% @private 353 | %% Returns whether a table is managed by `lbm_kv'. 354 | %%------------------------------------------------------------------------------ 355 | is_relevant(Node, Table) -> get_cookie(Node, Table) =:= ?LBM_KV_COOKIE. 356 | 357 | %%------------------------------------------------------------------------------ 358 | %% @private 359 | %% Returns the cookie of `Table'. 360 | %%------------------------------------------------------------------------------ 361 | get_cookie(Node, Table) -> rpc_mnesia(Node, table_info, [Table, cookie]). 
%%------------------------------------------------------------------------------
%% @private
%% Asks `Node' for the `running_db_nodes' it currently sees.
%%------------------------------------------------------------------------------
get_running_nodes(Node) ->
    rpc_mnesia(Node, system_info, [running_db_nodes]).

%%------------------------------------------------------------------------------
%% @private
%% Asks `Node' for the `db_nodes' it currently sees.
%%------------------------------------------------------------------------------
get_db_nodes(Node) ->
    rpc_mnesia(Node, system_info, [db_nodes]).

%%------------------------------------------------------------------------------
%% @private
%% Calls `mnesia:Function(Args...)' on `Node' by means of `rpc:call/5'. The
%% timeout is read from the `rpc_timeout' environment value of the `lbm_kv'
%% application and defaults to `?LBM_KV_RPC_TIMEOUT'. A `{badrpc, Reason}'
%% reply is translated into `{error, Reason}', everything else is returned
%% unchanged. The `rpc' module knows when a call is local and optimizes that.
%%------------------------------------------------------------------------------
rpc_mnesia(Node, Function, Args) ->
    Timeout = application:get_env(lbm_kv, rpc_timeout, ?LBM_KV_RPC_TIMEOUT),
    Reply = rpc:call(Node, mnesia, Function, Args, Timeout),
    case Reply of
        {badrpc, Why} -> {error, Why};
        _ -> Reply
    end.

%%------------------------------------------------------------------------------
%% @private
%% Last resort for database states that could not be reconciled, e.g. after
%% netsplits: log the fact and restart the local node.
%%------------------------------------------------------------------------------
handle_unresolved_conflict() ->
    Self = node(),
    ?ERR("Final conflict resolution necessary, restarting ~s...~n", [Self]),
    init:restart().
395 | -------------------------------------------------------------------------------- /src/lbm_kv_vclock.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% riak_core: Core Riak Application 4 | %% 5 | %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. 6 | %% 7 | %% This file is provided to you under the Apache License, 8 | %% Version 2.0 (the "License"); you may not use this file 9 | %% except in compliance with the License. You may obtain 10 | %% a copy of the License at 11 | %% 12 | %% http://www.apache.org/licenses/LICENSE-2.0 13 | %% 14 | %% Unless required by applicable law or agreed to in writing, 15 | %% software distributed under the License is distributed on an 16 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | %% KIND, either express or implied. See the License for the 18 | %% specific language governing permissions and limitations 19 | %% under the License. 20 | %% 21 | %% ------------------------------------------------------------------- 22 | 23 | %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks. 24 | %% 25 | %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events 26 | %% in a distributed system". Communications of the ACM 21 (7): 558-565. 27 | %% 28 | %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of 29 | %% Distributed Systems". Workshop on Parallel and Distributed Algorithms: 30 | %% pp. 215-226 31 | %% 32 | %% This module is a renamed version of the `vclock.erl' module shipped with 33 | %% `riak_core'. 34 | %% @see https://github.com/basho/riak_core 35 | -module(lbm_kv_vclock). 
-export([fresh/0,
         fresh/2,
         descends/2,
         dominates/2,
         descends_dot/2,
         pure_dot/1,
         merge/1,
         get_counter/2,
         get_timestamp/2,
         get_dot/2,
         valid_dot/1,
         increment/2,
         increment/3,
         all_nodes/1,
         equal/2,
         prune/3,
         timestamp/0]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-export_type([vclock/0, timestamp/0, vclock_node/0, dot/0, pure_dot/0]).

-type vclock() :: [dot()].
-type dot() :: {vclock_node(), {counter(), timestamp()}}.
-type pure_dot() :: {vclock_node(), counter()}.

% Any term() may serve as a node name, as long as the names are distinct.
-type vclock_node() :: term().
-type counter() :: integer().
-type timestamp() :: integer().

% @doc Create a brand new (empty) vclock.
-spec fresh() -> vclock().
fresh() ->
    [].

% @doc Create a vclock holding a single entry for `Node' with counter `Count',
% stamped with the current time.
-spec fresh(vclock_node(), counter()) -> vclock().
fresh(Node, Count) ->
    Entry = {Node, {Count, timestamp()}},
    [Entry].

% @doc Return true if Va is a direct descendant of Vb, else false -- remember,
% a vclock is its own descendant! Timestamps are not taken into account.
-spec descends(Va :: vclock(), Vb :: vclock()) -> boolean().
descends(_Va, []) ->
    % the empty vclock is an ancestor of every vclock
    true;
descends(Va, Vb) ->
    [{NodeB, {CtrB, _T}} | RestB] = Vb,
    case lists:keyfind(NodeB, 1, Va) of
        {_, {CtrA, _TSA}} when CtrA >= CtrB ->
            % this entry is covered, go on with the remaining entries of Vb
            descends(Va, RestB);
        {_, {_CtrA, _TSA}} ->
            false;
        false ->
            false
    end.

%% @doc Does the given `vclock()' descend from the given `dot()'? The
%% `dot()' can be any vclock entry, e.g. one returned from `get_dot/2'.
%% Returns `true' if the `vclock()' has an entry for the actor of the
%% `dot()' and the counter of that entry is at least the counter of the
%% `dot()', `false' otherwise. Call with a valid entry or you'll get an
%% error.
%%
%% @see descends/2
%% @see get_dot/2
%% @see dominates/2
-spec descends_dot(vclock(), dot()) -> boolean().
descends_dot(Vclock, Dot) ->
    SingleDotClock = [Dot],
    descends(Vclock, SingleDotClock).

%% @doc Strip the timestamp from a dot, for the cases in which only node
%% and counter are needed.
-spec pure_dot(dot()) -> pure_dot().
pure_dot({Node, {Counter, _Timestamp}}) ->
    {Node, Counter}.

%% @doc true if `A' strictly dominates `B'. Note: ignores
%% timestamps. In Riak it is possible to have vclocks that are
%% identical except for timestamps. When two vclocks descend each
%% other, but are not equal, they are concurrent. See source comment
%% for more details. (Actually you can have identical clocks
%% including timestamps, that represent different events, but let's
%% not go there.)
%%
-spec dominates(vclock(), vclock()) -> boolean().
dominates(A, B) ->
    %% In a sane world two vclocks that descend each other MUST be equal.
    %% In riak they can descend each other and still carry different
    %% timestamps(!), e.g. for keys that were deleted, re-written and then
    %% restored (see riak_kv#679 for more examples). For that reason
    %% descends/2 is checked in both directions instead of combining
    %% descends(A, B) with a negated equal(A, B). Do not "optimise" the
    %% second descends call away! Causality demands that each actor acts
    %% serially, but Riak breaks that.
    ADescendsB = descends(A, B),
    ADescendsB andalso not descends(B, A).

% @doc Combine all VClocks in the input list into their least possible
% common descendant.
-spec merge(VClocks :: [vclock()]) -> vclock().
merge([]) -> [];
merge([OnlyClock]) -> OnlyClock;
merge([Head | Tail]) -> merge(Tail, lists:keysort(1, Head)).

% Folds every remaining clock (sorted by node) into the already sorted
% accumulator clock.
merge(VClocks, NClock) ->
    lists:foldl(fun(AClock, Merged) ->
                        merge(lists:keysort(1, AClock), Merged, [])
                end, NClock, VClocks).
% Merges two vclocks that are both sorted by node into one sorted vclock. The
% third argument accumulates the result in reverse order.
merge([], Right, AccClock) ->
    % left side exhausted (the right side may be empty as well)
    lists:reverse(AccClock, Right);
merge(Left, [], AccClock) ->
    lists:reverse(AccClock, Left);
merge([{NodeL, {_, _}} = EntryL | RestL], [{NodeR, {_, _}} | _] = Right, AccClock)
  when NodeL < NodeR ->
    merge(RestL, Right, [EntryL | AccClock]);
merge([{NodeL, {_, _}} | _] = Left, [{NodeR, {_, _}} = EntryR | RestR], AccClock)
  when NodeL > NodeR ->
    merge(Left, RestR, [EntryR | AccClock]);
merge([{Node, {CtrL, TsL} = StampL} | RestL],
      [{_, {CtrR, TsR} = StampR} | RestR], AccClock) ->
    % Same node on both sides: the higher counter wins, for equal counters the
    % later timestamp is kept.
    Stamp = case {CtrL > CtrR, CtrL < CtrR} of
                {true, _} -> StampL;
                {_, true} -> StampR;
                _ -> {CtrL, erlang:max(TsL, TsR)}
            end,
    merge(RestL, RestR, [{Node, Stamp} | AccClock]).

% @doc Get the counter value in VClock set from Node (0 if Node has no entry).
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter().
get_counter(Node, VClock) ->
    Found = lists:keyfind(Node, 1, VClock),
    case Found of
        false -> 0;
        {_, {Counter, _}} -> Counter
    end.

% @doc Get the timestamp value in a VClock set from Node (`undefined' if Node
% has no entry).
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    Found = lists:keyfind(Node, 1, VClock),
    case Found of
        false -> undefined;
        {_, {_, Timestamp}} -> Timestamp
    end.

% @doc Get the entry `dot()' for `vclock_node()' from `vclock()'.
-spec get_dot(Node :: vclock_node(), VClock :: vclock()) -> {ok, dot()} | undefined.
get_dot(Node, VClock) ->
    Found = lists:keyfind(Node, 1, VClock),
    case Found of
        false -> undefined;
        Dot -> {ok, Dot}
    end.

%% @doc Is the given argument a valid dot, or entry? A valid dot is a pair of
%% a node and a `{Counter, Timestamp}' pair of integers.
-spec valid_dot(dot()) -> boolean().
valid_dot({_, {Cnt, TS}}) ->
    is_integer(Cnt) andalso is_integer(TS);
valid_dot(_) ->
    false.

% @doc Increment VClock at Node, using the current time as timestamp.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Now = timestamp(),
    increment(Node, Now, VClock).
% @doc Increment VClock at Node and stamp the entry with IncTs. The updated
% entry is put at the head of the returned vclock; a node without an entry
% starts with counter 1.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        {value, {_, {Counter, _}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others];
        false ->
            [{Node, {1, IncTs}} | VClock]
    end.


% @doc Return the list of all nodes that have ever incremented VClock.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].

-define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, (1970*365+478)).
-define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH,
        (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH * 24*60*60)
        %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}})
       ).

% @doc Return a timestamp for a vector clock
-spec timestamp() -> timestamp().
timestamp() ->
    %% Yields the same value as
    %% calendar:datetime_to_gregorian_seconds(erlang:universaltime()),
    %% but significantly faster.
    {Mega, Secs, _Micro} = os:timestamp(),
    SecondsSinceEpoch = Mega * 1000000 + Secs,
    ?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH + SecondsSinceEpoch.

% @doc Compares two VClocks for equality (the order of the entries is
% irrelevant).
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    SortedA = lists:sort(VA),
    SortedB = lists:sort(VB),
    SortedA =:= SortedB.

% @doc Possibly shrink the size of a vclock, depending on current age and size.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% The order must be deterministic, otherwise spurious merge conflicts may
    %% show up later. Entries are ordered by timestamp, the node ID serves as
    %% secondary key.
    OlderFirst = fun({N1, {_, T1}}, {N2, {_, T2}}) -> {T1, N1} < {T2, N2} end,
    prune_vclock1(lists:sort(OlderFirst, V), Now, BucketProps).
% @private
% First pruning stage: a clock with at most `small_vclock' entries, or whose
% head entry is younger than `young_vclock', is returned unchanged. Everything
% else is handed to the second stage.
prune_vclock1(V, Now, BProps) ->
    case length(V) > get_property(small_vclock, BProps) of
        false ->
            V;
        true ->
            {_, {_, OldestTime}} = hd(V),
            case (Now - OldestTime) >= get_property(young_vclock, BProps) of
                false -> V;
                true -> prune_vclock1(V, Now, BProps, OldestTime)
            end
    end.
% @private
% Second pruning stage: drops the head entry (and starts over with the rest)
% as long as the clock has more than `big_vclock' entries or its head entry is
% older than `old_vclock'.
prune_vclock1(V, Now, BProps, HeadTime) ->
    % precondition: V is longer than small and older than young
    TooBig = length(V) > get_property(big_vclock, BProps),
    case TooBig orelse ((Now - HeadTime) > get_property(old_vclock, BProps)) of
        false -> V;
        true -> prune_vclock1(tl(V), Now, BProps)
    end.

% Looks up `Key' in a list of `{Key, Value}' pairs, `undefined' if missing.
get_property(Key, PairList) ->
    case lists:keyfind(Key, 1, PairList) of
        false ->
            undefined;
        {_, Value} ->
            Value
    end.

%% ===================================================================
%% EUnit tests
%% ===================================================================
-ifdef(TEST).

% Serves as both a trivial test and some example code.
example_test() ->
    EmptyA = lbm_kv_vclock:fresh(),
    EmptyB = lbm_kv_vclock:fresh(),
    OnceA = lbm_kv_vclock:increment(a, EmptyA),
    OnceB = lbm_kv_vclock:increment(b, EmptyB),
    true = lbm_kv_vclock:descends(OnceA, EmptyA),
    true = lbm_kv_vclock:descends(OnceB, EmptyB),
    false = lbm_kv_vclock:descends(OnceA, OnceB),
    TwiceA = lbm_kv_vclock:increment(a, OnceA),
    Merged = lbm_kv_vclock:merge([TwiceA, OnceB]),
    MergedPlusC = lbm_kv_vclock:increment(c, Merged),
    true = lbm_kv_vclock:descends(MergedPlusC, TwiceA),
    true = lbm_kv_vclock:descends(MergedPlusC, OnceB),
    false = lbm_kv_vclock:descends(OnceB, MergedPlusC),
    false = lbm_kv_vclock:descends(OnceB, OnceA),
    ok.
prune_small_test() ->
    % a vclock with fewer entries than small_vclock stays untouched, no matter
    % how old its entries are
    Now = timestamp(),
    Ancient = Now - 32000000,
    Clock = [{<<"1">>, {1, Ancient}},
             {<<"2">>, {2, Ancient}},
             {<<"3">>, {3, Ancient}}],
    Pruned = prune(Clock, Now, [{small_vclock,4}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).

prune_young_test() ->
    % a vclock whose entries are all younger than young_vclock stays untouched
    Now = timestamp(),
    Recent = Now - 1,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Recent}},
             {<<"3">>, {3, Recent}}],
    Pruned = prune(Clock, Now, [{small_vclock,1},{young_vclock,1000}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).

prune_big_test() ->
    % a vclock that is neither small nor young is cut down to at most
    % big_vclock entries
    Now = timestamp(),
    Stamp = Now - 1000,
    Clock = [{<<"1">>, {1, Stamp}},
             {<<"2">>, {2, Stamp}},
             {<<"3">>, {3, Stamp}}],
    Limits = [{small_vclock,1},{young_vclock,1},
              {big_vclock,2},{old_vclock,100000}],
    Pruned = prune(Clock, Now, Limits),
    ?assert(length(Pruned) =:= 2).

prune_old_test() ->
    % a vclock that is neither small nor young is cut down to at most
    % big_vclock entries and loses all entries older than old_vclock
    Now = timestamp(),
    Recent = Now - 1000,
    Ancient = Now - 100000,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Ancient}},
             {<<"3">>, {3, Ancient}}],
    Limits = [{small_vclock,1},{young_vclock,1},
              {big_vclock,2},{old_vclock,10000}],
    Pruned = prune(Clock, Now, Limits),
    ?assert(length(Pruned) =:= 1).
prune_order_test() ->
    % two nodes sharing the same timestamp: pruning must keep the same node,
    % independent of the order of the entries in the input
    Now = timestamp(),
    Ancient = Now - 100000,
    Forward = [{<<"1">>, {1, Ancient}},
               {<<"2">>, {2, Ancient}}],
    Backward = lists:reverse(Forward),
    Limits = [{small_vclock,1},{young_vclock,1},
              {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(prune(Forward, Now, Limits), prune(Backward, Now, Limits)).

accessor_test() ->
    Clock = [{<<"1">>, {1, 1}},
             {<<"2">>, {2, 2}}],
    % {Node, expected counter, expected timestamp}; <<"3">> has no entry
    Expectations = [{<<"1">>, 1, 1},
                    {<<"2">>, 2, 2},
                    {<<"3">>, 0, undefined}],
    [begin
         ?assertEqual(Counter, get_counter(Node, Clock)),
         ?assertEqual(Timestamp, get_timestamp(Node, Clock))
     end || {Node, Counter, Timestamp} <- Expectations],
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(Clock)).

merge_test() ->
    Left = [{<<"1">>, {1, 1}},
            {<<"2">>, {2, 2}},
            {<<"4">>, {4, 4}}],
    Right = [{<<"3">>, {3, 3}},
             {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>,{1,1}},{<<"2">>,{2,2}},{<<"3">>,{3,3}},{<<"4">>,{4,4}}],
    ?assertEqual([], merge(lbm_kv_vclock:fresh())),
    ?assertEqual(Expected, merge([Left, Right])).

merge_less_left_test() ->
    Left = [{<<"5">>, {5, 5}}],
    Right = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Expected = [{<<"5">>, {5, 5}},{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, lbm_kv_vclock:merge([Left, Right])).

merge_less_right_test() ->
    Left = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Right = [{<<"5">>, {5, 5}}],
    Expected = [{<<"5">>, {5, 5}},{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, lbm_kv_vclock:merge([Left, Right])).

merge_same_id_test() ->
    % equal counters for node <<"1">>: the later timestamp must survive
    Left = [{<<"1">>, {1, 2}},{<<"2">>,{1,4}}],
    Right = [{<<"1">>, {1, 3}},{<<"3">>,{1,5}}],
    Expected = [{<<"1">>, {1, 3}},{<<"2">>,{1,4}},{<<"3">>,{1,5}}],
    ?assertEqual(Expected, lbm_kv_vclock:merge([Left, Right])).
% Checks that get_dot/2 returns the entry of every node that incremented the
% clock (node `a' twice, `b' and `c' once) and `undefined' for an unknown node.
get_entry_test() ->
    VC = lbm_kv_vclock:fresh(),
    VC1 = increment(a, increment(c, increment(b, increment(a, VC)))),
    ?assertMatch({ok, {a, {2, _}}}, get_dot(a, VC1)),
    ?assertMatch({ok, {b, {1, _}}}, get_dot(b, VC1)),
    ?assertMatch({ok, {c, {1, _}}}, get_dot(c, VC1)),
    ?assertEqual(undefined, get_dot(d, VC1)).

% Checks that every entry produced by increment/2 passes valid_dot/1 and that
% terms of a different shape are rejected.
valid_entry_test() ->
    VC = lbm_kv_vclock:fresh(),
    VC1 = increment(c, increment(b, increment(a, VC))),
    [begin
         {ok, E} = get_dot(Actor, VC1),
         ?assert(valid_dot(E))
     end || Actor <- [a, b, c]],
    ?assertNot(valid_dot(undefined)),
    ?assertNot(valid_dot("huffle-puff")),
    ?assertNot(valid_dot([])).

-endif.

--------------------------------------------------------------------------------
/test/lbm_kv_dist_tests.erl:
--------------------------------------------------------------------------------
%%%=============================================================================
%%%
%%% | o __ _| _ __ |_ _ _ _ (TM)
%%% |_ | | | (_| (/_ | | |_) (_| |_| | | |
%%%
%%% @copyright (C) 2014-2018, Lindenbaum GmbH
%%%
%%% Permission to use, copy, modify, and/or distribute this software for any
%%% purpose with or without fee is hereby granted, provided that the above
%%% copyright notice and this permission notice appear in all copies.
%%%
%%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%%=============================================================================

-module(lbm_kv_dist_tests).

-include_lib("eunit/include/eunit.hrl").

-define(TABLE, table).

%% Pattern of the mnesia system event the netsplit test waits for.
-define(NETSPLIT_EVENT, {mnesia_system_event, {inconsistent_database, _, _}}).

%% The netsplit test is only part of the suite when `EXCLUDE_FLAKY' is `false'
%% (the default unless the macro is defined elsewhere, e.g. by the build).
-ifndef(EXCLUDE_FLAKY).
-define(EXCLUDE_FLAKY, false).
-endif.

%%%=============================================================================
%%% TESTS
%%%=============================================================================

%% Test generator: every test runs between `setup()' and `teardown()' with a
%% timeout of 10 seconds.
all_test_() ->
    {foreach,
     setup(),
     teardown(),
     [{timeout, 10, [fun unique_table/0]}]
     ++ [{timeout, 10, [fun simple_netsplit/0]} || not ?EXCLUDE_FLAKY]}.

%% A table created on the local node only must become usable (put, get, match,
%% delete) on three slave nodes, also after one of the slaves was stopped and
%% started again.
unique_table() ->
    %% slave_execute/2 waits for `EXIT' messages of linked remote processes
    process_flag(trap_exit, true),

    error_logger:info_msg("TEST: ~s~n", [unique_table]),

    %% create table locally
    Create = fun() -> ok = lbm_kv:create(?TABLE) end,
    Create(),

    %% start three slave nodes
    {ok, Slave1} = slave_setup(slave1),
    {ok, Slave2} = slave_setup(slave2),
    {ok, Slave3} = slave_setup(slave3),

    %% Put a value from the local node
    PutValue = fun() -> {ok, []} = lbm_kv:put(?TABLE, key, value) end,
    PutValue(),

    %% Wait for the table to become available on all nodes
    Wait = fun() -> ok = mnesia:wait_for_tables([?TABLE], 2000) end,
    ?assertEqual(ok, slave_execute(Slave1, Wait)),
    ?assertEqual(ok, slave_execute(Slave2, Wait)),
    ?assertEqual(ok, slave_execute(Slave3, Wait)),

    %% Read the written value from all nodes
    GetValue = fun() -> {ok, [{key, value}]} = lbm_kv:get(?TABLE, key) end,
    GetValue(),
    ?assertEqual(ok, slave_execute(Slave1, GetValue)),
    ?assertEqual(ok, slave_execute(Slave2, GetValue)),
    ?assertEqual(ok, slave_execute(Slave3, GetValue)),

    %% Read the whole table from all nodes
    GetAll = fun() -> {ok, [{key, value}]} = lbm_kv:match_key(?TABLE, '_') end,
    GetAll(),
    ?assertEqual(ok, slave_execute(Slave1, GetAll)),
    ?assertEqual(ok, slave_execute(Slave2, GetAll)),
    ?assertEqual(ok, slave_execute(Slave3, GetAll)),

    %% Delete the value from a slave node
    Update = fun() -> {ok, [{key, value}]} = lbm_kv:del(?TABLE, key) end,
    ?assertEqual(ok, slave_execute(Slave1, Update)),

    %% Read the update from all nodes
    GetEmpty = fun() -> {ok, []} = lbm_kv:get(?TABLE, key) end,
    GetEmpty(),
    ?assertEqual(ok, slave_execute(Slave1, GetEmpty)),
    ?assertEqual(ok, slave_execute(Slave2, GetEmpty)),
    ?assertEqual(ok, slave_execute(Slave3, GetEmpty)),

    %% Shutdown a slave node
    ?assertEqual(ok, slave:stop(Slave2)),

    %% Put a value from a slave node
    ?assertEqual(ok, slave_execute(Slave3, PutValue)),

    %% Start previously exited node
    {ok, Slave2} = slave_setup(slave2),
    ?assertEqual(ok, slave_execute(Slave2, Wait)),

    %% Read the written value from all nodes
    GetValue(),
    ?assertEqual(ok, slave_execute(Slave1, GetValue)),
    ?assertEqual(ok, slave_execute(Slave2, GetValue)),
    ?assertEqual(ok, slave_execute(Slave3, GetValue)),

    ok.
%% The table is created on the local node and on two slaves. The connection
%% between the two slaves is then cut and re-established while every node
%% writes a new value under its own node name. Afterwards every node must see
%% the new value of every other node.
simple_netsplit() ->
    %% slave_execute/2 waits for `EXIT' messages of linked remote processes
    process_flag(trap_exit, true),

    error_logger:info_msg("TEST: ~s~n", [simple_netsplit]),

    %% start two slave nodes
    {ok, Slave1} = slave_setup(slave1),
    {ok, Slave2} = slave_setup(slave2),

    %% create table
    Create = fun() -> ok = lbm_kv:create(?TABLE) end,
    Create(),
    ?assertEqual(ok, slave_execute(Slave1, Create)),
    ?assertEqual(ok, slave_execute(Slave2, Create)),

    %% Put some (non-conflicting) values
    PutValue0 = fun() -> {ok, []} = lbm_kv:put(?TABLE, node(), value0) end,
    PutValue0(),
    ?assertEqual(ok, slave_execute(Slave1, PutValue0)),
    ?assertEqual(ok, slave_execute(Slave2, PutValue0)),

    %% Read the values written before from all nodes
    NumValues = length([node()] ++ erlang:nodes()),
    GetValues = fun() ->
                        {ok, Vals} = lbm_kv:match_key(?TABLE, '_'),
                        %% `NumValues' is already bound, this is an assertion
                        NumValues = length(Vals)
                end,
    GetValues(),
    ?assertEqual(ok, slave_execute(Slave1, GetValues)),
    ?assertEqual(ok, slave_execute(Slave2, GetValues)),

    PutValue1 = fun() -> {ok, _} = lbm_kv:put(?TABLE, node(), value1) end,

    %% simulate netsplit between both slaves (this fun is executed on Slave1)
    Netsplit = fun() ->
                       {ok, _} = mnesia:subscribe(system),
                       true = net_kernel:disconnect(Slave2),

                       %% Make the merge a bit more meaningful
                       PutValue1(),

                       true = net_kernel:connect(Slave2),
                       %% blocks until mnesia reports the inconsistency
                       receive ?NETSPLIT_EVENT -> ok end
               end,
    ok = slave_execute(Slave1, Netsplit),

    PutValue1(),
    ?assertEqual(ok, slave_execute(Slave2, PutValue1)),

    %% sorry, but there's no event we can wait for...
    timer:sleep(1000),

    %% Every node checks the keys of all nodes it is connected to
    GetValue1 = fun(K) -> {ok, [{K, value1}]} = lbm_kv:get(?TABLE, K) end,
    GetValues1 = fun() -> [GetValue1(N) || N <- nodes()] end,
    GetValues1(),
    ?assertEqual(ok, slave_execute(Slave1, GetValues1)),
    ?assertEqual(ok, slave_execute(Slave2, GetValues1)),

    ok.

%%%=============================================================================
%%% Internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Returns the setup fun of the test fixture: makes the local node a distributed
%% node named `master@localhost' and starts the needed applications. The fun
%% returns the list of started applications.
%%------------------------------------------------------------------------------
setup() ->
    fun() ->
            ok = distribute('master@localhost'),
            setup_apps()
    end.

%%------------------------------------------------------------------------------
%% @private
%% Starts `lbm_kv' (and everything it depends on) as permanent application and
%% returns the applications that were started by this call.
%%------------------------------------------------------------------------------
setup_apps() ->
    {ok, Apps} = application:ensure_all_started(lbm_kv, permanent),
    Apps.

%%------------------------------------------------------------------------------
%% @private
%% Returns the teardown fun of the test fixture: stops all applications passed
%% in (the result of the setup fun).
%%------------------------------------------------------------------------------
teardown() -> fun(Apps) -> [application:stop(App) || App <- Apps] end.

%%------------------------------------------------------------------------------
%% @private
%% Make this node a distributed node. Returns `ok' if distribution was started
%% or is already running, the error returned by `net_kernel:start/1' otherwise.
%%------------------------------------------------------------------------------
distribute(Name) ->
    os:cmd("epmd -daemon"),
    case net_kernel:start([Name, shortnames]) of
        {ok, _} -> ok;
        {error, {already_started, _}} -> ok;
        Error -> Error
    end.
%%------------------------------------------------------------------------------
%% @private
%% Start a slave node and setup its environment (code path, applications, ...).
%% The slave is linked to the calling process and gets the code path of the
%% local node. Returns `{ok, Node}'; crashes if the slave could not be started,
%% is not connected afterwards or fails to start the applications.
%%------------------------------------------------------------------------------
slave_setup(Name) ->
    Arg = string:join(["-pa " ++ P || P <- code:get_path()], " "),
    {ok, Node} = slave:start_link(localhost, Name, Arg),
    %% Make sure slave node started correctly and is now connected
    true = lists:member(Node, nodes()),
    %% Start the needed applications
    ok = slave_execute(Node, fun() -> setup_apps() end),
    {ok, Node}.

%%------------------------------------------------------------------------------
%% @private
%% Execute `Fun' on the given node.
%%
%% With `no_block' the fun is spawned without a link and `ok' is returned
%% immediately. In every other mode the fun is spawned linked and the call
%% blocks until the `EXIT' message of the remote process arrives: `ok' for a
%% normal exit, `{error, Reason}' otherwise. There is no timeout, and the
%% `EXIT' message is only delivered if the calling process traps exits (both
%% tests in this module set `trap_exit' before using this function).
%%------------------------------------------------------------------------------
slave_execute(Node, Fun) ->
    slave_execute(Node, Fun, sync).
slave_execute(Node, Fun, no_block) ->
    spawn(Node, Fun),
    ok;
slave_execute(Node, Fun, _) ->
    Pid = spawn_link(Node, Fun),
    receive
        {'EXIT', Pid, normal} -> ok;
        {'EXIT', Pid, Reason} -> {error, Reason}
    end.

--------------------------------------------------------------------------------
/test/lbm_kv_tests.erl:
--------------------------------------------------------------------------------
%%%=============================================================================
%%%
%%% | o __ _| _ __ |_ _ _ _ (TM)
%%% |_ | | | (_| (/_ | | |_) (_| |_| | | |
%%%
%%% @copyright (C) 2014-2018, Lindenbaum GmbH
%%%
%%% Permission to use, copy, modify, and/or distribute this software for any
%%% purpose with or without fee is hereby granted, provided that the above
%%% copyright notice and this permission notice appear in all copies.
%%%
%%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%%=============================================================================

-module(lbm_kv_tests).

-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").

%% Type from which the property tests below generate their keys and values.
-type safe() :: '' | '!' | a | b |
                number() |
                boolean() |
                binary() |
                {safe()} |
                {safe(), safe()} |
                {safe(), safe(), safe()}.
%% Unfortunately, Mnesia is quite picky when it comes to allowed types for
%% keys and values, e.g. all special atoms of `match_specs' are not allowed and
%% lead to undefined behaviour when used.

-define(TABLE, table).
-define(NODE, master).

%%%=============================================================================
%%% TESTS
%%%=============================================================================

%% Test generator: every listed test runs between `setup()' and `teardown()'.
all_test_() ->
    {foreach, setup(), teardown(),
     [
      fun bad_type/0,
      fun empty/0,
      fun put3_get_and_del2/0,
      fun put2_get_and_del2/0,
      fun update/0,
      fun update_table/0,
      fun integration/0
     ]}.

%% Pins the mnesia behaviour that motivates the restrictions of `safe()': a
%% transaction that writes a record whose value contains the atom `'_'' is
%% expected to abort with `bad_type' for exactly that record.
bad_type() ->
    ?assertEqual(
       {aborted,{bad_type,{?TABLE,{{a}},['_']}}},
       mnesia:transaction(
         fun() ->
                 mnesia:write({?TABLE, {{'a'}}, ['_']}),
                 mnesia:delete_object({?TABLE, {{'a'}}, '_'})
         end)).

%% A freshly created table yields no entries, neither for a single key lookup
%% nor for a wildcard key match.
empty() ->
    NoEntries = {ok, []},
    ?assertEqual(NoEntries, lbm_kv:get(?TABLE, key)),
    ?assertEqual(NoEntries, lbm_kv:match_key(?TABLE, '_')).

%% Property: for a single generated entry, `put/3' reports no previous
%% entries, `get/2' and `match_key/2' see the entry, and `del/2' returns it.
put3_get_and_del2() ->
    qc(?FORALL(
          {K, V},
          {safe(), safe()},
          begin
              Entry = {K, V},
              ?assertEqual({ok, []}, lbm_kv:put(?TABLE, K, V)),
              ?assertEqual({ok, [Entry]}, lbm_kv:get(?TABLE, K)),
              {ok, All} = lbm_kv:match_key(?TABLE, '_'),
              ?assert(lists:member(Entry, All)),
              ?assertEqual({ok, [Entry]}, lbm_kv:del(?TABLE, K)),
              true
          end)).

%% Property: two entries with distinct keys written with one `put/2' call are
%% both visible through a dirty `match_key/3' and are both returned by a bulk
%% `del/2'.
put2_get_and_del2() ->
    qc(?FORALL(
          {{KeyA, KeyB}, ValueA, ValueB},
          {?SUCHTHAT({K1, K2}, {safe(), safe()}, K1 =/= K2), safe(), safe()},
          begin
              EntryA = {KeyA, ValueA},
              EntryB = {KeyB, ValueB},
              ?assertEqual({ok, []}, lbm_kv:put(?TABLE, [EntryA, EntryB])),
              {ok, Stored} = lbm_kv:match_key(?TABLE, '_', dirty),
              ?assert(lists:member(EntryA, Stored)),
              ?assert(lists:member(EntryB, Stored)),
              {ok, Removed} = lbm_kv:del(?TABLE, [KeyA, KeyB]),
              ?assert(lists:member(EntryA, Removed)),
              ?assert(lists:member(EntryB, Removed)),
              true
          end)).

%% Property: `update/3' on a single key can insert, modify and delete an entry;
%% each call returns the `{Old, New}' entry lists asserted below.
update() ->
    qc(?FORALL(
          {Key, Initial, Changed},
          {safe(), safe(), safe()},
          begin
              Before = {Key, Initial},
              After = {Key, Changed},

              %% insert: the fun only accepts `undefined' as previous value
              Insert = fun(_, undefined) -> {value, Initial} end,
              ?assertEqual({ok, {[], [Before]}},
                           lbm_kv:update(?TABLE, Key, Insert)),
              ?assertEqual({ok, [Before]}, lbm_kv:get(?TABLE, Key)),

              %% modify: the fun only accepts the value inserted above
              Modify = fun(_, {value, Old}) when Old == Initial ->
                               {value, Changed}
                       end,
              ?assertEqual({ok, {[Before], [After]}},
                           lbm_kv:update(?TABLE, Key, Modify)),
              ?assertEqual({ok, [After]}, lbm_kv:get(?TABLE, Key)),

              %% delete: the fun only accepts the modified value
              Delete = fun(_, {value, Old}) when Old == Changed -> delete end,
              ?assertEqual({ok, {[After], []}},
                           lbm_kv:update(?TABLE, Key, Delete)),
              ?assertEqual({ok, []}, lbm_kv:get(?TABLE, Key)),
              true
          end)).

%% Property: `update/2' applied to the whole table can leave entries untouched,
%% modify a single entry and delete entries.
update_table() ->
    qc(?FORALL(
          {Key, Initial, Changed},
          {safe(), safe(), safe()},
          begin
              Before = {Key, Initial},
              After = {Key, Changed},

              %% identity update on the table
              Identity = fun(_, {value, _} = Current) -> Current end,
              ?assertEqual({ok, {[], []}}, lbm_kv:update(?TABLE, Identity)),

              %% change only the generated entry, keep everything else
              Modify = fun(K, {value, V}) when K == Key, V == Initial ->
                               {value, Changed};
                          (_, {value, _} = Current) ->
                               Current
                       end,
              ?assertEqual({ok, []}, lbm_kv:put(?TABLE, Key, Initial)),
              ?assertEqual({ok, [Before]}, lbm_kv:get(?TABLE, Key)),
              {ok, {OldAfterModify, NewAfterModify}} =
                  lbm_kv:update(?TABLE, Modify),
              ?assert(lists:member(Before, OldAfterModify)),
              ?assert(lists:member(After, NewAfterModify)),
              ?assertEqual({ok, [After]}, lbm_kv:get(?TABLE, Key)),

              %% delete only the generated entry, keep everything else
              Delete = fun(K, {value, V}) when K == Key, V == Changed ->
                               delete;
                          (_, {value, _} = Current) ->
                               Current
                       end,
              {ok, {OldAfterDelete, NewAfterDelete}} =
                  lbm_kv:update(?TABLE, Delete),
              ?assert(lists:member(After, OldAfterDelete)),
              ?assertEqual([], NewAfterDelete),
              ?assertEqual({ok, []}, lbm_kv:get(?TABLE, Key)),

              %% unconditional delete
              DeleteAll = fun(_, _) -> delete end,
              ?assertEqual({ok, {[], []}}, lbm_kv:update(?TABLE, DeleteAll)),
              ?assertEqual({ok, []}, lbm_kv:match_key(?TABLE, '_')),
              true
          end)).

%% Walks one fixed key through put, update (single key and whole table),
%% delete-by-update and del, checking after every step that both `get/2' and a
%% wildcard `match_key/2' report the expected table content.
integration() ->
    %% Asserts that `key' lookup and wildcard match both return `Expected'
    ExpectTable =
        fun(Expected) ->
                ?assertEqual({ok, Expected}, lbm_kv:get(?TABLE, key)),
                ?assertEqual({ok, Expected}, lbm_kv:match_key(?TABLE, '_'))
        end,

    %% initial empty
    ExpectTable([]),

    %% info
    ?assertEqual(ok, lbm_kv:info()),

    %% add key => value
    ?assertEqual({ok, []}, lbm_kv:put(?TABLE, key, value)),
    ExpectTable([{key, value}]),

    %% update to key => value1
    Update1 = fun(key, {value, value}) -> {value, value1} end,
    ?assertEqual({ok, {[{key, value}], [{key, value1}]}},
                 lbm_kv:update(?TABLE, key, Update1)),
    ExpectTable([{key, value1}]),

    %% update to key => value2
    UpdateAll1 = fun(key, {value, value1}) -> {value, value2} end,
    ?assertEqual({ok, {[{key, value1}], [{key, value2}]}},
                 lbm_kv:update(?TABLE, UpdateAll1)),
    ExpectTable([{key, value2}]),

    %% empty table with update
    Update2 = fun(key, {value, value2}) -> delete end,
    ?assertEqual({ok, {[{key, value2}], []}},
                 lbm_kv:update(?TABLE, key, Update2)),
    ExpectTable([]),

    %% add key => value2
    ?assertEqual({ok, []}, lbm_kv:put(?TABLE, key, value2)),
    ExpectTable([{key, value2}]),

    %% empty table with update_all
    UpdateAll2 = fun(key, {value, value2}) -> delete end,
    ?assertEqual({ok, {[{key, value2}], []}},
                 lbm_kv:update(?TABLE, UpdateAll2)),
    ExpectTable([]),

    %% no update for non-existing key
    Update3 = fun(key, undefined) -> undefined end,
    ?assertEqual({ok, {[], []}}, lbm_kv:update(?TABLE, key, Update3)),
    ExpectTable([]),

    %% add key => value with update to non-existing key
    Update4 = fun(key, undefined) -> {value, value} end,
    ?assertEqual({ok, {[], [{key, value}]}},
                 lbm_kv:update(?TABLE, key, Update4)),
    ExpectTable([{key, value}]),

    %% del key => value
    ?assertEqual({ok, [{key, value}]}, lbm_kv:del(?TABLE, key)),
    ExpectTable([]).

%%%=============================================================================
%%% Internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @private
%% Fixture setup: returns a fun that starts the applications, creates the test
%% table and returns the list of started applications.
%%------------------------------------------------------------------------------
setup() ->
    fun() ->
            Started = setup_apps(),
            ok = lbm_kv:create(?TABLE),
            Started
    end.

%%------------------------------------------------------------------------------
%% @private
%% Load `sasl', set its `sasl_error_logger' environment value to `false', then
%% start `lbm_kv' with `application:ensure_all_started/1'. Returns the list of
%% applications reported as started.
%%------------------------------------------------------------------------------
setup_apps() ->
    _ = application:load(sasl),
    ok = application:set_env(sasl, sasl_error_logger, false),
    {ok, Started} = application:ensure_all_started(lbm_kv),
    Started.

%%------------------------------------------------------------------------------
%% @private
%% Fixture cleanup: returns a fun that stops the applications handed over by
%% the setup fun (the list obtained from `application:ensure_all_started/1' in
%% `setup_apps/0'). The list is walked in reverse so that the application
%% started last is stopped first, i.e. an application goes down before the
%% applications that were started ahead of it (typically its dependencies).
%% Returns the list of `application:stop/1' results.
%%------------------------------------------------------------------------------
teardown() ->
    fun(Apps) ->
            [application:stop(App) || App <- lists:reverse(Apps)]
    end.

%%------------------------------------------------------------------------------
%% @private
%% Run the PropEr property `Block' with the `long_result' and `verbose' options
%% and assert that the result is `true'.
%%------------------------------------------------------------------------------
qc(Block) -> ?assert(proper:quickcheck(Block, [long_result, verbose])).

--------------------------------------------------------------------------------