├── .gitignore ├── rebar ├── rebar.config ├── src ├── bisect.app.src ├── basho_bench_driver_bisect.erl ├── bisect_server.erl └── bisect.erl ├── priv └── basho_bench_bisect.config ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | ebin 2 | deps 3 | .eunit 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knutin/bisect/HEAD/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {deps, [ 2 | {proper, "", {git,"https://github.com/manopapad/proper.git", "master"}} 3 | ]}. 4 | -------------------------------------------------------------------------------- /src/bisect.app.src: -------------------------------------------------------------------------------- 1 | {application, bisect, 2 | [ 3 | {description, ""}, 4 | {vsn, "1"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib 9 | ]}, 10 | {env, []} 11 | ]}. 12 | -------------------------------------------------------------------------------- /priv/basho_bench_bisect.config: -------------------------------------------------------------------------------- 1 | {mode, max}. 2 | %{mode, {rate, 1000}}. 3 | 4 | {duration, 15}. 5 | 6 | {concurrent, 4}. 7 | 8 | {driver, basho_bench_driver_bisect}. 9 | 10 | {code_paths, ["../bisect/ebin"]}. 11 | 12 | {operations, [{mget,1}]}. 13 | 14 | {key_generator, {uniform_int, 10000000}}. 15 | 16 | {value_generator, {fixed_bin, 1}}. 17 | 18 | 19 | {singleton, false}. 20 | {initial_keys, 10000000}. 21 | {mget_keys, 1000}. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2012 Knut Nesheim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /src/basho_bench_driver_bisect.erl: -------------------------------------------------------------------------------- 1 | -module(basho_bench_driver_bisect). 2 | 3 | -export([new/1, 4 | run/4]). 
%% @doc: basho_bench driver callback, called once per worker. Returns
%% {ok, Pid} where Pid is the bisect_server the worker will talk to.
%% With the 'singleton' config option set to true, all workers share
%% one locally registered server; otherwise each worker gets its own.
new(_Id) ->
    case basho_bench_config:get(singleton) of
        true ->
            %% Only the worker that finds the name unregistered starts
            %% and fills the shared server.
            case whereis(bisect_server) of
                undefined ->
                    populate(bisect_server:start_link(bisect_server, 8, 1));
                Pid ->
                    {ok, Pid}
            end;
        false ->
            populate(bisect_server:start_link(8, 1))
    end.

%% Injects the initial data set into a freshly started server. Crashes
%% if the server could not be started.
populate({ok, P}) ->
    ok = bisect_server:inject(P, initial_b()),
    {ok, P}.


%% Builds the initial dictionary: keys 1..initial_keys encoded as
%% 64-bit integers, each mapped to the two-byte value 255.
%% NOTE(review): this dictionary uses a value size of 2 while the server
%% is started with a value size of 1; inject/2 replaces the whole
%% dictionary, so the injected sizes are the ones that apply afterwards.
initial_b() ->
    N = basho_bench_config:get(initial_keys),
    KeyValuePairs = [{<<I:64/integer>>, <<255:16/integer>>}
                     || I <- lists:seq(1, N)],
    bisect:from_orddict(bisect:new(8, 2), KeyValuePairs).


%% @doc: basho_bench driver callback running a single operation against
%% the server P. Returns {ok, P} on success, {error, Reason, P} when the
%% server replies with an error or the call times out.
run(mget, KeyGen, _ValueGen, P) ->
    Keys = mget_keys(KeyGen),
    call_server(fun () -> bisect_server:mget(P, Keys) end, P);

run(mget_serial, KeyGen, _ValueGen, P) ->
    Keys = mget_keys(KeyGen),
    call_server(fun () -> bisect_server:mget_serial(P, Keys) end, P);

run(put, KeyGen, ValueGen, P) ->
    Key = <<(KeyGen()):64/integer>>,
    Value = ValueGen(),
    call_server(fun () -> bisect_server:insert(P, Key, Value) end, P).

%% Generates the keys for one mget operation: starting at a generated
%% key, every 1000th integer encoded as a 64-bit key. lists:seq/3 is
%% inclusive at both ends, so this yields mget_keys + 1 keys.
mget_keys(KeyGen) ->
    NumKeys = basho_bench_config:get(mget_keys),
    StartKey = KeyGen(),
    [<<I:64/integer>>
     || I <- lists:seq(StartKey, StartKey + (NumKeys * 1000), 1000)].

%% Runs Call and maps its outcome to the driver return convention. Only
%% a call timeout (an exit with reason {timeout, _}) is turned into an
%% error result; any other exception propagates and crashes the worker.
call_server(Call, P) ->
    try Call() of
        ok ->
            {ok, P};
        {ok, _Value} ->
            {ok, P};
        {error, Reason} ->
            {error, Reason, P}
    catch
        exit:{timeout, _} ->
            {error, timeout, P}
    end.
68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bisect 2 | 3 | Bisect is a dictionary-like data structure with some very nice properties: 4 | 5 | * Fixed-size key and values, no storage overhead 6 | * Ordered, allows fast in-order traversal, merging and intersections 7 | * Stored in an Erlang binary, making parallel no-copy reads possible, 8 | easy storage 9 | * O(log n) reads 10 | 11 | These properties make Bisect a good fit for read-heavy 12 | workloads. Updates to the dictionary are expensive. On commodity 13 | multi-core machines it's possible to achieve millions of reads per 14 | second, even with more than 100M keys. 15 | 16 | `bisect_server` is a gen_server wrapping an instance of Bisect for 17 | parallel no-copy reads. 18 | 19 | The API is a bit crap as it started out as a quick experiment and then 20 | people started using it, making it difficult to warrant fixing the 21 | API. 22 | 23 | ## Usage 24 | 25 | When creating a new Bisect you need to decide up front on the key and 26 | value size. This is great for storing many things of the same type, 27 | but not so good for different types. Let's say I want to use a single 28 | byte for both value and key, allowing me 256 unique keys. 29 | 30 | ```erlang 31 | 1> bisect:new(1, 1). 32 | {bindict,1,1,2,<<>>} 33 | 34 | %% Insert the byte 104 with value 10 35 | 2> bisect:insert(bisect:new(1, 1), <<104>>, <<10>>). 36 | {bindict,1,1,2,<<"h\n">>} 37 | 38 | 3> bisect:find(v(-1), <<104>>). 39 | <<"\n">> 40 | 41 | %% If the input parameters have the wrong size, insertion fails 42 | 4> catch bisect:insert(bisect:new(1, 1), <<104, 101>>, <<10>>). 43 | {'EXIT',{badarg,[{bisect,insert,3,[]},{lists,sort,2,[]}]}} 44 | 45 | %% Serialization 46 | 5> bisect:serialize(bisect:insert(bisect:new(1, 1), <<104>>, <<10>>)). 
47 | <<131,104,5,100,0,7,98,105,110,100,105,99,116,97,1,97,1, 48 | 97,2,109,0,0,0,2,104,10>> 49 | 6> bisect:deserialize(v(-1)). 50 | {bindict,1,1,2,<<"h\n">>} 51 | 52 | %% Bulk insert, much more efficient than one insert at a time 53 | 7> bisect:bulk_insert(bisect:new(1, 1), [{<<104>>, <<10>>}, {<<101>>, <<10>>}]). 54 | {bindict,1,1,2,<<"h\ne\n">>} 55 | 56 | %% Curious how big memory you will use? 57 | 8> bisect:expected_size(bisect:new(1, 1), 255). 58 | 510 59 | 9> bisect:expected_size_mb(bisect:new(8, 1), 10000000). 60 | 85.8306884765625 61 | ``` 62 | 63 | It is up to the user to encode/decode keys and values in a way that 64 | makes sense, Bisect only stores the raw bytes you give as input. 65 | -------------------------------------------------------------------------------- /src/bisect_server.erl: -------------------------------------------------------------------------------- 1 | %% @doc: gen_server wrapping an instance of bisect, owns the bisect 2 | %% structure, serializes writes, hands out the reference to the bisect 3 | %% structure to concurrent readers. 4 | -module(bisect_server). 5 | -behaviour(gen_server). 6 | 7 | %% API 8 | -export([start_link/2, start_link/3, start_link_with_data/3, stop/1]). 9 | -export([get/2, first/1, last/1, next/2, next_nth/3, mget/2, mget_serial/2, 10 | insert/3, append/3, cas/4, inject/2, num_keys/1, delete/2]). 11 | 12 | %% gen_server callbacks 13 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, 14 | terminate/2, code_change/3]). 15 | 16 | 17 | -record(state, {b}). 18 | 19 | -ifdef(TEST). 20 | -include_lib("proper/include/proper.hrl"). 21 | -include_lib("eunit/include/eunit.hrl"). 22 | -compile([export_all]). 23 | -endif. 

%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts an unregistered server whose dictionary is created from
%% the given key size, value size and pre-built binary data.
start_link_with_data(KeySize, ValueSize, Data) ->
    InitArgs = [KeySize, ValueSize, Data],
    gen_server:start_link(?MODULE, InitArgs, []).

%% @doc: Starts an unregistered server holding an empty dictionary.
start_link(KeySize, ValueSize) ->
    InitArgs = [KeySize, ValueSize],
    gen_server:start_link(?MODULE, InitArgs, []).

%% @doc: Starts a server holding an empty dictionary, registered
%% locally under Name.
start_link(Name, KeySize, ValueSize) ->
    InitArgs = [KeySize, ValueSize],
    gen_server:start_link({local, Name}, ?MODULE, InitArgs, []).

%% @doc: Asks the server to stop.
stop(Pid) ->
    gen_server:call(Pid, stop).

%% Reads below fetch the dictionary from the server and then do the
%% lookup in the calling process.

get(Pid, K) ->
    read(Pid, fun (Dict) -> bisect:find(Dict, K) end).

first(Pid) ->
    read(Pid, fun (Dict) -> bisect:first(Dict) end).

last(Pid) ->
    read(Pid, fun (Dict) -> bisect:last(Dict) end).

next(Pid, K) ->
    read(Pid, fun (Dict) -> bisect:next(Dict, K) end).

next_nth(Pid, K, Steps) ->
    read(Pid, fun (Dict) -> bisect:next_nth(Dict, K, Steps) end).

mget(Pid, Keys) ->
    read(Pid, fun (Dict) -> bisect:find_many(Dict, Keys) end).

%% @doc: Like mget/2, but the lookups are done inside the server process.
mget_serial(Pid, Keys) ->
    gen_server:call(Pid, {mget, Keys}).

num_keys(Pid) ->
    read(Pid, fun (Dict) -> bisect:num_keys(Dict) end).

%% Writes below are carried out by the server process.

insert(Pid, K, V) ->
    gen_server:call(Pid, {insert, K, V}).

append(Pid, K, V) ->
    gen_server:call(Pid, {append, K, V}).

cas(Pid, K, OldV, V) ->
    gen_server:call(Pid, {cas, K, OldV, V}).

%% @doc: Replaces the server's whole dictionary with B.
inject(Pid, B) ->
    gen_server:call(Pid, {inject, B}).

delete(Pid, K) ->
    gen_server:call(Pid, {delete, K}).

%% Fetches the current dictionary from the server and applies Lookup to
%% it in the caller, wrapping the result in an ok-tuple.
read(Pid, Lookup) ->
    {ok, Dict} = gen_server:call(Pid, get_b),
    {ok, Lookup(Dict)}.

%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% Creates the dictionary owned by this server, either empty or from
%% pre-built binary data.
init([KeySize, ValueSize]) ->
    {ok, #state{b = bisect:new(KeySize, ValueSize)}};

init([KeySize, ValueSize, Data]) ->
    {ok, #state{b = bisect:new(KeySize, ValueSize, Data)}}.

%% Hands the dictionary to the caller so the read happens in the
%% caller's process.
handle_call(get_b, _From, State) ->
    {reply, {ok, State#state.b}, State};

%% insert and append are not protected: a failing bisect call crashes
%% the server.
handle_call({insert, K, V}, _From, #state{b = B} = State) ->
    {reply, ok, State#state{b = bisect:insert(B, K, V)}};

handle_call({append, K, V}, _From, #state{b = B} = State) ->
    {reply, ok, State#state{b = bisect:append(B, K, V)}};

handle_call({inject, B}, _From, State) ->
    {reply, ok, State#state{b = B}};

handle_call({mget, Keys}, _From, State) ->
    {reply, {ok, bisect:find_many(State#state.b, Keys)}, State};

%% delete and cas report badarg to the caller and keep the old
%% dictionary. Only error:badarg is caught; any other exception crashes
%% the server instead of being stored as if it were the new dictionary.
handle_call({delete, K}, _From, #state{b = B} = State) ->
    try bisect:delete(B, K) of
        NewB ->
            {reply, ok, State#state{b = NewB}}
    catch
        error:badarg ->
            {reply, {error, badarg}, State}
    end;

handle_call({cas, K, OldV, V}, _From, #state{b = B} = State) ->
    try bisect:cas(B, K, OldV, V) of
        NewB ->
            {reply, ok, State#state{b = NewB}}
    catch
        error:badarg ->
            {reply, {error, badarg}, State}
    end;

handle_call(stop, _From, State) ->
    {stop, normal, ok, State}.


handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%
%% TESTS
%%

-ifdef(TEST).


%% Inserted values are visible both to caller-side reads (mget/2) and to
%% server-side reads (mget_serial/2).
insert_test() ->
    {ok, S} = start_link(8, 1),
    ok = insert(S, <<1:64/integer>>, <<1>>),
    ok = insert(S, <<2:64/integer>>, <<2>>),
    ok = insert(S, <<3:64/integer>>, <<3>>),

    Keys = [<<1:64/integer>>, <<2:64/integer>>, <<3:64/integer>>],
    Values = [<<1>>, <<2>>, <<3>>],
    ?assertEqual({ok, Values}, mget(S, Keys)),
    ?assertEqual({ok, Values}, mget_serial(S, Keys)),
    ok = stop(S).


%% cas/4 only writes when the stored value equals the expected one;
%% 'not_found' as the expected value means the key must be absent.
cas_test() ->
    {ok, S} = start_link(8, 1),
    ok = insert(S, <<1:64/integer>>, <<1>>),
    {error, badarg} = cas(S, <<2:64/integer>>, <<2>>, <<2>>),
    ?assertEqual({ok, <<1>>}, get(S, <<1:64/integer>>)),

    ok = cas(S, <<1:64/integer>>, <<1>>, <<2>>),
    ?assertEqual({ok, <<2>>}, get(S, <<1:64/integer>>)),

    ok = cas(S, <<2:64/integer>>, not_found, <<2>>),
    ?assertEqual({ok, <<2>>}, get(S, <<2:64/integer>>)),
    ok = stop(S).



%% inject/2 replaces the server's dictionary with a pre-built one.
inject_test() ->
    {ok, S} = start_link(8, 1),
    KeyPairs = [{<<I:64/integer>>, <<97>>} || I <- lists:seq(1, 100000)],

    B = bisect:from_orddict(bisect:new(8, 1), KeyPairs),

    Key = <<20:64/integer>>,
    ?assertEqual({ok, not_found}, get(S, Key)),
    ok = inject(S, B),
    ?assertEqual({ok, <<97>>}, get(S, Key)),
    ok = stop(S).


proper_test() ->
    ?assert(proper:quickcheck(?MODULE:prop_bisect())).


%% Model state for the PropEr state machine: the key-value pairs that
%% are expected to be stored in the server.
-record(prop__state, {keys = []}).

%% Runs random command sequences against a server registered as 'prop'
%% and checks every result against the model.
prop_bisect() ->
    ?FORALL(Cmds, commands(?MODULE),
            ?TRAPEXIT(
               begin
                   {ok, S} = start_link(prop, 8, 1),

                   {History,State,Result} = run_commands(?MODULE, Cmds),
                   stop(S),

                   ?WHENFAIL(io:format("History: ~p\nState: ~p\nResult: ~p\n",
                                       [History, State, Result]),
                             aggregate(command_names(Cmds), Result =:= ok))
               end)).

prop__key() ->
    elements(prop__keys()).

prop__value() ->
    elements(prop__values()).

%% The fixed pool of 8-byte keys used by the generated commands.
prop__keys() ->
    [<<N:64/integer>> || N <- [1, 2, 3]].

%% The fixed pool of 1-byte values used by the generated commands.
prop__values() ->
    [<<N:8/integer>> || N <- [1, 2, 3]].


%% Generates one symbolic call against the server registered as 'prop'.
command(_S) ->
    Insert = {call, ?MODULE, insert, [prop, prop__key(), prop__value()]},
    Get    = {call, ?MODULE, get, [prop, prop__key()]},
    MGet   = {call, ?MODULE, mget, [prop, prop__keys()]},
    Delete = {call, ?MODULE, delete, [prop, prop__key()]},
    oneof([Insert, Get, MGet, Delete]).

initial_state() ->
    #prop__state{keys = []}.

precondition(_, _) ->
    true.

%% Only insert and delete change the model; reads leave it untouched.
next_state(S, _, {call, _, insert, [_, Key, Value]}) ->
    Stored = S#prop__state.keys,
    S#prop__state{keys = lists:keystore(Key, 1, Stored, {Key, Value})};

next_state(S, _, {call, _, delete, [_, Key]}) ->
    Stored = S#prop__state.keys,
    S#prop__state{keys = lists:keydelete(Key, 1, Stored)};

next_state(S, _, _) ->
    S.


%% A read must agree with the model; a delete succeeds exactly when the
%% model holds the key; an insert is always accepted.
postcondition(S, {call, _, get, [_, Key]}, {ok, not_found}) ->
    not lists:keymember(Key, 1, S#prop__state.keys);

postcondition(S, {call, _, get, [_, Key]}, {ok, Value}) ->
    lists:keyfind(Key, 1, S#prop__state.keys) =:= {Key, Value};

postcondition(S, {call, _, mget, [_, Keys]}, {ok, Values}) ->
    Agrees = fun ({Key, not_found}) ->
                     not lists:keymember(Key, 1, S#prop__state.keys);
                 ({Key, _Value} = Pair) ->
                     Pair =:= lists:keyfind(Key, 1, S#prop__state.keys)
             end,
    lists:all(Agrees, lists:zip(Keys, Values));


postcondition(S, {call, _, delete, [_, Key]}, ok) ->
    lists:keymember(Key, 1, S#prop__state.keys);

postcondition(S, {call, _, delete, [_, Key]}, {error, badarg}) ->
    not lists:keymember(Key, 1, S#prop__state.keys);

postcondition(_S, {call, _, insert, _}, _) ->
    true.

-endif.
278 | -------------------------------------------------------------------------------- /src/bisect.erl: -------------------------------------------------------------------------------- 1 | %% @doc: Space-efficient dictionary implemented using a binary 2 | %% 3 | %% This module implements a space-efficient dictionary with no 4 | %% overhead per entry. Read and write access is O(log n). 5 | %% 6 | %% Keys and values are fixed size binaries stored ordered in a larger 7 | %% binary which acts as a sparse array. All operations are implemented 8 | %% using a binary search. 9 | %% 10 | %% As large binaries can be shared among processes, there can be 11 | %% multiple concurrent readers of an instance of this structure. 12 | %% 13 | %% serialize/1 and deserialize/1 14 | -module(bisect). 15 | -author('Knut Nesheim '). 16 | 17 | -export([new/2, new/3, insert/3, bulk_insert/2, append/3, find/2, foldl/3]). 18 | -export([next/2, next_nth/3, first/1, last/1, delete/2, compact/1, cas/4, update/4]). 19 | -export([serialize/1, deserialize/1, from_orddict/2, to_orddict/1, find_many/2]). 20 | -export([merge/2, intersection/1, intersection/2]). 21 | -export([expected_size/2, expected_size_mb/2, num_keys/1, size/1]). 22 | 23 | -compile({no_auto_import, [size/1]}). 24 | -compile(native). 25 | 26 | -ifdef(TEST). 27 | -include_lib("eunit/include/eunit.hrl"). 28 | -endif. 29 | 30 | 31 | %% 32 | %% TYPES 33 | %% 34 | 35 | -type key_size() :: pos_integer(). 36 | -type value_size() :: pos_integer(). 37 | -type block_size() :: pos_integer(). 38 | 39 | -type key() :: binary(). 40 | -type value() :: binary(). 41 | 42 | -type index() :: pos_integer(). 43 | 44 | -record(bindict, { 45 | key_size :: key_size(), 46 | value_size :: value_size(), 47 | block_size :: block_size(), 48 | b :: binary() 49 | }). 50 | -type bindict() :: #bindict{}. 51 | 52 | 53 | %% 54 | %% API 55 | %% 56 | 57 | -spec new(key_size(), value_size()) -> bindict(). 
%% @doc: Returns a new empty dictionary where the keys and
%% values will always be of the given size.
new(KeySize, ValueSize) when is_integer(KeySize)
                             andalso is_integer(ValueSize) ->
    new(KeySize, ValueSize, <<>>).

-spec new(key_size(), value_size(), binary()) -> bindict().
%% @doc: Returns a new dictionary with the given data. The data is
%% stored as-is; it is not checked for ordering or size.
new(KeySize, ValueSize, Data) when is_integer(KeySize)
                                   andalso is_integer(ValueSize)
                                   andalso is_binary(Data) ->
    #bindict{key_size = KeySize,
             value_size = ValueSize,
             block_size = KeySize + ValueSize,
             b = Data}.


-spec insert(bindict(), key(), value()) -> bindict().
%% @doc: Inserts the key and value into the dictionary. If the size of
%% key and value is wrong, throws badarg. If the key is already in the
%% array, the value is updated.
insert(B, K, V) when byte_size(K) =/= B#bindict.key_size orelse
                     byte_size(V) =/= B#bindict.value_size ->
    erlang:error(badarg);

insert(#bindict{b = <<>>} = B, K, V) ->
    B#bindict{b = <<K/binary, V/binary>>};

insert(B, K, V) ->
    Index = index(B, K),
    LeftOffset = Index * B#bindict.block_size,
    RightOffset = byte_size(B#bindict.b) - LeftOffset,

    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,

    case B#bindict.b of
        %% The key is already stored at Index: replace its value.
        <<Left:LeftOffset/binary, K:KeySize/binary, _:ValueSize/binary, Right/binary>> ->
            B#bindict{b = iolist_to_binary([Left, K, V, Right])};

        %% The key is new: splice the pair in at its insertion point.
        <<Left:LeftOffset/binary, Right:RightOffset/binary>> ->
            B#bindict{b = iolist_to_binary([Left, K, V, Right])}
    end.

%% @doc: Update the value stored under the key by calling F on the old
%% value to get a new value. If the key is not present, initial will
%% be stored as the first value. Same as dict:update/4. Note: find and
%% insert requires two binary searches in the binary, while update
%% only needs one. It's as close to in-place update we can get in pure
%% Erlang.
update(B, K, Initial, F) when byte_size(K) =/= B#bindict.key_size orelse
                              byte_size(Initial) =/= B#bindict.value_size orelse
                              not is_function(F) ->
    erlang:error(badarg);

update(B, K, Initial, F) ->
    Index = index(B, K),
    LeftOffset = Index * B#bindict.block_size,
    RightOffset = byte_size(B#bindict.b) - LeftOffset,

    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,

    case B#bindict.b of
        %% The key exists: let F compute the new value from the old one.
        <<Left:LeftOffset/binary, K:KeySize/binary, OldV:ValueSize/binary, Right/binary>> ->
            case F(OldV) of
                OldV ->
                    %% Unchanged value, no need to build a new binary.
                    B;
                NewV ->
                    byte_size(NewV) =:= ValueSize orelse erlang:error(badarg),
                    B#bindict{b = iolist_to_binary([Left, K, NewV, Right])}
            end;

        %% The key is missing: store Initial without calling F.
        <<Left:LeftOffset/binary, Right:RightOffset/binary>> ->
            B#bindict{b = iolist_to_binary([Left, K, Initial, Right])}
    end.

-spec append(bindict(), key(), value()) -> bindict().
%% @doc: Appends a key and value at the end of the dictionary. The key
%% must be larger than the current last key, otherwise badarg is
%% thrown. Keys and values of the wrong size also throw badarg.
append(B, K, V) when byte_size(K) =/= B#bindict.key_size orelse
                     byte_size(V) =/= B#bindict.value_size ->
    erlang:error(badarg);

append(B, K, V) ->
    case last(B) of
        {KLast, _} when K =< KLast ->
            erlang:error(badarg);
        _ ->
            Bin = B#bindict.b,
            B#bindict{b = <<Bin/binary, K/binary, V/binary>>}
    end.

-spec cas(bindict(), key(), value() | 'not_found', value()) -> bindict().
%% @doc: Check-and-set operation. If 'not_found' is specified as the
%% old value, the key should not exist in the array. Throws badarg if
%% the stored value differs from the expected one. Provided for use
%% by bisect_server.
cas(B, K, OldV, V) ->
    case find(B, K) of
        OldV ->
            insert(B, K, V);
        _OtherV ->
            erlang:error(badarg)
    end.


-spec find(bindict(), key()) -> value() | not_found.
%% @doc: Returns the value associated with the key or 'not_found' if
%% there is no such key.
find(B, K) ->
    case at(B, index(B, K)) of
        {K, Value}   -> Value;
        {_OtherK, _} -> not_found;
        not_found    -> not_found
    end.

-spec find_many(bindict(), [key()]) -> [value() | not_found].
%% @doc: Looks up every key, returning the results in the same order.
find_many(B, Keys) ->
    lists:map(fun (K) -> find(B, K) end, Keys).

-spec delete(bindict(), key()) -> bindict().
%% @doc: Removes the key and its value. Throws badarg if the key is not
%% in the dictionary.
delete(B, K) ->
    LeftOffset = index2offset(B, index(B, K)),
    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,

    case B#bindict.b of
        <<Left:LeftOffset/binary, K:KeySize/binary, _:ValueSize/binary, Right/binary>> ->
            B#bindict{b = <<Left/binary, Right/binary>>};
        _ ->
            erlang:error(badarg)
    end.

-spec next(bindict(), key()) -> {key(), value()} | not_found.
%% @doc: Returns the next larger key and value associated with it or
%% 'not_found' if no larger key exists.
next(B, K) ->
    next_nth(B, K, 1).

%% @doc: Returns the nth next larger key and value associated with it
%% or 'not_found' if it does not exist.
-spec next_nth(bindict(), key(), non_neg_integer()) ->
                      {key(), value()} | not_found.
next_nth(B, K, Steps) ->
    Next = inc(K),
    case Next > K of
        true ->
            at(B, index(B, Next) + Steps - 1);
        false ->
            %% inc/1 wrapped around, so K is the largest possible key
            %% and nothing can follow it.
            not_found
    end.



-spec first(bindict()) -> {key(), value()} | not_found.
%% @doc: Returns the first key-value pair or 'not_found' if the dict is empty
first(B) ->
    at(B, 0).

-spec last(bindict()) -> {key(), value()} | not_found.
%% @doc: Returns the last key-value pair or 'not_found' if the dict is empty
last(B) ->
    at(B, num_keys(B) - 1).

-spec foldl(bindict(), fun(), any()) -> any().
%% @doc: Folds F(Key, Value, Acc) over all pairs in storage order.
%% Returns Acc unchanged for an empty dictionary.
foldl(B, F, Acc) ->
    do_foldl(B, F, 0, Acc).

%% Walks the pairs by position instead of searching for each next key,
%% so the fold ends at the last pair whatever its key is.
do_foldl(B, F, I, Acc) ->
    case at(B, I) of
        {Key, Value} ->
            do_foldl(B, F, I + 1, F(Key, Value, Acc));
        not_found ->
            Acc
    end.


%% @doc: Compacts the internal binary used for storage, by creating a
%% new copy where all the data is aligned in memory. Writes will cause
%% fragmentation.
compact(B) ->
    B#bindict{b = binary:copy(B#bindict.b)}.

%% @doc: Returns how many bytes would be used by the structure if it
%% was storing NumKeys.
expected_size(B, NumKeys) ->
    B#bindict.block_size * NumKeys.

%% @doc: Same as expected_size/2, but in megabytes (as a float).
expected_size_mb(B, NumKeys) ->
    expected_size(B, NumKeys) / 1024 / 1024.

-spec num_keys(bindict()) -> non_neg_integer().
%% @doc: Returns the number of keys in the dictionary
num_keys(B) ->
    byte_size(B#bindict.b) div B#bindict.block_size.

%% @doc: Returns the size in bytes of the stored data.
size(#bindict{b = B}) ->
    erlang:byte_size(B).


-spec serialize(bindict()) -> binary().
%% @doc: Returns a binary representation of the dictionary which can
%% be deserialized later to recreate the same structure.
serialize(#bindict{} = B) ->
    term_to_binary(B).

-spec deserialize(binary()) -> bindict().
%% @doc: Recreates a dictionary from the output of serialize/1. Throws
%% badarg if the binary does not decode to a dictionary. Decoding uses
%% the 'safe' option so that external data cannot create new atoms or
%% funs.
deserialize(Bin) ->
    case binary_to_term(Bin, [safe]) of
        #bindict{} = B ->
            B;
        _ ->
            erlang:error(badarg)
    end.

%% @doc: Insert a batch of key-value pairs into the dictionary. A new
%% binary is only created once, making it much cheaper than individual
%% calls to insert/3. The input list must be sorted.
bulk_insert(#bindict{} = B, Orddict) ->
    L = do_bulk_insert(B, B#bindict.b, [], Orddict),
    B#bindict{b = iolist_to_binary(lists:reverse(L))}.

%% Splits the remaining binary at each key in turn, collecting the
%% pieces in reverse order. Each search continues in the part to the
%% right of the previous key, which is why the input must be sorted.
do_bulk_insert(_B, Bin, Acc, []) ->
    [Bin | Acc];
do_bulk_insert(B, Bin, Acc, [{Key, Value} | Rest]) ->
    {Left, Right} = split_at(Bin, B#bindict.key_size, B#bindict.value_size, Key, 0),
    do_bulk_insert(B, Right, [Value, Key, Left | Acc], Rest).

%% Scans Bin block by block, starting at block I, for the place where
%% Key belongs. Returns {Left, Right} where Left holds the blocks with
%% smaller keys and Right the blocks with larger keys; an existing block
%% for Key itself is dropped from both.
split_at(Bin, KeySize, ValueSize, Key, I) ->
    LeftOffset = I * (KeySize + ValueSize),
    case Bin of
        Bin when byte_size(Bin) < LeftOffset ->
            %% Ran past the end: every stored key is smaller.
            {Bin, <<>>};

        <<Left:LeftOffset/binary,
          Key:KeySize/binary, _:ValueSize/binary,
          Right/binary>> ->
            {Left, Right};

        <<Left:LeftOffset/binary,
          OtherKey:KeySize/binary, Value:ValueSize/binary,
          Right/binary>> when OtherKey > Key ->
            NewRight = <<OtherKey/binary, Value/binary, Right/binary>>,
            {Left, NewRight};
        _ ->
            split_at(Bin, KeySize, ValueSize, Key, I+1)
    end.

%% @doc: Merges the pairs of Small into Big. For keys present in both,
%% the value from Small is kept. Both dictionaries must use the same
%% key and value sizes, otherwise badarg is thrown.
merge(Small, Big) ->
    (Small#bindict.key_size =:= Big#bindict.key_size andalso
     Small#bindict.block_size =:= Big#bindict.block_size)
        orelse erlang:error(badarg),

    L = do_merge(Small#bindict.b, Big#bindict.b, [],
                 Big#bindict.key_size, Big#bindict.value_size),
    Big#bindict{b = iolist_to_binary(L)}.

%% Takes one pair at a time from Small and splits the rest of Big
%% around its key.
do_merge(Small, Big, Acc, KeySize, ValueSize) ->
    case Small of
        <<Key:KeySize/binary, Value:ValueSize/binary, RestSmall/binary>> ->
            {LeftBig, RightBig} = split_at(Big, KeySize, ValueSize, Key, 0),
            do_merge(RestSmall, RightBig, [Value, Key, LeftBig | Acc],
                     KeySize, ValueSize);
        <<>> ->
            lists:reverse([Big | Acc])
    end.

%% @doc: Intersect two or more bindicts by key. The resulting bindict
%% contains keys found in all input bindicts, with the values taken
%% from the smallest input. Throws badarg for fewer than two inputs.
intersection([_, _ | _] = Bs) ->
    intersection(Bs, svs);
intersection(_TooFewSets) ->
    erlang:error(badarg).

%% @doc: SvS set intersection algorithm, as described in
%% http://www.cs.toronto.edu/~tl/papers/fiats.pdf
intersection(Bs, svs) ->
    [CandidateSet | Sets] = lists:sort(fun (A, B) -> size(A) =< size(B) end, Bs),
    from_orddict(new(CandidateSet#bindict.key_size,
                     CandidateSet#bindict.value_size),
                 do_svs(Sets, CandidateSet)).

do_svs([], Candidates) ->
    Candidates;
do_svs(_Sets, #bindict{b = <<>>}) ->
    %% An empty candidate set intersects to nothing.
    [];
do_svs([Set | Sets], #bindict{} = Candidates) ->
    %% Optimization: we let the candidate set remain a bindict for the
    %% first iteration to avoid creating a large orddict just to throw
    %% most of it away. For the remaining sets, we keep the candidate
    %% set as a list.
    {_, Kept} = foldl(Candidates,
                      fun (K, V, Acc) -> svs_probe(Set, K, V, Acc) end,
                      {0, []}),
    do_svs(Sets, lists:reverse(Kept));

do_svs([Set | Sets], Candidates) when is_list(Candidates) ->
    {_, Kept} = lists:foldl(fun ({K, V}, Acc) -> svs_probe(Set, K, V, Acc) end,
                            {0, []}, Candidates),
    do_svs(Sets, lists:reverse(Kept)).

%% Looks for candidate key K in Set, searching only from Low upwards,
%% where Low is the position found for the previous candidate. Keeps
%% the candidate if Set has the key, and returns the new lower bound.
%% TODO: Skip candidates until OtherK?
svs_probe(Set, K, V, {Low, Acc}) ->
    Size = byte_size(Set#bindict.b) div Set#bindict.block_size,
    Rank = index(Set, Low, Size, K),
    case at(Set, Rank) of
        {K, _}       -> {Rank, [{K, V} | Acc]};
        {_OtherK, _} -> {Rank, Acc};
        not_found    -> {Rank, Acc}
    end.

%% Returns the key-value pair stored at position I, or 'not_found' if
%% there is no complete pair at that position.
at(B, I) ->
    Offset = index2offset(B, I),
    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,
    case B#bindict.b of
        <<_:Offset/binary, Key:KeySize/binary, Value:ValueSize/binary, _/binary>> ->
            {Key, Value};
        _ ->
            not_found
    end.


%% @doc: Populates the dictionary with data from the orddict, taking
%% advantage of the fact that it is already ordered. The given bindict
%% must be empty, but contain size parameters. Throws badarg if any key
%% or value has the wrong size.
from_orddict(#bindict{b = <<>>} = B, Orddict) ->
    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,
    L = orddict:fold(fun (K, V, Acc)
                           when byte_size(K) =:= B#bindict.key_size andalso
                                byte_size(V) =:= B#bindict.value_size ->
                             [<<K:KeySize/binary, V:ValueSize/binary>> | Acc];
                         (_, _, _) ->
                             erlang:error(badarg)
                     end, [], Orddict),
    B#bindict{b = iolist_to_binary(lists:reverse(L))}.

%% @doc: Returns all pairs as a list of {Key, Value} tuples in storage
%% order.
to_orddict(#bindict{} = B) ->
    lists:reverse(
      foldl(B, fun (Key, Value, Acc) ->
                       [{Key, Value} | Acc]
               end, [])).


%%
%% INTERNAL HELPERS
%%

%% Converts a block index to a byte offset into the storage binary.
index2offset(_, 0) -> 0;
index2offset(B, I) -> I * B#bindict.block_size.

%% @doc: Uses binary search to find the index of the given key. If the
%% key does not exist, the index where it should be inserted is
%% returned.
-spec index(bindict(), key()) -> non_neg_integer().
index(#bindict{b = <<>>}, _) ->
    0;
index(B, K) ->
    N = byte_size(B#bindict.b) div B#bindict.block_size,
    index(B, 0, N, K).

%% Binary search for K among the blocks in the range Low..High, where
%% High is exclusive.
index(_B, Low, High, _K) when High =:= Low ->
    Low;

index(_B, Low, High, _K) when High < Low ->
    -1;

index(B, Low, High, K) ->
    Mid = (Low + High) div 2,
    MidOffset = index2offset(B, Mid),

    KeySize = B#bindict.key_size,
    case byte_size(B#bindict.b) > MidOffset of
        true ->
            <<_:MidOffset/binary, MidKey:KeySize/binary, _/binary>> = B#bindict.b,

            if
                MidKey > K ->
                    index(B, Low, Mid, K);
                MidKey < K ->
                    index(B, Mid + 1, High, K);
                MidKey =:= K ->
                    Mid
            end;
        false ->
            Mid
    end.

%% Returns the key that follows B when keys are read as unsigned
%% big-endian integers of the same width. The result wraps around to
%% all zeros when every bit of B is set.
inc(B) ->
    IncInt = binary:decode_unsigned(B) + 1,
    SizeBits = byte_size(B) * 8,
    <<IncInt:SizeBits>>.

%%
%% TEST
%%
-ifdef(TEST).


-define(i2k(I), <<I:64/integer>>).
-define(i2v(I), <<I:8/integer>>).
-define(b2i(B), list_to_integer(binary_to_list(B))).

new_with_data_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {4, 4}, {1, 1}, {3, 3}]),
    ?assertEqual(Dict, new(8, 1, Dict#bindict.b)).

insert_test() ->
    insert_many(new(8, 1), [{2, 2}, {4, 4}, {1, 1}, {3, 3}]).

%% Pairs inserted in key order are laid out back to back.
sorted_insert_test() ->
    #bindict{b = Stored} = insert_many(new(8, 1), [{1, 1}, {2, 2}, {3, 3}, {4, 4}]),
    Expected = << <<N:64/integer, N>> || N <- [1, 2, 3, 4] >>,
    ?assertEqual(Expected, Stored).

%% index/2 gives the position of an existing key, or the insertion
%% point for a missing one.
index_test() ->
    Dict = #bindict{key_size = 8, value_size = 1, block_size = 9,
                    b = <<1:64/integer, 1, 2:64/integer, 2>>},
    ?assertEqual(0, index(Dict, <<1:64/integer>>)),
    ?assertEqual(1, index(Dict, <<2:64/integer>>)),
    ?assertEqual(2, index(Dict, <<3:64/integer>>)),
    ?assertEqual(2, index(Dict, <<100:64/integer>>)).

find_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    Found = find(Dict, <<3:64/integer>>),
    ?assertEqual(<<3:8/integer>>, Found).

find_non_existing_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    Found = find(Dict, ?i2k(4)),
    ?assertEqual(not_found, Found).

find_many_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    Keys = [<<1:64/integer>>, <<2:64/integer>>, <<3:64/integer>>],
    find_many(Dict, Keys).

%% Inserting an existing key replaces its value.
insert_overwrite_test() ->
    Key = <<2:64/integer>>,
    Before = insert_many(new(8, 1), [{2, 2}]),
    ?assertEqual(<<2>>, find(Before, Key)),
    After = insert(Before, Key, <<4>>),
    ?assertEqual(<<4>>, find(After, Key)).

%% update/4 calls the fun for an existing key and stores the initial
%% value, without calling the fun, for a missing key.
update_test() ->
    Dict0 = insert_many(new(8, 1), [{2, 2}]),
    Bump = fun (Old) ->
                   ?assertEqual(Old, <<2>>),
                   <<5>>
           end,
    Dict1 = update(Dict0, <<2:64/integer>>, <<4>>, Bump),
    ?assertEqual(<<5>>, find(Dict1, <<2:64/integer>>)),
    MustNotRun = fun (_) ->
                         throw(unexpected_call)
                 end,
    Dict2 = update(Dict1, <<3:64/integer>>, <<3>>, MustNotRun),
    ?assertEqual(<<3>>, find(Dict2, <<3:64/integer>>)).

%% append/3 rejects keys that are not larger than the last key.
append_test() ->
    Existing = {<<2:64>>, <<2:8>>},
    NewKey = <<3:64>>,
    NewValue = <<3:8>>,
    Dict = insert_many(new(8, 1), [Existing]),
    ?assertError(badarg, append(Dict, <<1:64>>, NewValue)),
    ?assertError(badarg, append(Dict, <<2:64>>, NewValue)),
    Appended = append(Dict, NewKey, NewValue),
    ?assertEqual(NewValue, find(Appended, NewKey)).
%% next/2 returns the first pair whose key is strictly greater than
%% the given key, or not_found when there is none.
next_test() ->
    First = {<<2:64>>, <<2:8>>},
    Second = {<<3:64>>, <<3:8>>},
    Dict = insert_many(new(8, 1), [First, Second]),
    Cases = [{<<0:64>>, First},
             {<<1:64>>, First},
             {<<2:64>>, Second},
             {<<3:64>>, not_found}],
    lists:foreach(
      fun ({From, Expected}) ->
              ?assertEqual(Expected, next(Dict, From))
      end, Cases).

%% next_nth/3 steps the given number of pairs past the key and answers
%% not_found when that runs off the end.
next_nth_test() ->
    First = {<<2:64>>, <<2:8>>},
    Second = {<<3:64>>, <<3:8>>},
    Dict = insert_many(new(8, 1), [First, Second]),
    Cases = [{<<0:64>>, 1, First},
             {<<0:64>>, 2, Second},
             {<<2:64>>, 1, Second},
             {<<2:64>>, 2, not_found},
             {<<3:64>>, 1, not_found}],
    lists:foreach(
      fun ({From, Steps, Expected}) ->
              ?assertEqual(Expected, next_nth(Dict, From, Steps))
      end, Cases).

%% first/1 is not_found on an empty dictionary and otherwise the pair
%% with the smallest key.
first_test() ->
    {K1, V1} = {<<2:64>>, <<2:8>>},
    {K2, V2} = {<<3:64>>, <<3:8>>},
    Empty = new(8, 1),
    ?assertEqual(not_found, first(Empty)),
    One = insert(Empty, K1, V1),
    ?assertEqual({K1, V1}, first(One)),
    Two = insert(One, K2, V2),
    ?assertEqual({K1, V1}, first(Two)).

%% last/1 is not_found on an empty dictionary and otherwise the pair
%% with the largest key. Also checks num_keys/1 and at/2 on the empty
%% dictionary.
last_test() ->
    {K1, V1} = {<<2:64>>, <<2:8>>},
    {K2, V2} = {<<3:64>>, <<3:8>>},
    Empty = new(8, 1),
    ?assertEqual(not_found, last(Empty)),
    ?assertEqual(0, num_keys(Empty)),
    ?assertEqual(not_found, at(Empty, 0)),
    ?assertEqual(not_found, at(Empty, -1)),
    ?assertEqual(not_found, at(Empty, 1)),
    One = insert(Empty, K1, V1),
    ?assertEqual({K1, V1}, last(One)),
    Two = insert(One, K2, V2),
    ?assertEqual({K2, V2}, last(Two)).

%% A deleted key can no longer be found.
delete_test() ->
    Victim = ?i2k(2),
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    ?assertEqual(<<2:8/integer>>, find(Dict, Victim)),

    Remaining = delete(Dict, Victim),
    ?assertEqual(not_found, find(Remaining, Victim)).

%% Deleting a key that is not present raises badarg.
delete_non_existing_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    ?assertError(badarg, delete(Dict, ?i2k(4))).
%% foldl/3 visits every pair (summing the 1 byte values here) and
%% returns the initial accumulator for an empty dictionary.
foldl_test() ->
    B = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    ?assertEqual(2+3+1, foldl(B, fun (_, <<V:8/integer>>, Acc) -> V + Acc end, 0)),
    ?assertEqual([], foldl(new(8, 1), fun (I, V, Acc) -> [{I, V} | Acc] end, [])).


%% num_keys/1 reports the number of distinct keys inserted.
size_test() ->
    Start = 100000000000000,
    N = 1000,
    Spread = 1,
    KeyPairs = [{I, 255} || I <- lists:seq(Start, Start+(N*Spread), Spread)],

    B = insert_many(new(8, 1), KeyPairs),
    %% The sequence includes both end points, so it holds N + 1 keys
    %% whatever the value of Spread.
    ?assertEqual(N + 1, num_keys(B)).

%% serialize/1 followed by deserialize/1 gives back an equal dictionary.
serialize_test() ->
    KeyPairs = [{I, 255} || I <- lists:seq(1, 100)],
    B = insert_many(new(8, 1), KeyPairs),
    ?assertEqual(B, deserialize(serialize(B))).

%% A dictionary built with from_orddict/2 can be queried with find/2.
from_orddict_test() ->
    Orddict = orddict:from_list([{<<1:64/integer>>, <<255:8/integer>>}]),
    ?assertEqual(<<255>>, find(from_orddict(new(8, 1), Orddict), <<1:64/integer>>)).


%% intersection/1 keeps the keys present in every dictionary. Key 2
%% maps to different values across the inputs; the expected result
%% carries the value from the first dictionary in the list.
intersection_test() ->
    Sets = [insert_many(new(8, 1), [{1, 1}, {2, 2}, {3, 3}]),
            insert_many(new(8, 1), [{1, 1}, {2, 3}, {4, 4}]),
            insert_many(new(8, 1), [{1, 1}, {2, 3}, {5, 5}]),
            insert_many(new(8, 1), [{1, 1}, {2, 3}, {6, 6}])],

    Intersection = intersection(Sets),
    ?assertEqual(to_orddict(insert_many(new(8, 1), [{1, 1}, {2, 2}])),
                 to_orddict(Intersection)).


%% Runs intersection_perf/0 with a 600 second EUnit timeout.
intersection_perf_test_() ->
    {timeout, 600, ?_test(intersection_perf())}.
%% Compares intersection/1 against sets:intersection/1 on randomly
%% generated keys and logs how long the bisect intersection took.
%% Each test case is {SizesOfTheSets, SizeOfTheirIntersection}.
intersection_perf() ->
    TestCases = [{[1000, 1000], 10},
                 {[100000, 100000, 100000], 1000},
                 {[10000, 100000, 1000000], 1000},
                 {[1000000, 1000000, 1000000], 10000}
                ],

    lists:foreach(
      fun ({SetSizes, IntersectionSize}) ->
              %% Every set contributes its private keys once; the shared
              %% keys are counted a single time.
              UnionSize = lists:sum([SetSize - IntersectionSize
                                     || SetSize <- SetSizes]) + IntersectionSize,
              %% NOTE(review): the key binary was lost in extraction and is
              %% reconstructed as a 36 byte binary to match new(36, 4) below.
              KVs = lists:map(fun (K) -> {<<K:36/binary>>, <<97:32/integer>>} end,
                              generate_unique(UnionSize)),
              ?assertEqual(UnionSize, sets:size(sets:from_list(KVs))),

              %% Carve the shared keys off the front, then hand out the
              %% remainder to the individual sets; nothing may be left over.
              {IntersectionKeys, Rest} = lists:split(IntersectionSize, KVs),
              {SetKeys, []} = lists:mapfoldl(fun (Size, AccRest) ->
                                                     lists:split(Size - IntersectionSize,
                                                                 AccRest)
                                             end, Rest, SetSizes),
              ?assertEqual(IntersectionSize, length(IntersectionKeys)),

              SetIntersection = sets:intersection(
                                  [sets:from_list(Ks ++ IntersectionKeys)
                                   || Ks <- SetKeys]),
              ?assertEqual(IntersectionSize, sets:size(SetIntersection)),

              Bisects = lists:map(fun (Ks) ->
                                          AllKeys = orddict:from_list(
                                                      Ks ++ IntersectionKeys),
                                          from_orddict(new(36, 4), AllKeys)
                                  end, SetKeys),
              {IntersectUs, BisectIntersection} = timer:tc(
                                                    fun () -> intersection(Bisects) end),
              IntersectingKeys = to_orddict(BisectIntersection),
              ExpectedKeys = lists:sort(sets:to_list(SetIntersection)),
              %% Cheap size check first, so a mismatch does not only show
              %% up as a huge term diff.
              ?assertEqual(length(ExpectedKeys), length(IntersectingKeys)),
              ?assertEqual(ExpectedKeys, lists:sort(IntersectingKeys)),
              error_logger:info_msg("Set sizes: ~p, Intersection size: ~p~n"
                                    "Intersection runtime: ~.2f ms~n",
                                    [SetSizes, IntersectionSize,
                                     IntersectUs / 1000]),

              ok
      end, TestCases).


%% Returns N distinct random 36 byte binaries.
generate_unique(N) ->
    %% crypto:rand_bytes/1 no longer exists in current OTP releases;
    %% strong_rand_bytes/1 is available in old and new releases alike.
    RandomGenerator = fun () -> crypto:strong_rand_bytes(36) end,
    generate_unique(RandomGenerator, [], N).
%% Calls RandomGenerator until Acc holds exactly N distinct terms and
%% returns them sorted. Acc must not already hold more than N terms.
generate_unique(RandomGenerator, Acc, N) ->
    case N - length(Acc) of
        0 ->
            Acc;
        Missing when Missing > 0 ->
            Fresh = [RandomGenerator() || _ <- lists:seq(1, Missing)],
            %% usort over the whole accumulator also drops terms that
            %% collide with ones generated in an earlier round.
            generate_unique(RandomGenerator, lists:usort(Acc ++ Fresh), N)
    end.


%% Builds a dictionary of N + 1 sequential keys with from_orddict/2 and
%% times reads of 1000 randomly picked keys.
speed_test_() ->
    {timeout, 600,
     fun() ->
             Start = 100000000000000,
             N = 100000,
             Keys = lists:seq(Start, Start+N),
             KeyValuePairs = [{<<I:64/integer>>, <<255:8/integer>>} || I <- Keys],

             %% Will mostly be unique, if N is bigger than 10000
             %% NOTE(review): ReadKeys are integers while the stored keys
             %% are 8 byte binaries - confirm find_many/2 expects this.
             %% NOTE(review): random is deprecated in favour of rand
             %% (OTP 18+); kept so the module still builds on older releases.
             ReadKeys = [lists:nth(random:uniform(N), Keys) || _ <- lists:seq(1, 1000)],
             B = from_orddict(new(8, 1), KeyValuePairs),
             time_reads(B, N, ReadKeys)
     end}.


%% Inserts N + 1 sequential keys one at a time, logs the time taken and
%% then times reads of 1000 randomly picked keys.
insert_speed_test_() ->
    {timeout, 600,
     fun() ->
             Start = 100000000000000,
             N = 10000,
             Keys = lists:seq(Start, Start+N),
             KeyValuePairs = [{<<I:64/integer>>, <<255:8/integer>>} || I <- Keys],
             %% NOTE(review): integer read keys, see speed_test_/0.
             ReadKeys = [lists:nth(random:uniform(N), Keys) || _ <- lists:seq(1, 1000)],

             %% os:timestamp/0 replaces the deprecated now/0 and yields the
             %% same tuple format that timer:now_diff/2 expects.
             StartTime = os:timestamp(),
             B = lists:foldl(fun ({K, V}, Acc) ->
                                     insert(Acc, K, V)
                             end, new(8, 1), KeyValuePairs),
             ElapsedUs = timer:now_diff(os:timestamp(), StartTime),
             error_logger:info_msg("insert in ~p ms, ~p us per key~n",
                                   [ElapsedUs / 1000,
                                    ElapsedUs / N
                                   ]),
             time_reads(B, N, ReadKeys)
     end}.
%% Runs find_many(B, ReadKeys) 100 times in a separate process, logs
%% the timings and waits up to one second for that process to finish.
%% Size is only used in the log message.
time_reads(B, Size, ReadKeys) ->
    Parent = self(),
    %% Tag the completion message so that a late reply from an earlier,
    %% timed out call cannot be mistaken for this one.
    Ref = make_ref(),
    %% Linked, so a crash in the measurement fails the calling test
    %% instead of passing unnoticed.
    spawn_link(
      fun() ->
              Runs = 100,
              NumKeys = length(ReadKeys),
              Timings =
                  lists:map(
                    fun (_) ->
                            StartTime = os:timestamp(),
                            find_many(B, ReadKeys),
                            timer:now_diff(os:timestamp(), StartTime)
                    end, lists:seq(1, Runs)),

              AvgUs = lists:sum(Timings) / length(Timings),
              PerKeyUs = AvgUs / NumKeys,
              Rps = 1000000 / PerKeyUs,
              error_logger:info_msg("Average over ~p runs, ~p keys in dict~n"
                                    "Average fetch ~p keys: ~p us, max: ~p us~n"
                                    "Average fetch 1 key: ~p us~n"
                                    "Theoretical sequential RPS: ~w~n",
                                    [Runs, Size, NumKeys,
                                     AvgUs,
                                     lists:max(Timings),
                                     PerKeyUs,
                                     trunc(Rps)]),

              Parent ! {Ref, done}
      end),
    receive {Ref, done} -> ok after 1000 -> ok end.


%% NOTE(review): the key binaries in the four timing tests below were
%% lost in extraction and are reconstructed as 64 bit integers, the key
%% width of new(8, 1).

%% Times inserts of sequential keys, one measurement per 1000 inserts.
time_write_test_() ->
    {timeout, 600,
     fun() ->
             Fun = fun(N , B) ->
                       insert(B, <<N:64/integer>>, <<255:8/integer>>)
                   end,
             start_time_interval("Insert", Fun, new(8, 1), 1000, 20000)
     end
    }.

%% Times a find of a random earlier key followed by an insert.
time_write_and_read_test_() ->
    {timeout, 600,
     fun() ->
             Fun = fun(Count, B) ->
                       KInt = random:uniform(Count),
                       find(B, <<KInt:64/integer>>),
                       insert(B, <<Count:64/integer>>, <<255:8/integer>>)
                   end,
             start_time_interval("Insert and find", Fun, new(8, 1), 1000, 10000)
     end
    }.

%% Times appends of sequential keys.
time_appends_test_() ->
    {timeout, 600,
     fun() ->
             Fun = fun(Count, B) ->
                       append(B, <<Count:64/integer>>, <<255:8/integer>>)
                   end,
             start_time_interval("Append", Fun, new(8, 1), 1000, 50000)
     end
    }.

%% Times a find of a random earlier key followed by an append.
time_appends_and_find_test_() ->
    {timeout, 600,
     fun() ->
             Fun = fun(Count, B) ->
                       KInt = random:uniform(Count),
                       find(B, <<KInt:64/integer>>),
                       append(B, <<Count:64/integer>>, <<255:8/integer>>)
                   end,
             start_time_interval("Append and find", Fun, new(8, 1), 1000, 50000)
     end
    }.
%% Times a next/2 from a random earlier key followed by an append.
%% NOTE(review): the key binaries were lost in extraction and are
%% reconstructed as 64 bit integers, the key width of new(8, 1).
time_appends_and_next_test_() ->
    {timeout, 600,
     fun() ->
             Fun = fun(Count , B) ->
                       KInt = random:uniform(Count),
                       next(B, <<KInt:64/integer>>),
                       append(B, <<Count:64/integer>>, <<255:8/integer>>)
                   end,
             start_time_interval("Append and next", Fun, new(8, 1), 1000, 50000)
     end
    }.

%% Threads B through Fun(Count, B) for Count = 1 .. N-1 and logs the
%% time taken by each batch of MeasureEvery calls.
start_time_interval(Operation, Fun, B, MeasureEvery, N) ->
    Times = time_interval(Fun, B, MeasureEvery, N, 1, os:timestamp()),
    %% timer:now_diff/2 yields microseconds, hence the unit in the label.
    error_logger:info_msg("Time (us) taken for ~p executions each of ~p:\n~p\n",
                          [MeasureEvery, Operation, Times]).

%% Returns the elapsed microseconds of every completed batch. T is the
%% start time of the batch currently being measured.
time_interval(_, _, _, N, N, _) ->
    [];
time_interval(Fun, B, MeasureEvery, N, Count, T) ->
    B2 = Fun(Count, B),
    case Count rem MeasureEvery =:= 0 of
        true ->
            %% Batch complete: record it and restart the clock.
            Elapsed = timer:now_diff(os:timestamp(), T),
            [Elapsed | time_interval(Fun, B2, MeasureEvery, N, Count + 1, os:timestamp())];
        false ->
            time_interval(Fun, B2, MeasureEvery, N, Count + 1, T)
    end.


%% Inserts every pair into Bin. Integer pairs are first encoded with
%% ?i2k/?i2v; anything else is inserted as given.
insert_many(Bin, Pairs) ->
    lists:foldl(fun ({K, V}, B) when is_integer(K) andalso is_integer(V) ->
                        insert(B, ?i2k(K), ?i2v(V));
                    ({K, V}, B) ->
                        insert(B, K, V)
                end, Bin, Pairs).

%% inc/1 adds one to the integer encoded in the binary.
inc_test() ->
    ?assertEqual(<<7:64>>, inc(<<6:64>>)).


%% bulk_insert/2 merges new pairs into an existing dictionary; for the
%% key present in both (10) the bulk inserted value replaces the old one.
bulk_insert_test() ->
    B = insert_many(new(8, 1), [{1, 1}, {10, 10}, {12, 12}]),
    New = bulk_insert(B, [{?i2k(0), ?i2v(0)},
                          {?i2k(5), ?i2v(5)},
                          {?i2k(10), ?i2v(11)},
                          {?i2k(11), ?i2v(11)}]),

    ?assertEqual([{?i2k(0) , ?i2v(0)},
                  {?i2k(1) , ?i2v(1)},
                  {?i2k(5) , ?i2v(5)},
                  {?i2k(10), ?i2v(11)},
                  {?i2k(11), ?i2v(11)},
                  {?i2k(12), ?i2v(12)}],
                 to_orddict(New)).
%% merge/2 combines two dictionaries into one sorted dictionary. For
%% the key held by both (10), the value of the first argument is the
%% one expected in the result.
smart_merge_test() ->
    Small = insert_many(new(8, 1), [{0, 0}, {10, 11}, {12, 12}]),
    Big = insert_many(new(8, 1), [{1, 1}, {10, 10}, {25, 25}]),

    Expected = [{?i2k(K), ?i2v(V)}
                || {K, V} <- [{0, 0}, {1, 1}, {10, 11}, {12, 12}, {25, 25}]],
    ?assertEqual(Expected, to_orddict(merge(Small, Big))).


-endif.

--------------------------------------------------------------------------------