├── .gitignore
├── rebar
├── rebar.config
├── src
    ├── bisect.app.src
    ├── basho_bench_driver_bisect.erl
    ├── bisect_server.erl
    └── bisect.erl
├── priv
    └── basho_bench_bisect.config
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | ebin
2 | deps
3 | .eunit
4 | .DS_Store
5 | 


--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knutin/bisect/HEAD/rebar


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {deps, [
2 |   {proper, "", {git,"https://github.com/manopapad/proper.git", "master"}}
3 | ]}.
4 | 


--------------------------------------------------------------------------------
/src/bisect.app.src:
--------------------------------------------------------------------------------
 1 | {application, bisect,
 2 |  [
 3 |   {description, ""},
 4 |   {vsn, "1"},
 5 |   {registered, []},
 6 |   {applications, [
 7 |                   kernel,
 8 |                   stdlib
 9 |                  ]},
10 |   {env, []}
11 |  ]}.
12 | 


--------------------------------------------------------------------------------
/priv/basho_bench_bisect.config:
--------------------------------------------------------------------------------
 1 | {mode, max}.
 2 | %{mode, {rate, 1000}}.
 3 | 
 4 | {duration, 15}.
 5 | 
 6 | {concurrent, 4}.
 7 | 
 8 | {driver, basho_bench_driver_bisect}.
 9 | 
10 | {code_paths, ["../bisect/ebin"]}.
11 | 
12 | {operations, [{mget,1}]}.
13 | 
14 | {key_generator, {uniform_int, 10000000}}.
15 | 
16 | {value_generator, {fixed_bin, 1}}.
17 | 
18 | 
19 | {singleton, false}.
20 | {initial_keys, 10000000}.
21 | {mget_keys, 1000}.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2012 Knut Nesheim
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/src/basho_bench_driver_bisect.erl:
--------------------------------------------------------------------------------
 1 | -module(basho_bench_driver_bisect).
 2 | 
 3 | -export([new/1,
 4 |          run/4]).
 5 | 
 6 | new(_Id) ->
 7 |     case basho_bench_config:get(singleton) of
 8 |         true ->
 9 |             case whereis(bisect_server) of
10 |                 undefined ->
11 |                     {ok, P} = bisect_server:start_link(bisect_server, 8, 1),
12 |                     ok = bisect_server:inject(P, initial_b()),
13 |                     {ok, P};
14 |                 Pid ->
15 |                     {ok, Pid}
16 |             end;
17 |         false ->
18 |             {ok, P} = bisect_server:start_link(8, 1),
19 |             ok = bisect_server:inject(P, initial_b()),
20 |             {ok, P}
21 |     end.
22 | 
23 | 
%% @doc: Builds the bisect used by the benchmark: the keys 1..initial_keys
%% encoded as 64-bit integers, each mapped to the 16-bit value 255.
initial_b() ->
    KeyCount = basho_bench_config:get(initial_keys),
    Pairs = [{<<I:64/integer>>, <<255:16/integer>>} || I <- lists:seq(1, KeyCount)],
    Empty = bisect:new(8, 2),
    bisect:from_orddict(Empty, Pairs).
29 | 
30 | 
%% @doc: basho_bench driver callback that runs one operation against the
%% bisect_server `P'.
%%
%%  * `mget'        - looks up `mget_keys' keys, searching in the worker
%%  * `mget_serial' - same lookup, but performed inside the server
%%  * `put'         - inserts one generated key/value pair
%%
%% Returns `{ok, P}' on success and `{error, Reason, P}' on failure. A
%% gen_server call timeout is reported as `{error, timeout, P}'; any other
%% exception is left to propagate with its original class and stacktrace.
run(mget, KeyGen, _ValueGen, P) ->
    Keys = mget_keys(KeyGen()),
    try bisect_server:mget(P, Keys) of
        {ok, _Values} ->
            {ok, P};
        {error, Reason} ->
            {error, Reason, P}
    catch
        exit:{timeout, _} ->
            {error, timeout, P}
    end;

run(mget_serial, KeyGen, _ValueGen, P) ->
    Keys = mget_keys(KeyGen()),
    try bisect_server:mget_serial(P, Keys) of
        {ok, _Values} ->
            {ok, P};
        {error, Reason} ->
            {error, Reason, P}
    catch
        exit:{timeout, _} ->
            {error, timeout, P}
    end;

run(put, KeyGen, ValueGen, P) ->
    Key = <<(KeyGen()):64/integer>>,
    try bisect_server:insert(P, Key, ValueGen()) of
        ok ->
            {ok, P};
        {error, Reason} ->
            {error, Reason, P}
    catch
        exit:{timeout, _} ->
            {error, timeout, P}
    end.

%% Builds exactly `mget_keys' 64-bit keys, starting at StartKey and spaced
%% 1000 apart. The upper bound uses NumKeys - 1 steps because lists:seq/3
%% includes both endpoints.
mget_keys(StartKey) ->
    NumKeys = basho_bench_config:get(mget_keys),
    LastKey = StartKey + ((NumKeys - 1) * 1000),
    [<<I:64/integer>> || I <- lists:seq(StartKey, LastKey, 1000)].
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bisect
 2 | 
 3 | Bisect is a dictionary-like data structure with some very nice properties:
 4 | 
 5 |  * Fixed-size key and values, no storage overhead
 6 |  * Ordered, allows fast in-order traversal, merging and intersections
 7 |  * Stored in an Erlang binary, making parallel no-copy reads possible,
 8 |    easy storage
 9 |  * O(log n) reads
10 | 
 11 | These properties make Bisect a good fit for read-heavy
12 | workloads. Updates to the dictionary are expensive. On commodity
13 | multi-core machines it's possible to achieve millions of reads per
 14 | second, even with more than 100M keys.
15 | 
 16 | `bisect_server` is a gen_server wrapping an instance of Bisect for
17 | parallel no-copy reads.
18 | 
19 | The API is a bit crap as it started out as a quick experiment and then
20 | people started using it, making it difficult to warrant fixing the
21 | API.
22 | 
23 | ## Usage
24 | 
25 | When creating a new Bisect you need to decide up front on the key and
26 | value size. This is great for storing many things of the same type,
27 | but not so good for different types. Let's say I want to use a single
28 | byte for both value and key, allowing me 256 unique keys.
29 | 
30 | ```erlang
31 | 1> bisect:new(1, 1).
32 | {bindict,1,1,2,<<>>}
33 | 
34 | %% Insert the byte 104 with value 10
35 | 2> bisect:insert(bisect:new(1, 1), <<104>>, <<10>>).
36 | {bindict,1,1,2,<<"h\n">>}
37 | 
38 | 3> bisect:find(v(-1), <<104>>).
39 | <<"\n">>
40 | 
41 | %% If the input parameters have the wrong size, insertion fails
42 | 4> catch bisect:insert(bisect:new(1, 1), <<104, 101>>, <<10>>).
43 | {'EXIT',{badarg,[{bisect,insert,3,[]},{lists,sort,2,[]}]}}
44 | 
45 | %% Serialization
46 | 5> bisect:serialize(bisect:insert(bisect:new(1, 1), <<104>>, <<10>>)).
47 | <<131,104,5,100,0,7,98,105,110,100,105,99,116,97,1,97,1,
48 |   97,2,109,0,0,0,2,104,10>>
49 | 6> bisect:deserialize(v(-1)).
50 | {bindict,1,1,2,<<"h\n">>}
51 | 
52 | %% Bulk insert, much more efficient than one insert at a time
53 | 7> bisect:bulk_insert(bisect:new(1, 1), [{<<104>>, <<10>>}, {<<101>>, <<10>>}]).
54 | {bindict,1,1,2,<<"h\ne\n">>}
55 | 
 56 | %% Curious how much memory you will use?
57 | 8> bisect:expected_size(bisect:new(1, 1), 255).
58 | 510
59 | 9> bisect:expected_size_mb(bisect:new(8, 1), 10000000).
60 | 85.8306884765625
61 | ```
62 | 
63 | It is up to the user to encode/decode keys and values in a way that
64 | makes sense, Bisect only stores the raw bytes you give as input.
65 | 


--------------------------------------------------------------------------------
/src/bisect_server.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: gen_server wrapping an instance of bisect, owns the bisect
  2 | %% structure, serializes writes, hands out the reference to the bisect
  3 | %% structure to concurrent readers.
  4 | -module(bisect_server).
  5 | -behaviour(gen_server).
  6 | 
  7 | %% API
  8 | -export([start_link/2, start_link/3, start_link_with_data/3, stop/1]).
  9 | -export([get/2, first/1, last/1, next/2, next_nth/3, mget/2, mget_serial/2,
 10 |          insert/3, append/3, cas/4, inject/2, num_keys/1, delete/2]).
 11 | 
 12 | %% gen_server callbacks
 13 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
 14 |          terminate/2, code_change/3]).
 15 | 
 16 | 
 17 | -record(state, {b}).
 18 | 
 19 | -ifdef(TEST).
 20 | -include_lib("proper/include/proper.hrl").
 21 | -include_lib("eunit/include/eunit.hrl").
 22 | -compile([export_all]).
 23 | -endif.
 24 | 
 25 | %%%===================================================================
 26 | %%% API
 27 | %%%===================================================================
 28 | 
 29 | start_link_with_data(KeySize, ValueSize, Data) ->
 30 |     gen_server:start_link(?MODULE, [KeySize, ValueSize, Data], []).
 31 | 
 32 | start_link(KeySize, ValueSize) ->
 33 |     gen_server:start_link(?MODULE, [KeySize, ValueSize], []).
 34 | 
 35 | start_link(Name, KeySize, ValueSize) ->
 36 |     gen_server:start_link({local, Name}, ?MODULE, [KeySize, ValueSize], []).
 37 | 
 38 | stop(Pid) ->
 39 |     gen_server:call(Pid, stop).
 40 | 
 41 | get(Pid, K) ->
 42 |     {ok, B} = gen_server:call(Pid, get_b),
 43 |     {ok, bisect:find(B, K)}.
 44 | 
 45 | first(Pid) ->
 46 |     {ok, B} = gen_server:call(Pid, get_b),
 47 |     {ok, bisect:first(B)}.
 48 | 
 49 | last(Pid) ->
 50 |     {ok, B} = gen_server:call(Pid, get_b),
 51 |     {ok, bisect:last(B)}.
 52 | 
 53 | next(Pid, K) ->
 54 |     {ok, B} = gen_server:call(Pid, get_b),
 55 |     {ok, bisect:next(B, K)}.
 56 | 
 57 | next_nth(Pid, K, Steps) ->
 58 |     {ok, B} = gen_server:call(Pid, get_b),
 59 |     {ok, bisect:next_nth(B, K, Steps)}.
 60 | 
 61 | mget(Pid, Keys) ->
 62 |     {ok, B} = gen_server:call(Pid, get_b),
 63 |     {ok, bisect:find_many(B, Keys)}.
 64 | 
 65 | mget_serial(Pid, Keys) ->
 66 |     gen_server:call(Pid, {mget, Keys}).
 67 | 
 68 | num_keys(Pid) ->
 69 |     {ok, B} = gen_server:call(Pid, get_b),
 70 |     {ok, bisect:num_keys(B)}.
 71 | 
 72 | insert(Pid, K, V) ->
 73 |     gen_server:call(Pid, {insert, K, V}).
 74 | 
 75 | append(Pid, K, V) ->
 76 |     gen_server:call(Pid, {append, K, V}).
 77 | 
 78 | cas(Pid, K, OldV, V) ->
 79 |     gen_server:call(Pid, {cas, K, OldV, V}).
 80 | 
 81 | inject(Pid, B) ->
 82 |     gen_server:call(Pid, {inject, B}).
 83 | 
 84 | delete(Pid, K) ->
 85 |     gen_server:call(Pid, {delete, K}).
 86 | 
 87 | %%%===================================================================
 88 | %%% gen_server callbacks
 89 | %%%===================================================================
 90 | 
 91 | init([KeySize, ValueSize]) ->
 92 |     {ok, #state{b = bisect:new(KeySize, ValueSize)}};
 93 | 
 94 | init([KeySize, ValueSize, Data]) ->
 95 |     {ok, #state{b = bisect:new(KeySize, ValueSize, Data)}}.
 96 | 
 97 | handle_call(get_b, _From, State) ->
 98 |     {reply, {ok, State#state.b}, State};
 99 | 
100 | handle_call({insert, K, V}, _From, #state{b = B} = State) ->
101 |     {reply, ok, State#state{b = bisect:insert(B, K, V)}};
102 | 
103 | handle_call({append, K, V}, _From, #state{b = B} = State) ->
104 |     {reply, ok, State#state{b = bisect:append(B, K, V)}};
105 | 
106 | handle_call({inject, B}, _From, State) ->
107 |     {reply, ok, State#state{b = B}};
108 | 
109 | handle_call({mget, Keys}, _From, State) ->
110 |     {reply, {ok, bisect:find_many(State#state.b, Keys)}, State};
111 | 
112 | handle_call({delete, K}, _From, #state{b = B} = State) ->
113 |     case catch bisect:delete(B, K) of
114 |         {'EXIT', {badarg, _}} ->
115 |             {reply, {error, badarg}, State};
116 |         NewB ->
117 |             {reply, ok, State#state{b = NewB}}
118 |     end;
119 | 
120 | handle_call({cas, K, OldV, V}, _From, #state{b = B} = State) ->
121 |     case catch bisect:cas(B, K, OldV, V) of
122 |         {'EXIT', {badarg, _}} ->
123 |             {reply, {error, badarg}, State};
124 |         NewB ->
125 |             {reply, ok, State#state{b = NewB}}
126 |     end;
127 | 
128 | handle_call(stop, _From, State) ->
129 |     {stop, normal, ok, State}.
130 | 
131 | 
%% Casts are not part of the protocol; they are ignored.
handle_cast(_Request, State) ->
    {noreply, State}.

%% Other messages are ignored as well.
handle_info(_Message, State) ->
    {noreply, State}.

%% Nothing to clean up on termination.
terminate(_Why, _State) ->
    ok.

%% The state is carried over unchanged on code upgrade.
code_change(_FromVsn, State, _Extra) ->
    {ok, State}.
143 | 
144 | %%
145 | %% TESTS
146 | %%
147 | 
148 | -ifdef(TEST).
149 | 
150 | 
%% Inserted pairs must be readable through both mget variants.
insert_test() ->
    {ok, S} = start_link(8, 1),
    Pairs = [{<<N:64/integer>>, <<N>>} || N <- [1, 2, 3]],
    lists:foreach(fun ({K, V}) -> ok = insert(S, K, V) end, Pairs),

    {Keys, Values} = lists:unzip(Pairs),
    ?assertEqual({ok, Values}, mget(S, Keys)),
    ?assertEqual({ok, Values}, mget_serial(S, Keys)).
161 | 
162 | 
%% cas/4 must only write when the expected old value matches.
cas_test() ->
    {ok, S} = start_link(8, 1),
    K1 = <<1:64/integer>>,
    K2 = <<2:64/integer>>,
    ok = insert(S, K1, <<1>>),

    %% K2 is absent, so expecting an old value of <<2>> must fail and
    %% leave the stored data untouched.
    {error, badarg} = cas(S, K2, <<2>>, <<2>>),
    ?assertEqual({ok, <<1>>}, get(S, K1)),

    %% Matching old value: the write goes through.
    ok = cas(S, K1, <<1>>, <<2>>),
    ?assertEqual({ok, <<2>>}, get(S, K1)),

    %% not_found as the old value means "create only if absent".
    ok = cas(S, K2, not_found, <<2>>),
    ?assertEqual({ok, <<2>>}, get(S, K2)).
174 | 
175 | 
176 | 
%% inject/2 must replace the server's bisect wholesale.
inject_test() ->
    {ok, S} = start_link(8, 1),
    Key = <<20:64/integer>>,
    ?assertEqual({ok, not_found}, get(S, Key)),

    Pairs = [{<<I:64/integer>>, <<97>>} || I <- lists:seq(1, 100000)],
    Populated = bisect:from_orddict(bisect:new(8, 1), Pairs),
    ok = inject(S, Populated),
    ?assertEqual({ok, <<97>>}, get(S, Key)).
188 | 
189 | 
%% Runs the PropEr state-machine property as an eunit test.
proper_test() ->
    Property = ?MODULE:prop_bisect(),
    ?assert(proper:quickcheck(Property)).
192 | 
193 | 
194 | -record(prop__state, {keys = []}).
195 | 
%% State-machine property: for every generated command sequence, start a
%% server registered as `prop' (8-byte keys, 1-byte values), run the
%% commands against it, stop the server, and require the run result to
%% be `ok'.
prop_bisect() ->
    ?FORALL(Cmds, commands(?MODULE),
            ?TRAPEXIT(
               begin
                   %% The generated commands address the server by its
                   %% registered name `prop', not by pid.
                   {ok, S} = start_link(prop, 8, 1),

                   {History,State,Result} = run_commands(?MODULE, Cmds),
                   %% Stop the server so the next sequence can register
                   %% the same name again.
                   stop(S),

                   %% On failure, print the history, model state and result.
                   ?WHENFAIL(io:format("History: ~p\nState: ~p\nResult: ~p\n",
                                       [History, State, Result]),
                             aggregate(command_names(Cmds), Result =:= ok))
                end)).
209 | 
%% Generator picking one of the fixed test keys.
prop__key() ->
    Keys = prop__keys(),
    elements(Keys).

%% Generator picking one of the fixed test values.
prop__value() ->
    Values = prop__values(),
    elements(Values).

%% The three 8-byte keys used by the property.
prop__keys() ->
    [<<N:64/integer>> || N <- [1, 2, 3]].

%% The three 1-byte values used by the property.
prop__values() ->
    [<<N:8/integer>> || N <- [1, 2, 3]].
221 | 
222 | 
%% Generates one symbolic call against the server registered as `prop'.
command(_S) ->
    Server = prop,
    Insert = {call, ?MODULE, insert, [Server, prop__key(), prop__value()]},
    Get    = {call, ?MODULE, get,    [Server, prop__key()]},
    MGet   = {call, ?MODULE, mget,   [Server, prop__keys()]},
    Delete = {call, ?MODULE, delete, [Server, prop__key()]},
    oneof([Insert, Get, MGet, Delete]).

%% The model starts with no keys stored.
initial_state() ->
    #prop__state{keys = []}.

%% Every command is allowed in every state.
precondition(_State, _Call) ->
    true.
235 | 
%% Model transition: insert stores or replaces the pair, delete removes
%% it, every other command leaves the model unchanged.
next_state(S, _Result, Call) ->
    case Call of
        {call, _, insert, [_, Key, Value]} ->
            Stored = lists:keystore(Key, 1, S#prop__state.keys, {Key, Value}),
            S#prop__state{keys = Stored};
        {call, _, delete, [_, Key]} ->
            Remaining = lists:keydelete(Key, 1, S#prop__state.keys),
            S#prop__state{keys = Remaining};
        _ ->
            S
    end.
244 | 
245 | 
%% Checks each command's reply against the model.

%% get: not_found is only valid for keys the model does not hold.
postcondition(S, {call, _, get, [_, Key]}, {ok, not_found}) ->
    not lists:keymember(Key, 1, S#prop__state.keys);

%% get: a returned value must be the one the model holds.
postcondition(S, {call, _, get, [_, Key]}, {ok, Value}) ->
    lists:keyfind(Key, 1, S#prop__state.keys) =:= {Key, Value};

%% mget: every key/value pair must satisfy the same rules as get.
postcondition(S, {call, _, mget, [_, Keys]}, {ok, Values}) ->
    Model = S#prop__state.keys,
    Matches = fun ({Key, not_found}) ->
                      not lists:keymember(Key, 1, Model);
                  ({Key, Value}) ->
                      {Key, Value} =:= lists:keyfind(Key, 1, Model)
              end,
    lists:all(Matches, lists:zip(Keys, Values));

%% delete: ok is only valid for keys the model holds ...
postcondition(S, {call, _, delete, [_, Key]}, ok) ->
    lists:keymember(Key, 1, S#prop__state.keys);

%% ... and badarg only for keys it does not hold.
postcondition(S, {call, _, delete, [_, Key]}, {error, badarg}) ->
    not lists:keymember(Key, 1, S#prop__state.keys);

%% insert: any reply is accepted.
postcondition(_S, {call, _, insert, _}, _) ->
    true.
276 | 
277 | -endif.
278 | 


--------------------------------------------------------------------------------
/src/bisect.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: Space-efficient dictionary implemented using a binary
  2 | %%
  3 | %% This module implements a space-efficient dictionary with no
  4 | %% overhead per entry. Read and write access is O(log n).
  5 | %%
  6 | %% Keys and values are fixed size binaries stored ordered in a larger
  7 | %% binary which acts as a sparse array. All operations are implemented
  8 | %% using a binary search.
  9 | %%
 10 | %% As large binaries can be shared among processes, there can be
 11 | %% multiple concurrent readers of an instance of this structure.
 12 | %%
  13 | %% Use serialize/1 and deserialize/1 to convert a dictionary to and from a binary.
 14 | -module(bisect).
 15 | -author('Knut Nesheim <knutin@gmail.com>').
 16 | 
 17 | -export([new/2, new/3, insert/3, bulk_insert/2, append/3, find/2, foldl/3]).
 18 | -export([next/2, next_nth/3, first/1, last/1, delete/2, compact/1, cas/4, update/4]).
 19 | -export([serialize/1, deserialize/1, from_orddict/2, to_orddict/1, find_many/2]).
 20 | -export([merge/2, intersection/1, intersection/2]).
 21 | -export([expected_size/2, expected_size_mb/2, num_keys/1, size/1]).
 22 | 
 23 | -compile({no_auto_import, [size/1]}).
 24 | -compile(native).
 25 | 
 26 | -ifdef(TEST).
 27 | -include_lib("eunit/include/eunit.hrl").
 28 | -endif.
 29 | 
 30 | 
 31 | %%
 32 | %% TYPES
 33 | %%
 34 | 
 35 | -type key_size()   :: pos_integer().
 36 | -type value_size() :: pos_integer().
 37 | -type block_size() :: pos_integer().
 38 | 
 39 | -type key()        :: binary().
 40 | -type value()      :: binary().
 41 | 
 42 | -type index()      :: pos_integer().
 43 | 
 44 | -record(bindict, {
 45 |           key_size   :: key_size(),
 46 |           value_size :: value_size(),
 47 |           block_size :: block_size(),
 48 |           b          :: binary()
 49 | }).
 50 | -type bindict() :: #bindict{}.
 51 | 
 52 | 
 53 | %%
 54 | %% API
 55 | %%
 56 | 
 57 | -spec new(key_size(), value_size()) -> bindict().
 58 | %% @doc: Returns a new empty dictionary where where the keys and
 59 | %% values will always be of the given size.
 60 | new(KeySize, ValueSize) when is_integer(KeySize)
 61 |                              andalso is_integer(ValueSize) ->
 62 |     new(KeySize, ValueSize, <<>>).
 63 | 
 64 | -spec new(key_size(), value_size(), binary()) -> bindict().
 65 | %% @doc: Returns a new dictionary with the given data
 66 | new(KeySize, ValueSize, Data) when is_integer(KeySize)
 67 |                                    andalso is_integer(ValueSize)
 68 |                                    andalso is_binary(Data) ->
 69 |     #bindict{key_size = KeySize,
 70 |              value_size = ValueSize,
 71 |              block_size = KeySize + ValueSize,
 72 |              b = Data}.
 73 | 
 74 | 
 75 | -spec insert(bindict(), key(), value()) -> bindict().
 76 | %% @doc: Inserts the key and value into the dictionary. If the size of
 77 | %% key and value is wrong, throws badarg. If the key is already in the
 78 | %% array, the value is updated.
 79 | insert(B, K, V) when byte_size(K) =/= B#bindict.key_size orelse
 80 |                      byte_size(V) =/= B#bindict.value_size ->
 81 |     erlang:error(badarg);
 82 | 
 83 | insert(#bindict{b = <<>>} = B, K, V) ->
 84 |     B#bindict{b = <<K/binary, V/binary>>};
 85 | 
 86 | insert(B, K, V) ->
 87 |     Index = index(B, K),
 88 |     LeftOffset = Index * B#bindict.block_size,
 89 |     RightOffset = byte_size(B#bindict.b) - LeftOffset,
 90 | 
 91 |     KeySize = B#bindict.key_size,
 92 |     ValueSize = B#bindict.value_size,
 93 | 
 94 |     case B#bindict.b of
 95 |         <<Left:LeftOffset/binary, K:KeySize/binary, _:ValueSize/binary, Right/binary>> ->
 96 |             B#bindict{b = iolist_to_binary([Left, K, V, Right])};
 97 | 
 98 |         <<Left:LeftOffset/binary, Right:RightOffset/binary>> ->
 99 |             B#bindict{b = iolist_to_binary([Left, K, V, Right])}
100 |     end.
101 | 
%% @doc: Replaces the value stored under the key with F(OldValue). When
%% the key is absent, Initial is stored instead, as in dict:update/4.
%% Unlike find/2 followed by insert/3, this needs only one binary search.
%% Raises badarg when the key, Initial or the value returned by F has the
%% wrong size, or when F is not a fun.
update(B, K, Initial, F) when byte_size(K) =/= B#bindict.key_size orelse
                              byte_size(Initial) =/= B#bindict.value_size orelse
                              not is_function(F) ->
    erlang:error(badarg);

update(B, K, Initial, F) ->
    Offset = index(B, K) * B#bindict.block_size,
    #bindict{key_size = KeySize, value_size = ValueSize, b = Bin} = B,

    case Bin of
        <<Left:Offset/binary, K:KeySize/binary, OldV:ValueSize/binary, Right/binary>> ->
            NewV = F(OldV),
            if
                NewV =:= OldV ->
                    %% Nothing changed; avoid rebuilding the binary.
                    B;
                byte_size(NewV) =:= ValueSize ->
                    B#bindict{b = iolist_to_binary([Left, K, NewV, Right])};
                true ->
                    erlang:error(badarg)
            end;

        <<Left:Offset/binary, Right/binary>> ->
            B#bindict{b = iolist_to_binary([Left, K, Initial, Right])}
    end.
134 | 
-spec append(bindict(), key(), value()) -> bindict().
%% @doc: Adds the key and value at the end of the dictionary without
%% searching. Raises badarg when the sizes are wrong or when the key is
%% not larger than the current last key.
append(B, K, V) when byte_size(K) =/= B#bindict.key_size orelse
                     byte_size(V) =/= B#bindict.value_size ->
    erlang:error(badarg);

append(B, K, V) ->
    case last(B) of
        {LastKey, _} when K =< LastKey ->
            erlang:error(badarg);
        _ ->
            Existing = B#bindict.b,
            B#bindict{b = <<Existing/binary, K/binary, V/binary>>}
    end.
150 | 
-spec cas(bindict(), key(), value() | 'not_found', value()) -> bindict().
%% @doc: Check-and-set. Inserts the value only when the key currently
%% maps to OldV; pass 'not_found' as OldV to require that the key is
%% absent. Raises badarg otherwise. Provided for use by bisect_server.
cas(B, K, OldV, V) ->
    Current = find(B, K),
    if
        Current =:= OldV -> insert(B, K, V);
        true             -> error(badarg)
    end.
162 | 
163 | 
-spec find(bindict(), key()) -> value() | not_found.
%% @doc: Returns the value stored under the key, or 'not_found' when the
%% key is not in the dictionary.
find(B, K) ->
    Slot = index(B, K),
    case at(B, Slot) of
        {K, Value} -> Value;
        %% Either a different key sits at that slot or the slot is past
        %% the end of the data.
        _          -> not_found
    end.
173 | 
-spec find_many(bindict(), [key()]) -> [value() | not_found].
%% @doc: Looks up every key with find/2 and returns the results in the
%% same order as the keys.
find_many(B, Keys) ->
    [find(B, K) || K <- Keys].
177 | 
-spec delete(bindict(), key()) -> bindict().
%% @doc: Removes the key and its value. Raises badarg when the key is not
%% in the dictionary.
delete(B, K) ->
    Offset = index2offset(B, index(B, K)),
    #bindict{key_size = KeySize, value_size = ValueSize, b = Bin} = B,

    case Bin of
        <<Before:Offset/binary, K:KeySize/binary, _:ValueSize/binary, After/binary>> ->
            B#bindict{b = <<Before/binary, After/binary>>};
        _ ->
            erlang:error(badarg)
    end.
190 | 
-spec next(bindict(), key()) -> {key(), value()} | not_found.
%% @doc: Returns the pair with the next larger key, or 'not_found' when
%% there is no larger key. Shorthand for next_nth/3 with one step.
next(B, K) ->
    Steps = 1,
    next_nth(B, K, Steps).
196 | 
%% @doc: Returns the key-value pair found Steps entries after K, i.e. the
%% nth next larger key together with its value, or 'not_found' if it does
%% not exist.
-spec next_nth(bindict(), key(), non_neg_integer()) -> {key(), value()} | not_found.
next_nth(B, K, Steps) ->
    %% index(B, inc(K)) is the slot of the first candidate; Steps is
    %% 1-based, hence the - 1. The result comes straight from at/2, which
    %% returns a {Key, Value} pair.
    at(B, index(B, inc(K)) + Steps - 1).
202 | 
203 | 
204 | 
-spec first(bindict()) -> {key(), value()} | not_found.
%% @doc: Returns the pair stored at index 0, or 'not_found' when the
%% dictionary is empty.
first(B) ->
    FirstIndex = 0,
    at(B, FirstIndex).
209 | 
-spec last(bindict()) -> {key(), value()} | not_found.
%% @doc: Returns the pair stored at the highest index, or 'not_found'
%% when the dictionary is empty.
last(B) ->
    LastIndex = num_keys(B) - 1,
    at(B, LastIndex).
214 | 
-spec foldl(bindict(), fun((key(), value(), any()) -> any()), any()) -> any().
%% @doc: Folds F(Key, Value, Acc) over all pairs in key order and returns
%% the final accumulator. For an empty dictionary the initial accumulator
%% is returned unchanged.
foldl(B, F, Acc) ->
    case first(B) of
        {Key, Value} ->
            do_foldl(B, F, Key, F(Key, Value, Acc));
        not_found ->
            %% Nothing to fold over: hand back the caller's accumulator
            %% rather than a hard-coded [], which callers expecting their
            %% own accumulator shape would fail to match.
            Acc
    end.
223 | 
%% Continues a fold from the entry after PrevKey until next/2 reports
%% that no larger key exists.
do_foldl(B, F, PrevKey, Acc0) ->
    case next(B, PrevKey) of
        not_found ->
            Acc0;
        {Key, Value} ->
            Acc1 = F(Key, Value, Acc0),
            do_foldl(B, F, Key, Acc1)
    end.
231 | 
232 | 
%% @doc: Replaces the storage binary with a fresh copy made by
%% binary:copy/1. Intended to be used after writes, which fragment the
%% storage.
compact(#bindict{b = Bin} = B) ->
    Copy = binary:copy(Bin),
    B#bindict{b = Copy}.
238 | 
%% @doc: Returns the number of bytes the dictionary's data would occupy
%% when holding NumKeys entries: one block per key.
expected_size(B, NumKeys) ->
    BlockSize = B#bindict.block_size,
    BlockSize * NumKeys.
243 | 
%% @doc: Same as expected_size/2, expressed in megabytes (bytes divided
%% by 1024 twice).
expected_size_mb(B, NumKeys) ->
    Bytes = expected_size(B, NumKeys),
    Bytes / 1024 / 1024.
246 | 
-spec num_keys(bindict()) -> integer().
%% @doc: Returns the number of keys in the dictionary, i.e. the number of
%% whole blocks in the storage binary.
num_keys(#bindict{b = Bin} = B) ->
    byte_size(Bin) div B#bindict.block_size.
251 | 
%% @doc: Returns the size of the storage binary in bytes.
size(#bindict{b = Bin}) ->
    erlang:byte_size(Bin).
254 | 
255 | 
-spec serialize(bindict()) -> binary().
%% @doc: Encodes the dictionary with term_to_binary/1. The result can be
%% turned back into the same structure with deserialize/1.
serialize(B) when is_record(B, bindict) ->
    term_to_binary(B).
261 | 
-spec deserialize(binary()) -> bindict().
%% @doc: Recreates a dictionary from a binary produced by serialize/1.
%% Raises badarg when the binary cannot be decoded or does not contain a
%% bindict.
deserialize(Bin) ->
    %% The input may come from disk or the network, so decode in safe
    %% mode: binary_to_term/2 then raises badarg instead of creating new
    %% atoms or external funs. A serialized bindict only holds the record
    %% tag, integers and a binary, so valid input is unaffected.
    case binary_to_term(Bin, [safe]) of
        #bindict{} = B ->
            B;
        _ ->
            erlang:error(badarg)
    end.
270 | 
%% @doc: Inserts a batch of key-value pairs. The new storage binary is
%% built only once, which makes this much cheaper than repeated calls to
%% insert/3. The input list must be sorted by key.
bulk_insert(#bindict{b = Bin} = B, Orddict) ->
    ReversedParts = do_bulk_insert(B, Bin, [], Orddict),
    Parts = lists:reverse(ReversedParts),
    B#bindict{b = iolist_to_binary(Parts)}.
277 | 
%% Walks the sorted pairs, splitting the remaining binary at each key and
%% accumulating the pieces in reverse order. When the pairs run out, the
%% rest of the binary becomes the final piece.
do_bulk_insert(_B, Remaining, Acc, []) ->
    [Remaining | Acc];
do_bulk_insert(B, Remaining, Acc, [{Key, Value} | Pairs]) ->
    #bindict{key_size = KeySize, value_size = ValueSize} = B,
    {Before, After} = split_at(Remaining, KeySize, ValueSize, Key, 0),
    do_bulk_insert(B, After, [Value, Key, Before | Acc], Pairs).
283 | 
%% Scans Bin block by block, starting at block I, for the place where Key
%% belongs and returns {Left, Right}:
%%  * Key found: Left is everything before its block, Right everything
%%    after it (the old block is dropped)
%%  * a larger key found first: Right starts at that larger key's block
%%  * end of data reached: {Bin, <<>>}
split_at(Bin, KeySize, ValueSize, Key, I) ->
    Skip = I * (KeySize + ValueSize),
    case Bin of
        _ when byte_size(Bin) < Skip ->
            {Bin, <<>>};

        <<Before:Skip/binary,
          Key:KeySize/binary, _:ValueSize/binary,
          After/binary>> ->
            {Before, After};

        <<_:Skip/binary,
          Larger:KeySize/binary, _:ValueSize/binary,
          _/binary>> when Larger > Key ->
            %% Keep the larger key's block at the head of the right part.
            split_binary(Bin, Skip);

        _ ->
            split_at(Bin, KeySize, ValueSize, Key, I + 1)
    end.
303 | 
%% @doc: Merges the entries of Small into Big and returns the result as
%% an updated copy of Big. When a key exists in both, the value from
%% Small is kept. Raises badarg unless both dictionaries use the same key
%% size and the same value size.
merge(Small, Big) ->
    %% Comparing only the block size is not enough: a (2, 1) and a (1, 2)
    %% dictionary have equal block sizes but incompatible layouts.
    (Small#bindict.key_size =:= Big#bindict.key_size andalso
     Small#bindict.value_size =:= Big#bindict.value_size)
        orelse erlang:error(badarg),

    L = do_merge(Small#bindict.b, Big#bindict.b, [],
                 Big#bindict.key_size, Big#bindict.value_size),
    Big#bindict{b = iolist_to_binary(L)}.
311 | 
%% Takes the entries of Small one at a time, splits the remaining part of
%% Big at each key and collects the pieces. Returns the pieces in order
%% as an iolist once Small is exhausted.
do_merge(Small, Big, Acc, KeySize, ValueSize) ->
    case Small of
        <<Key:KeySize/binary, Value:ValueSize/binary, SmallTail/binary>> ->
            {BigBefore, BigAfter} = split_at(Big, KeySize, ValueSize, Key, 0),
            NewAcc = [Value, Key, BigBefore | Acc],
            do_merge(SmallTail, BigAfter, NewAcc, KeySize, ValueSize);
        <<>> ->
            lists:reverse(Acc, [Big])
    end.
321 | 
%% @doc: Intersects two or more bindicts by key. The result contains the
%% keys found in all of the inputs. Raises badarg when fewer than two
%% bindicts are given.
intersection(Bs) ->
    HasEnoughSets = is_list(Bs) andalso length(Bs) >= 2,
    case HasEnoughSets of
        true  -> intersection(Bs, svs);
        false -> erlang:error(badarg)
    end.
328 | 
%% @doc: SvS set intersection algorithm, as described in
%% http://www.cs.toronto.edu/~tl/papers/fiats.pdf
%% The smallest input is used as the candidate set and is filtered
%% against the remaining inputs in order of increasing size.
intersection(Bs, svs) ->
    BySize = lists:sort(fun (A, B) -> size(A) =< size(B) end, Bs),
    [Smallest | Larger] = BySize,
    Empty = new(Smallest#bindict.key_size, Smallest#bindict.value_size),
    from_orddict(Empty, do_svs(Larger, Smallest)).
336 | 
%% Filters the candidates against each remaining set in turn, keeping
%% only the candidates whose key is also present in the set.
do_svs([], Candidates) ->
    Candidates;
do_svs([Set | Sets], #bindict{} = Candidates) ->
    %% Optimization: the candidate set stays a bindict for the first
    %% iteration to avoid creating a large orddict just to throw most of
    %% it away. For the remaining sets the candidates are kept as a list.
    Size = byte_size(Set#bindict.b) div Set#bindict.block_size,
    {_, Kept} =
        foldl(Candidates,
              fun (K, V, State) -> do_svs_probe(Set, Size, {K, V}, State) end,
              {0, []}),
    do_svs(Sets, lists:reverse(Kept));

do_svs([Set | Sets], Candidates) when is_list(Candidates) ->
    Size = byte_size(Set#bindict.b) div Set#bindict.block_size,
    {_, Kept} =
        lists:foldl(fun (Pair, State) -> do_svs_probe(Set, Size, Pair, State) end,
                    {0, []}, Candidates),
    do_svs(Sets, lists:reverse(Kept)).

%% Searches Set for K starting from the lower bound L and keeps the
%% candidate when the key is found. The rank reached becomes the lower
%% bound for the next candidate.
%% TODO: Skip candidates until OtherK?
do_svs_probe(Set, Size, {K, V}, {L, Acc}) ->
    Rank = index(Set, L, Size, K),
    case at(Set, Rank) of
        {K, _} -> {Rank, [{K, V} | Acc]};
        _      -> {Rank, Acc}
    end.
370 | 
%% @doc: Returns the {Key, Value} pair stored in block number I, or
%% not_found when the binary does not hold a complete block at that
%% position.
at(B, I) ->
    Skip = index2offset(B, I),
    KLen = B#bindict.key_size,
    VLen = B#bindict.value_size,
    Data = B#bindict.b,
    case Data of
        <<_:Skip/binary, K:KLen/binary, V:VLen/binary, _/binary>> ->
            {K, V};
        _NoSuchBlock ->
            not_found
    end.
381 | 
382 | 
%% @doc: Loads an empty bindict from an orddict. The pairs are written
%% in the order orddict:fold/3 visits them, so no searching is done.
%% The given bindict must be empty, but contain size parameters. Raises
%% badarg if a key or value does not have the configured byte size.
from_orddict(#bindict{b = <<>>} = B, Orddict) ->
    KeySize = B#bindict.key_size,
    ValueSize = B#bindict.value_size,
    Pack = fun (K, V, Packed) when byte_size(K) =:= KeySize,
                                   byte_size(V) =:= ValueSize ->
                   [[K, V] | Packed];
               (_K, _V, _Packed) ->
                   erlang:error(badarg)
           end,
    Blocks = orddict:fold(Pack, [], Orddict),
    B#bindict{b = iolist_to_binary(lists:reverse(Blocks))}.
397 | 
%% @doc: Returns the contents of the bindict as a list of {Key, Value}
%% pairs, in the order foldl/3 visits them.
to_orddict(#bindict{} = B) ->
    Collect = fun (Key, Value, Pairs) -> [{Key, Value} | Pairs] end,
    Reversed = foldl(B, Collect, []),
    lists:reverse(Reversed).
403 | 
404 | 
405 | %%
406 | %% INTERNAL HELPERS
407 | %%
408 | 
%% @doc: Converts a block index into a byte offset into the binary.
%% Index 0 is always offset 0, without looking at the bindict.
index2offset(_B, Index) when Index =:= 0 ->
    0;
index2offset(B, Index) ->
    BlockSize = B#bindict.block_size,
    Index * BlockSize.
411 | 
%% @doc: Binary search for the index of the given key. When the key is
%% not present, the result is the index at which it should be inserted.
%% The search covers every whole block in the bindict.
-spec index(bindict(), key()) -> index().
index(<<>>, _Key) ->
    0;
index(B, Key) ->
    BlockCount = byte_size(B#bindict.b) div B#bindict.block_size,
    index(B, 0, BlockCount, Key).
421 | 
%% @doc: Binary search over the block indices from Low up to, but not
%% including, High. Returns the index holding K, or the index the
%% search converged on when K is absent. Returns -1 when called with
%% High below Low.
index(_B, Low, High, _K) when High =:= Low ->
    Low;

index(_B, Low, High, _K) when High < Low ->
    -1;

index(B, Low, High, K) ->
    Mid = (Low + High) div 2,
    MidOffset = index2offset(B, Mid),

    KeySize = B#bindict.key_size,
    Data = B#bindict.b,
    case byte_size(Data) =< MidOffset of
        true ->
            %% The midpoint lies at or past the end of the data.
            Mid;
        false ->
            <<_:MidOffset/binary, MidKey:KeySize/binary, _/binary>> = Data,
            case K of
                MidKey ->
                    Mid;
                _ when MidKey > K ->
                    index(B, Low, Mid, K);
                _ ->
                    index(B, Mid + 1, High, K)
            end
    end.
448 | 
%% @doc: Treats the binary as an unsigned big-endian integer and
%% returns that integer plus one, encoded in the same number of bytes.
%% A value that no longer fits is truncated to the original width.
inc(B) ->
    Next = binary:decode_unsigned(B) + 1,
    Bits = byte_size(B) * 8,
    <<Next:Bits>>.
453 | 
454 | %%
455 | %% TEST
456 | %%
457 | -ifdef(TEST).
458 | 
459 | 
%% Test helpers.
%% i2k: integer -> 8 byte (64 bit) key binary.
-define(i2k(I), <<I:64/integer>>).
%% i2v: integer -> 1 byte (8 bit) value binary.
-define(i2v(I), <<I:8/integer>>).
%% b2i: binary holding decimal digits -> integer.
%% NOTE(review): not referenced by the tests below - confirm whether it
%% is still needed.
-define(b2i(B), list_to_integer(binary_to_list(B))).
463 | 
%% new/3 given the raw binary of an existing bindict must yield an
%% equal bindict.
new_with_data_test() ->
    Pairs = [{2, 2}, {4, 4}, {1, 1}, {3, 3}],
    Expected = insert_many(new(8, 1), Pairs),
    Rebuilt = new(8, 1, Expected#bindict.b),
    ?assertEqual(Expected, Rebuilt).
467 | 
%% Smoke test: inserting keys in arbitrary order must not crash.
insert_test() ->
    Empty = new(8, 1),
    Unordered = [{2, 2}, {4, 4}, {1, 1}, {3, 3}],
    insert_many(Empty, Unordered).
470 | 
%% Inserting already sorted keys yields the blocks laid out back to
%% back, key followed by value.
sorted_insert_test() ->
    B = insert_many(new(8, 1), [{1, 1}, {2, 2}, {3, 3}, {4, 4}]),
    Expected = iolist_to_binary([[?i2k(I), ?i2v(I)] || I <- [1, 2, 3, 4]]),
    ?assertEqual(Expected, B#bindict.b).
475 | 
%% index/2 returns the position of an existing key, or the insertion
%% point for a key that is missing.
index_test() ->
    B = #bindict{key_size = 8, value_size = 1, block_size = 9,
                 b = <<1:64/integer, 1, 2:64/integer, 2>>},
    Expectations = [{0, 1}, {1, 2}, {2, 3}, {2, 100}],
    lists:foreach(fun ({Position, Key}) ->
                          ?assertEqual(Position, index(B, ?i2k(Key)))
                  end, Expectations).
483 | 
%% find/2 returns the value stored under an existing key.
find_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    ?assertEqual(?i2v(3), find(Dict, ?i2k(3))).
487 | 
%% find/2 returns not_found for a key that was never inserted.
find_non_existing_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    MissingKey = <<4:64/integer>>,
    ?assertEqual(not_found, find(Dict, MissingKey)).
491 | 
%% Smoke test: find_many/2 accepts a list of existing keys without
%% crashing. The result is not inspected.
find_many_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    Keys = [?i2k(I) || I <- [1, 2, 3]],
    find_many(Dict, Keys).
495 | 
%% Inserting under an existing key replaces the stored value.
insert_overwrite_test() ->
    Key = ?i2k(2),
    Before = insert_many(new(8, 1), [{2, 2}]),
    ?assertEqual(<<2>>, find(Before, Key)),
    After = insert(Before, Key, <<4>>),
    ?assertEqual(<<4>>, find(After, Key)).
501 | 
%% update/4 calls the fun with the old value when the key exists, and
%% stores the initial value without calling the fun when it does not.
update_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}]),

    Replace = fun (Old) ->
                      ?assertEqual(Old, <<2>>),
                      <<5>>
              end,
    Updated = update(Dict, ?i2k(2), <<4>>, Replace),
    ?assertEqual(<<5>>, find(Updated, ?i2k(2))),

    MustNotRun = fun (_) -> throw(unexpected_call) end,
    Extended = update(Updated, ?i2k(3), <<3>>, MustNotRun),
    ?assertEqual(<<3>>, find(Extended, ?i2k(3))).
513 | 
%% append/3 rejects keys that are smaller than or equal to the last
%% key, and accepts a larger one.
append_test() ->
    Existing = {<<2:64>>, <<2:8>>},
    NewKey = <<3:64>>,
    NewValue = <<3:8>>,
    Dict = insert_many(new(8, 1), [Existing]),
    ?assertError(badarg, append(Dict, <<1:64>>, NewValue)),
    ?assertError(badarg, append(Dict, <<2:64>>, NewValue)),
    Appended = append(Dict, NewKey, NewValue),
    ?assertEqual(NewValue, find(Appended, NewKey)).
522 | 
%% next/2 returns the first pair whose key is strictly greater than
%% the given key, or not_found past the last key.
next_test() ->
    First = {<<2:64>>, <<2:8>>},
    Second = {<<3:64>>, <<3:8>>},
    Dict = insert_many(new(8, 1), [First, Second]),
    Expectations = [{0, First}, {1, First}, {2, Second}, {3, not_found}],
    lists:foreach(fun ({From, Expected}) ->
                          ?assertEqual(Expected, next(Dict, <<From:64>>))
                  end, Expectations).
531 | 
%% next_nth/3 steps N pairs past the given key, or returns not_found
%% when that would run off the end.
next_nth_test() ->
    First = {<<2:64>>, <<2:8>>},
    Second = {<<3:64>>, <<3:8>>},
    Dict = insert_many(new(8, 1), [First, Second]),
    Expectations = [{0, 1, First},
                    {0, 2, Second},
                    {2, 1, Second},
                    {2, 2, not_found},
                    {3, 1, not_found}],
    lists:foreach(fun ({From, Steps, Expected}) ->
                          ?assertEqual(Expected,
                                       next_nth(Dict, <<From:64>>, Steps))
                  end, Expectations).
541 | 
%% first/1 is not_found on an empty bindict and otherwise returns the
%% pair with the smallest key.
first_test() ->
    {LowKey, LowValue} = Lowest = {<<2:64>>, <<2:8>>},
    {HighKey, HighValue} = {<<3:64>>, <<3:8>>},
    Empty = new(8, 1),
    ?assertEqual(not_found, first(Empty)),
    One = insert(Empty, LowKey, LowValue),
    ?assertEqual(Lowest, first(One)),
    Two = insert(One, HighKey, HighValue),
    ?assertEqual(Lowest, first(Two)).
551 | 
%% last/1 is not_found on an empty bindict and otherwise returns the
%% pair with the largest key. Also checks num_keys/1 and at/2 on the
%% empty bindict.
last_test() ->
    {LowKey, LowValue} = Lowest = {<<2:64>>, <<2:8>>},
    {HighKey, HighValue} = Highest = {<<3:64>>, <<3:8>>},
    Empty = new(8, 1),
    ?assertEqual(not_found, last(Empty)),
    ?assertEqual(0, num_keys(Empty)),
    lists:foreach(fun (I) ->
                          ?assertEqual(not_found, at(Empty, I))
                  end, [0, -1, 1]),
    One = insert(Empty, LowKey, LowValue),
    ?assertEqual(Lowest, last(One)),
    Two = insert(One, HighKey, HighValue),
    ?assertEqual(Highest, last(Two)).
565 | 
%% A deleted key can no longer be found.
delete_test() ->
    Key = ?i2k(2),
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    ?assertEqual(?i2v(2), find(Dict, Key)),

    Remaining = delete(Dict, Key),
    ?assertEqual(not_found, find(Remaining, Key)).
572 | 
%% Deleting a key that is not present raises badarg.
delete_non_existing_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    MissingKey = <<4:64/integer>>,
    ?assertError(badarg, delete(Dict, MissingKey)).
576 | 
%% foldl/3 visits every pair, and returns the initial accumulator
%% untouched for an empty bindict.
foldl_test() ->
    Dict = insert_many(new(8, 1), [{2, 2}, {3, 3}, {1, 1}]),
    SumValues = fun (_Key, <<V:8/integer>>, Sum) -> V + Sum end,
    ?assertEqual(2+3+1, foldl(Dict, SumValues, 0)),

    Collect = fun (I, V, Acc) -> [{I, V} | Acc] end,
    ?assertEqual([], foldl(new(8, 1), Collect, [])).
581 | 
582 | 
%% num_keys/1 reports one key per inserted pair.
size_test() ->
    Start = 100000000000000,
    N = 1000,
    Spread = 1,
    Keys = lists:seq(Start, Start + (N * Spread), Spread),
    KeyPairs = [{I, 255} || I <- Keys],

    B = insert_many(new(8, 1), KeyPairs),
    %% The sequence includes both end points, so it holds N + 1 keys
    %% whatever Spread is. The old expectation, N + Spread, only
    %% happened to be right because Spread is 1.
    ?assertEqual(N + 1, num_keys(B)).
592 | 
%% serialize/1 followed by deserialize/1 gives back an equal bindict.
serialize_test() ->
    KeyPairs = [{I, 255} || I <- lists:seq(1, 100)],
    Dict = insert_many(new(8, 1), KeyPairs),
    RoundTripped = deserialize(serialize(Dict)),
    ?assertEqual(Dict, RoundTripped).
597 | 
%% A pair loaded through from_orddict/2 can be found afterwards.
from_orddict_test() ->
    Key = ?i2k(1),
    Orddict = orddict:from_list([{Key, ?i2v(255)}]),
    Dict = from_orddict(new(8, 1), Orddict),
    ?assertEqual(<<255>>, find(Dict, Key)).
601 | 
602 | 
%% Only keys 1 and 2 occur in all four sets. The value expected for
%% key 2 is the one stored in the first set.
intersection_test() ->
    Contents = [[{1, 1}, {2, 2}, {3, 3}],
                [{1, 1}, {2, 3}, {4, 4}],
                [{1, 1}, {2, 3}, {5, 5}],
                [{1, 1}, {2, 3}, {6, 6}]],
    Sets = [insert_many(new(8, 1), Pairs) || Pairs <- Contents],

    Intersection = intersection(Sets),
    Expected = insert_many(new(8, 1), [{1, 1}, {2, 2}]),
    ?assertEqual(to_orddict(Expected), to_orddict(Intersection)).
612 | 
613 | 
%% EUnit generator: runs intersection_perf/0 with a 600 second timeout
%% instead of the EUnit default.
intersection_perf_test_() ->
    PerfRun = ?_test(intersection_perf()),
    {timeout, 600, PerfRun}.
616 | 
%% Builds bindicts of the requested sizes that share exactly
%% IntersectionSize random pairs, intersects them, checks the result
%% against sets:intersection/1 and logs how long the bindict
%% intersection took. Each test case is {SetSizes, IntersectionSize}.
intersection_perf() ->
    TestCases = [{[1000, 1000], 10},
                 {[100000, 100000, 100000], 1000},
                 {[10000, 100000, 1000000], 1000},
                 {[1000000, 1000000, 1000000], 10000}
                ],

    RunCase =
        fun ({SetSizes, IntersectionSize}) ->
                %% Pairs private to each set, plus the shared ones once.
                PrivateSizes = [SetSize - IntersectionSize
                                || SetSize <- SetSizes],
                UnionSize = lists:sum(PrivateSizes) + IntersectionSize,
                KVs = [{<<K:36/binary>>, <<97:32/integer>>}
                       || K <- generate_unique(UnionSize)],
                ?assertEqual(UnionSize, sets:size(sets:from_list(KVs))),

                {Shared, Rest} = lists:split(IntersectionSize, KVs),
                TakePrivate = fun (Size, Remaining) ->
                                      lists:split(Size - IntersectionSize,
                                                  Remaining)
                              end,
                {PrivatePairs, []} = lists:mapfoldl(TakePrivate, Rest, SetSizes),
                ?assertEqual(IntersectionSize, length(Shared)),

                %% Reference result computed with the sets module.
                Reference = sets:intersection(
                              [sets:from_list(Ks ++ Shared)
                               || Ks <- PrivatePairs]),
                ?assertEqual(IntersectionSize, sets:size(Reference)),

                Bisects = [from_orddict(new(36, 4),
                                        orddict:from_list(Ks ++ Shared))
                           || Ks <- PrivatePairs],
                {IntersectUs, BisectIntersection} =
                    timer:tc(fun () -> intersection(Bisects) end),

                ExpectedPairs = lists:sort(sets:to_list(Reference)),
                ActualPairs = lists:sort(to_orddict(BisectIntersection)),
                ?assertEqual(length(ExpectedPairs), length(ActualPairs)),
                ?assertEqual(ExpectedPairs, ActualPairs),
                error_logger:info_msg("Set sizes: ~p, Intersection size: ~p~n"
                                      "Intersection runtime: ~.2f ms~n",
                                      [SetSizes, IntersectionSize,
                                       IntersectUs / 1000]),

                ok
        end,
    lists:foreach(RunCase, TestCases).
663 | 
664 | 
%% Returns N distinct random 36 byte binaries.
generate_unique(N) ->
    %% crypto:rand_bytes/1 has been removed from OTP;
    %% crypto:strong_rand_bytes/1 is the replacement.
    RandomGenerator = fun () -> crypto:strong_rand_bytes(36) end,
    generate_unique(RandomGenerator, [], N).
668 | 
%% Calls RandomGenerator until Acc holds N distinct values and returns
%% them sorted. After every batch the whole accumulator is
%% deduplicated, so a value that shows up in two different batches is
%% only counted once. Crashes with case_clause if Acc already holds
%% more than N values.
generate_unique(RandomGenerator, Acc, N) ->
    case N - length(Acc) of
        0 ->
            Acc;
        Missing when Missing > 0 ->
            Fresh = [RandomGenerator() || _ <- lists:seq(1, Missing)],
            generate_unique(RandomGenerator, lists:usort(Fresh ++ Acc), N)
    end.
680 | 
681 | 
%% EUnit generator: loads N + 1 sequential keys with from_orddict/2
%% and benchmarks reading 1000 randomly chosen keys.
speed_test_() ->
    {timeout, 600,
     fun() ->
             Start = 100000000000000,
             N = 100000,
             Keys = lists:seq(Start, Start+N),
             KeyValuePairs = [{<<I:64/integer>>, <<255:8/integer>>}
                              || I <- Keys],

             %% Will mostly be unique, if N is bigger than 10000.
             %% The read keys are encoded the same way as the stored
             %% keys; reading with raw integers can never hit a stored
             %% binary key. Start + R - 1 is the R:th element of Keys.
             ReadKeys = [<<(Start + random:uniform(N) - 1):64/integer>>
                         || _ <- lists:seq(1, 1000)],
             B = from_orddict(new(8, 1), KeyValuePairs),
             time_reads(B, N, ReadKeys)
     end}.
696 | 
697 | 
%% EUnit generator: times inserting N + 1 sequential keys one by one,
%% then benchmarks reading 1000 randomly chosen keys.
insert_speed_test_() ->
    {timeout, 600,
     fun() ->
             Start = 100000000000000,
             N = 10000,
             Keys = lists:seq(Start, Start+N),
             KeyValuePairs = [{<<I:64/integer>>, <<255:8/integer>>}
                              || I <- Keys],
             %% Encoded like the stored keys; raw integers can never
             %% match a stored binary key. Start + R - 1 is the R:th
             %% element of Keys.
             ReadKeys = [<<(Start + random:uniform(N) - 1):64/integer>>
                         || _ <- lists:seq(1, 1000)],

             %% os:timestamp/0 rather than the deprecated now/0; both
             %% return the tuple format timer:now_diff/2 expects.
             StartTime = os:timestamp(),
             B = lists:foldl(fun ({K, V}, Acc) ->
                                     insert(Acc, K, V)
                             end, new(8, 1), KeyValuePairs),
             ElapsedUs = timer:now_diff(os:timestamp(), StartTime),
             error_logger:info_msg("insert in ~p ms, ~p us per key~n",
                                   [ElapsedUs / 1000,
                                    ElapsedUs / N
                                   ]),
             time_reads(B, N, ReadKeys)
     end}.
719 | 
720 | 
%% Runs find_many/2 over ReadKeys 100 times in a separate process and
%% logs the average and maximum batch time, the average time per key
%% and the derived requests per second. Size is only used in the log
%% message. Waits at most one second for the worker and returns ok
%% whether or not it finished.
time_reads(B, Size, ReadKeys) ->
    Parent = self(),
    %% The reply is tagged with a unique reference so that a reply
    %% arriving after the timeout cannot be mistaken for the reply to
    %% a later call.
    Ref = make_ref(),
    {Pid, MonitorRef} =
        spawn_monitor(
          fun() ->
                  Runs = 100,
                  Timings =
                      lists:map(
                        fun (_) ->
                                StartTime = os:timestamp(),
                                find_many(B, ReadKeys),
                                timer:now_diff(os:timestamp(), StartTime)
                        end, lists:seq(1, Runs)),

                  NumKeys = length(ReadKeys),
                  AvgBatchUs = lists:sum(Timings) / length(Timings),
                  AvgKeyUs = AvgBatchUs / NumKeys,
                  Rps = 1000000 / AvgKeyUs,
                  error_logger:info_msg("Average over ~p runs, ~p keys in dict~n"
                                        "Average fetch ~p keys: ~p us, max: ~p us~n"
                                        "Average fetch 1 key: ~p us~n"
                                        "Theoretical sequential RPS: ~w~n",
                                        [Runs, Size, NumKeys,
                                         AvgBatchUs,
                                         lists:max(Timings),
                                         AvgKeyUs,
                                         trunc(Rps)]),

                  Parent ! {Ref, done}
          end),
    receive
        {Ref, done} ->
            ok;
        %% The worker died; there is nothing more to wait for.
        {'DOWN', MonitorRef, process, Pid, _Reason} ->
            ok
    after 1000 ->
            ok
    end,
    %% Drop the monitor and any 'DOWN' message already delivered.
    erlang:demonitor(MonitorRef, [flush]),
    ok.
748 | 
749 | 
%% EUnit generator: times insert/3 with increasing keys, reported
%% every 1000 operations.
time_write_test_() ->
  Insert = fun (N, B) ->
               insert(B, <<N:64/integer>>, <<255:8/integer>>)
           end,
  {timeout, 600,
   fun () ->
       start_time_interval("Insert", Insert, new(8, 1), 1000, 20000)
   end}.
759 | 
%% EUnit generator: times a random find/2 followed by an insert/3,
%% reported every 1000 operations.
time_write_and_read_test_() ->
  FindThenInsert =
    fun (Count, B) ->
        %% Only the time taken matters; the lookup result is dropped.
        _ = find(B, <<(random:uniform(Count)):64/integer>>),
        insert(B, <<Count:64/integer>>, <<255:8/integer>>)
    end,
  {timeout, 600,
   fun () ->
       start_time_interval("Insert and find", FindThenInsert,
                           new(8, 1), 1000, 10000)
   end}.
771 | 
%% EUnit generator: times append/3 with increasing keys, reported
%% every 1000 operations.
time_appends_test_() ->
  Append = fun (Count, B) ->
               append(B, <<Count:64/integer>>, <<255:8/integer>>)
           end,
  {timeout, 600,
   fun () ->
       start_time_interval("Append", Append, new(8, 1), 1000, 50000)
   end}.
781 | 
%% EUnit generator: times a random find/2 followed by an append/3,
%% reported every 1000 operations.
time_appends_and_find_test_() ->
  FindThenAppend =
    fun (Count, B) ->
        %% Only the time taken matters; the lookup result is dropped.
        _ = find(B, <<(random:uniform(Count)):64/integer>>),
        append(B, <<Count:64/integer>>, <<255:8/integer>>)
    end,
  {timeout, 600,
   fun () ->
       start_time_interval("Append and find", FindThenAppend,
                           new(8, 1), 1000, 50000)
   end}.
793 | 
%% EUnit generator: times a random next/2 followed by an append/3,
%% reported every 1000 operations.
time_appends_and_next_test_() ->
  NextThenAppend =
    fun (Count, B) ->
        %% Only the time taken matters; the lookup result is dropped.
        _ = next(B, <<(random:uniform(Count)):64/integer>>),
        append(B, <<Count:64/integer>>, <<255:8/integer>>)
    end,
  {timeout, 600,
   fun () ->
       start_time_interval("Append and next", NextThenAppend,
                           new(8, 1), 1000, 50000)
   end}.
805 | 
%% Runs Fun through time_interval/6 starting from B and logs the time
%% taken by each batch of MeasureEvery executions. Operation is a
%% label used in the log message only.
start_time_interval(Operation, Fun, B, MeasureEvery, N) ->
  %% os:timestamp/0 rather than the deprecated now/0; both return the
  %% tuple format timer:now_diff/2 expects.
  TimesUs = time_interval(Fun, B, MeasureEvery, N, 1, os:timestamp()),
  %% time_interval/6 measures in microseconds while the message
  %% promises milliseconds, so convert before logging.
  TimesMs = [Us / 1000 || Us <- TimesUs],
  error_logger:info_msg("Time (ms) taken for ~p executions each of ~p:\n~p\n",
                        [MeasureEvery, Operation, TimesMs]).
810 | 
%% Calls Fun(Count, B) for Count up to, but not including, N, feeding
%% the result of each call into the next one. Every time Count is a
%% multiple of MeasureEvery, the microseconds elapsed since T are
%% recorded and the clock is restarted. Returns the recorded times in
%% order.
time_interval(_, _, _, N, N, _) ->
  [];
time_interval(Fun, B, MeasureEvery, N, Count, T) ->
  B2 = Fun(Count, B),
  case Count rem MeasureEvery =:= 0 of
    true ->
      %% os:timestamp/0 rather than the deprecated now/0; both return
      %% the tuple format timer:now_diff/2 expects.
      Elapsed = timer:now_diff(os:timestamp(), T),
      [Elapsed | time_interval(Fun, B2, MeasureEvery, N, Count + 1,
                               os:timestamp())];
    false ->
      time_interval(Fun, B2, MeasureEvery, N, Count + 1, T)
  end.
821 | 
822 | 
%% Inserts every pair into the bindict. Pairs of integers are encoded
%% with ?i2k and ?i2v first; any other pair is inserted as given.
insert_many(Bin, Pairs) ->
    Encode = fun ({K, V}) when is_integer(K) andalso is_integer(V) ->
                     {?i2k(K), ?i2v(V)};
                 ({_K, _V} = Pair) ->
                     Pair
             end,
    lists:foldl(fun (Pair, B) ->
                        {Key, Value} = Encode(Pair),
                        insert(B, Key, Value)
                end, Bin, Pairs).
829 | 
%% inc/1 adds one to the integer encoded in the binary.
inc_test() ->
    Incremented = inc(?i2k(6)),
    ?assertEqual(?i2k(7), Incremented).
832 | 
833 | 
%% bulk_insert/2 adds the new pairs in key order and overwrites the
%% value of a key that already exists (key 10 here).
bulk_insert_test() ->
    Encode = fun (Pairs) -> [{?i2k(K), ?i2v(V)} || {K, V} <- Pairs] end,
    B = insert_many(new(8, 1), [{1, 1}, {10, 10}, {12, 12}]),
    New = bulk_insert(B, Encode([{0, 0}, {5, 5}, {10, 11}, {11, 11}])),

    Expected = Encode([{0, 0}, {1, 1}, {5, 5},
                       {10, 11}, {11, 11}, {12, 12}]),
    ?assertEqual(Expected, to_orddict(New)).
848 | 
%% merge/2 combines both bindicts in key order. For a key present in
%% both (key 10 here), the value from Small is the one expected.
smart_merge_test() ->
    Big   = insert_many(new(8, 1), [{1, 1}, {10, 10}, {25, 25}]),
    Small = insert_many(new(8, 1), [{0, 0}, {10, 11}, {12, 12}]),

    Merged = merge(Small, Big),

    Expected = [{?i2k(K), ?i2v(V)}
                || {K, V} <- [{0, 0}, {1, 1}, {10, 11}, {12, 12}, {25, 25}]],
    ?assertEqual(Expected, to_orddict(Merged)).
861 | 
862 | 
863 | -endif.
864 | 


--------------------------------------------------------------------------------