├── README.md
├── rebar
├── rebar.config
├── src
│   ├── riak_column.erl
│   ├── riak_link_index.app.src
│   ├── riak_link_index.erl
│   └── riak_link_set.erl
└── test
    ├── mock_kv.erl
    ├── mock_kv_store.erl
    └── riak_column_tests.erl
/README.md:
--------------------------------------------------------------------------------
1 | Link Indexing
2 | =============
3 |
4 | This module allows you to create simple secondary indexes
5 | in Riak based on Riak's link model. The basic idea is thus:
6 |
7 | Assume we model person and companies as separate buckets:
8 |
9 | /riak/person/Name
10 | /riak/company/Name
11 |
12 | When you store a `/riak/person/Kresten` object, you describe the
13 | employment relation by including this link in the Kresten object:
14 |
15 | Link: </riak/company/Trifork>; riaktag="idx@employs"
16 |
17 | The magic is that `riak_link_index` will then automatically add (and
18 | maintain) a link in the opposite direction; from `Trifork` to
19 | `Kresten`, and that link will have tag `employs`. The tag needs to
20 | start with `idx@` for `riak_link_index` to recognize it.
21 |
22 | Whenever you update or delete a person object, you can pass in new (or
23 | multiple) such links, and the old reverse links will automatically be
24 | deleted/updated as appropriate. Deleting a company object has no
25 | effect the other way around.
26 |
27 | > The objects that contain the reverse links (in this case
28 | > e.g. `/riak/company/Trifork`) will have special content used to manage
29 | > the links, so you cannot use them for other stuff!
30 |
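    | With the JSON encoding enabled (the default in this module), the body
    | of such an index object looks roughly like this (a sketch based on
    | `src/riak_link_set.erl`; the vclock is an opaque base64 blob):
    |
    |     { "links": [ { "link":   ["person", "kresten", "employs"],
    |                    "vclock": "...",
    |                    "active": [true] } ] }
    |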
31 | This module also allows you to install an `index_hook`, which can be
32 | used to extract links from your objects. Index hooks can be written in
33 | both JavaScript and Erlang.
34 |
35 |
36 | Installation
37 | ------------
38 |
39 | > Notice! this only works on the `master` branch of Riak; this
40 | > does not work on `riak-0.14.*` releases, because it depends on the
41 | > pre- and post-commit hooks to both run in the same internal process
42 | > (the `riak_kv_put_fsm`, if you must know).
43 |
44 | To install, you need to make the `ebin` directory containing
45 | `riak_link_index.beam` accessible to your Riak install. You can do that
46 | by adding a line like this to Riak's `etc/vm.args`:
47 |
48 | -pz /Path/to/riak_function_contrib/other/riak_link_index/ebin
49 |
50 | If you're an Erlang wiz there are other ways, but that should work.
51 |
52 |
53 | Next, you configure a bucket to support indexing. This involves two things:
54 |
55 | 1. Install a set of commit hooks (indexing needs both a pre- and a
56 | post-commit hook).
57 |
58 | 2. (optionally) configure a function to extract index information
59 | from your bucket data. We'll do that later, and start out with
60 | the easy version.
61 |
62 | If your bucket is named `person`, it could be done thus:
63 |
64 | prompt$ cat > bucket_props.json
65 | { "props" : {
66 | "precommit" : [{"mod": "riak_link_index", "fun": "precommit"}],
67 | "postcommit" : [{"mod": "riak_link_index", "fun": "postcommit"}]
68 | }}
69 | ^D
70 | prompt$ curl -X PUT --data @bucket_props.json \
71 | -H 'Content-Type: application/json' \
72 | http://127.0.0.1:8091/riak/person
73 |
74 | There you go: you're ready for some action.
75 |
76 | Explicit Indexing
77 | -----------------
78 |
79 |
80 | The simple indexer now works for the `person` bucket, by interpreting
81 | links on `/riak/person/XXX` objects that have tags starting with
82 | `idx@`. The special `idx@` prefix is recognized by the indexer, and
83 | it will create and maintain a link in the opposite direction, tagged
84 | with whatever comes after the `idx@` prefix.
85 |
86 | Let's say we add me:
87 |
88 | curl -X PUT \
89 | -H 'Link: </riak/company/Trifork>; riaktag="idx@employs"' \
90 | -H 'Content-Type: application/json' \
91 | --data '{ "name": "Kresten Krab Thorup", "employer":"Trifork" }' \
92 | http://127.0.0.1:8091/riak/person/kresten
93 |
94 | As this gets written to Riak, the indexer will then
95 | create an object by the name of `/riak/company/Trifork`,
96 | which has a link pointing back to me:
97 |
98 | curl -v -X GET http://127.0.0.1:8091/riak/company/Trifork
99 | < HTTP/1.1 200 OK
100 | < Link: </riak/person/kresten>; riaktag="employs"
101 | < Content-Length: 0
102 |
103 | If there was already an object at `/company/Trifork`, then the indexer
104 | would leave the contents alone, but still add the reverse link. If no
105 | such object existed, then it would be created with empty contents.
106 |
107 | Link Walking
108 | ------------
109 |
110 | The beauty of this is that you can now do link-walk queries to find
111 | your stuff. For instance, this link query should give you a list of
112 | persons employed at Trifork. Lucky them :-)
113 |
114 | curl http://localhost:8091/riak/company/Trifork/_,employs,_
115 |
116 | Using a `link_index` hook
117 | -------------------------
118 |
119 | You can also install an index hook as a bucket property, which designates
120 | a function that can be used to decide which index records to create. This way
121 | you can keep the index creation on the server side, and more easily
122 | generate additional indexes.
123 |
124 | You install the index hook the same way you install a pre-commit hook, and the
125 | hook can be written in either Erlang or JavaScript, just like precommits.
126 |
127 | // Return list of [Bucket,Key] that will link to me
128 | function employmentIndexing(metaData, contents) {
129 | var personData = JSON.parse(contents);
130 | if(personData.employer) {
131 | return [ ['company', personData.employer] ];
132 | } else {
133 | return [];
134 | }
135 | }
136 |
137 | Assume you have that code in `/tmp/js_source/my_indexer.js`, and
138 | configured `{js_source_dir, "/tmp/js_source"}` in the `riak_kv`
139 | section of your `etc/app.config`.
140 |
141 | Then, to install it as an indexer, you need to install it as a
142 | bucket property in the person bucket. You can have multiple indexes, so
143 | it's a list of functions. Link-index hooks can also be Erlang
144 | functions; see the sketch below.
145 |
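    | For illustration, here is a hypothetical Erlang equivalent of the
    | JavaScript hook above (the module and function names are invented).
    | An Erlang hook receives the object's metadata and value and must
    | return `{ok, [{Bucket, Key}]}`; it is registered with `"mod"`/`"fun"`
    | instead of `"name"`:
    |
    |     %% my_indexer.erl (hypothetical)
    |     -module(my_indexer).
    |     -export([employment_indexing/2]).
    |
    |     employment_indexing(_MetaData, Value) ->
    |         {struct, Props} = mochijson2:decode(Value),
    |         case proplists:get_value(<<"employer">>, Props) of
    |             undefined -> {ok, []};
    |             Employer  -> {ok, [{<<"company">>, Employer}]}
    |         end.
    |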
146 | prompt$ cat > bucket_props.json
147 | { "props" : {
148 | "link_index" : [{"name": "employmentIndexing",
149 | "tag" : "employs"}],
150 | }}
151 | ^D
152 | prompt$ curl -X PUT --data @bucket_props.json \
153 | -H 'Content-Type: application/json' \
154 | http://127.0.0.1:8091/riak/person
155 |
156 | Notice that the link index also needs a `tag` property. You can
157 | install multiple index functions, but they should all have separate
158 | tags. Any `idx@...` tagged links that do not correspond to a
159 | registered link index are processed as "explicit indexing". In fact,
160 | the link_index hook is just a convenient way to have code insert the
161 | `idx@`-links for you.
162 |
163 | Now, we can add objects to the person bucket *without* having to put
164 | the `idx@employs` link on the object. The index hook will do it for
165 | you. Happy you!
166 |
167 | curl -X POST \
168 | -H 'Content-Type: application/json' \
169 | --data '{ "name": "Justin Sheehy", "employer":"Basho" }' \
170 | http://127.0.0.1:8091/riak/person
171 |
172 | > While you can have multiple `link_index`'es, it is important that
173 | > each `link_index` has its own distinguished tag, because
174 | > `riak_link_index` will process each link index hook by first deleting
175 | > any links with said tag, and then recomputing them based on the new
176 | > content.
177 |
178 |
179 | Consistency
180 | -----------
181 |
182 | The indexer will handle delete/update of your records as appropriate,
183 | and should work fine with `allow_mult` buckets too. In fact, it is
184 | recommended to set `allow_mult=true` on the buckets containing
185 | the company objects (`company` in the example above); otherwise
186 | conflicting updates may be lost.
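    |
    | From an attached Erlang console, one way to do that is (a sketch;
    | bucket properties can also be set over HTTP as shown earlier):
    |
    |     {ok, C} = riak:local_client(),
    |     ok = C:set_bucket(<<"company">>, [{allow_mult, true}]).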
187 |
188 | The indexer also manages conflicting updates to the link objects,
189 | which is pretty cool. Say one process deletes a person object at the
190 | same time as another process creates a new person object. In that
191 | case, the index object (in the company bucket) may end up with a
192 | conflicting update (i.e. get siblings), which would normally mean that
193 | someone has to take action to resolve the conflict. To manage this
194 | situation, `riak_link_index` stores a [vclock-backed
195 | set](src/riak_link_set.erl) in the content part of the index object (the
196 | company object). This set abstraction allows automatic merging because
197 | each element in the set has its own vector clock: if someone adds a
198 | link and someone else deletes a different link, the result merges
199 | cleanly.
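    |
    | As a sketch of those merge semantics, using the `riak_link_set` API
    | from this repository (the client ids and keys are made up):
    |
    |     S0 = riak_link_set:new(),
    |     S1 = riak_link_set:add({{<<"person">>,<<"kresten">>}, <<"employs">>}, <<"a">>, S0),
    |     %% client a removes that link; client b concurrently adds another:
    |     SA = riak_link_set:remove({{<<"person">>,<<"kresten">>}, <<"employs">>}, <<"a">>, S1),
    |     SB = riak_link_set:add({{<<"person">>,<<"justin">>}, <<"employs">>}, <<"b">>, S1),
    |     %% merging the siblings honours both the removal and the addition:
    |     [{{<<"person">>,<<"justin">>},<<"employs">>}] =
    |         riak_link_set:values(riak_link_set:merge(SA, SB)).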
200 |
201 |
202 |
203 |
--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krestenkrab/riak_link_index/4555b4c52d298d7a9f911cb629a4f66afece17cb/rebar
--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {erl_opts, [debug_info]}.
2 | {cover_enabled, true}.
3 | {deps, [
4 | {riak_kv, "0.14.*", {git, "git://github.com/basho/riak_kv",
5 | {branch, "master"}}},
6 | {edown, ".*", {git, "git://github.com/esl/edown.git", "HEAD"}}
7 |
8 | ]}.
9 | {edoc_opts, [{doclet, edown_doclet},
10 | {src_path, ["src/"]},
11 | {subpackages, true}]}.
12 |
--------------------------------------------------------------------------------
/src/riak_column.erl:
--------------------------------------------------------------------------------
1 | %% -------------------------------------------------------------------
2 | %%
3 | %% This file is provided to you under the Apache License,
4 | %% Version 2.0 (the "License"); you may not use this file
5 | %% except in compliance with the License. You may obtain
6 | %% a copy of the License at
7 | %%
8 | %% http://www.apache.org/licenses/LICENSE-2.0
9 | %%
10 | %% Unless required by applicable law or agreed to in writing,
11 | %% software distributed under the License is distributed on an
12 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | %% KIND, either express or implied. See the License for the
14 | %% specific language governing permissions and limitations
15 | %% under the License.
16 | %%
17 | %% -------------------------------------------------------------------
18 |
19 | -module(riak_column, [Storage,Bucket,ColumnName]).
20 | -author("Kresten Krab Thorup ").
21 |
22 | %%
23 | %% Based on idea by Erik Soe Soerensen described here
24 | %%
25 | %% http://polymorphictypist.blogspot.com/
26 | %% 2011/04/multi-version-collections-in-riak.html
27 | %%
28 |
29 | -export([lookup/1,add/2,put/2,put/3,delete/1,fold/2]).
30 |
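    | %% Layout: a single "main group" object holds a sorted list of group
    | %% pointers (SHA-1 bit prefixes); each group object holds the entries
    | %% whose hashed row key starts with that prefix. A group is split in
    | %% two when it grows beyond ?MAX_ENTRIES_PER_GROUP entries.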
31 | -record(main_group, { grouppointers=[] :: [bitstring()] }).
32 | -record(group, {entries=[] :: [entry()] }).
33 |
34 | -type vclock() :: vclock:vclock().
35 | -type riak_object() :: riak_object:riak_object().
36 |
37 | -type value() :: {vclock(),list()}.
38 | -type entry() :: {binary(), value()}.
39 |
40 |
41 | -define(GROUP_TOMBSTONE, <<"deleted_group">>).
42 | -define(VALUE_TOMBSTONE, <<"deleted_value">>).
43 |
44 | -ifdef(TEST).
45 | -define(MAX_ENTRIES_PER_GROUP, 3).
46 | -define(edbg(M,A), error_logger:info_msg(M,A)).
47 | -else.
48 | -define(MAX_ENTRIES_PER_GROUP, 100).
49 | -define(edbg(M,A), ok).
50 | -endif.
51 |
52 | %% @private
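    | %% A group's riak key is its bit prefix padded to a whole number of
    | %% bytes, followed by the prefix's bit length, so distinct prefixes
    | %% map to distinct keys.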
53 | group_name(GroupP) when bit_size(GroupP) =< 160 ->
54 | BitSize = bit_size(GroupP),
55 | Bits = BitSize rem 8,
56 | case Bits of
57 | 0 ->
58 | <<GroupP/bitstring, BitSize:8>>;
59 | _ ->
60 | <<GroupP/bitstring, 0:(8-Bits), BitSize:8>>
61 | end.
62 |
63 |
64 |
65 | -spec lookup(RowKey::binary()) -> {ok, value()} | {error, notfound}.
66 | lookup(RowKey) when is_binary(RowKey) ->
67 | {ok, #main_group{grouppointers=GroupPointers}, _} = get_main_group(),
68 | case listfind( bit_prefix_match(RowKey), GroupPointers ) of
69 | {ok, GroupP} ->
70 | ?edbg("looking up ~p (hash=~w)~nin ~p", [RowKey, crypto:sha(RowKey), group_name(GroupP)]),
71 | case lookup_in_group(GroupP,RowKey) of
72 | {ok,_}=V -> V;
73 | {error,_}=E -> E
74 | end;
75 | notfound ->
76 | {error, notfound}
77 | end.
78 |
79 | -spec fold(fun(({binary(), {vclock(), list()}}, term()) -> term()), term()) -> term().
80 | fold(Fun,Acc0) ->
81 | {ok, #main_group{grouppointers=GroupPs}, _} = get_main_group(),
82 | lists:foldl(fun(GroupP,Acc) ->
83 | case get_group(GroupP) of
84 | {ok, #group{ entries=Entries }, _} ->
85 | lists:foldl(Fun, Acc, Entries);
86 | {error, notfound} ->
87 | Acc
88 | end
89 | end,
90 | Acc0,
91 | GroupPs).
92 |
93 | add(RowKey, Value) when is_binary(RowKey) ->
94 | case lookup(RowKey) of
95 | {error, notfound} ->
96 | THIS:put(RowKey, {vclock:fresh(), [Value]});
97 | {ok, {VC, OldValues}} ->
98 | update(RowKey, {vclock:increment(Storage:get_client_id(), VC), [Value| [V || V<-OldValues, V =/=Value]]})
99 | end.
100 |
101 | put(RowKey, {VC, [Value]}) when is_binary(RowKey) ->
102 | put(RowKey, {VC, [Value]}, []).
103 |
104 | put(RowKey, {VC, [Value]}, Options) ->
105 | {ok, Stored} = update(RowKey, {vclock:increment(Storage:get_client_id(), VC), [Value]}),
106 | case proplists:get_bool(returnvalue, Options) of
107 | true -> {ok, Stored};
108 | false -> ok
109 | end.
110 |
111 | -spec update(RowKey::binary(), Value::value()) -> {ok, value()}.
112 | update(RowKey, {VC,Values}) when is_list(Values) ->
113 | {ok, #main_group{grouppointers=Groups}=TheMainGroup, RObj} = get_main_group(),
114 | {ok, GroupP} = listfind( bit_prefix_match(RowKey), Groups),
115 | ?edbg("storing ~p into ~p", [RowKey, group_name(GroupP)]),
116 | case update_group(GroupP, RowKey, {VC,Values}) of
117 | {ok, [], Merged} ->
118 | %% must re-update main group to force later read repair if different
119 | update_main_group(RObj, TheMainGroup),
120 | {ok, Merged};
121 | {ok, [GP1,GP2]=SplitGroupPs, Merged} when is_bitstring(GP1), is_bitstring(GP2) ->
122 | NewMainGroup = #main_group{ grouppointers= lists:sort( SplitGroupPs ++ [R || R <- Groups, R =/= GroupP] ) },
123 | ok = update_main_group(RObj, NewMainGroup),
124 | {ok, Merged}
125 | end.
126 |
127 |
128 | update_group(RObj) ->
129 | MD = dict:store(<<"Links">>, [{{Bucket, ColumnName}, "column"}],
130 | riak_object:get_update_metadata(RObj)),
131 | Storage:put(riak_object:update_metadata(RObj, MD)).
132 |
133 |
134 | update_main_group_links(RObj) ->
135 | #main_group{grouppointers=Pointers} = riak_object:get_update_value(RObj),
136 | GroupLinks = lists:foldl(fun(GroupP,Links) ->
137 | [{{Bucket, group_name(GroupP)}, "colum_group"} | Links]
138 | end,
139 | [],
140 | Pointers),
141 | MD = dict:store(<<"Links">>, GroupLinks, riak_object:get_update_metadata(RObj)),
142 | RObj2 = riak_object:update_metadata(RObj, MD),
143 | RObj2.
144 |
145 | update_main_group(RObj,#main_group{}=MainGroup) ->
146 | RObj1 = riak_object:update_value(RObj, MainGroup),
147 | RObj2 = update_main_group_links( RObj1 ),
148 | Storage:put(RObj2).
149 |
150 | split_by_prefix(N,List) when N<160 ->
151 | split_by_prefix(N, List, {[], []}).
152 |
153 | split_by_prefix(_, [], {L0,L1}) ->
154 | {#group{entries=L0},#group{entries=L1}};
155 | split_by_prefix(Bits, [{RowKey,Value}|Rest], {L0,L1}) ->
156 | case crypto:sha(RowKey) of
157 | <<_:Bits/bitstring,0:1,_/bitstring>> ->
158 | split_by_prefix(Bits, Rest, {[{RowKey,Value}|L0], L1});
159 | <<_:Bits/bitstring,1:1,_/bitstring>> ->
160 | split_by_prefix(Bits, Rest, {L0, [{RowKey,Value}|L1]})
161 | end.
162 |
163 |
164 | update_group(GroupP, RowKey, Value) ->
165 | case get_group(GroupP) of
166 | {ok, #group{ entries=Elems }, RObj} ->
167 | case lists:keyfind(RowKey, 1, Elems) of
168 | {RowKey,OrigValue} ->
169 | Merged = merge_values(Value, OrigValue),
170 | NewEntries = lists:keyreplace(RowKey,1,Elems,{RowKey,Merged}),
171 | update_group(riak_object:update_value(RObj, #group{entries=NewEntries})),
172 | {ok, [], Merged};
173 |
174 | false ->
175 | Merged = Value,
176 | NewEntries = lists:keysort(1,[ {RowKey, Merged} | Elems ]),
177 |
178 | %% group needs splitting?
179 | if length(NewEntries) > ?MAX_ENTRIES_PER_GROUP ->
180 | {Group0,Group1} = split_by_prefix(bit_size(GroupP), NewEntries),
181 | Bits = bit_size(GroupP),
182 | GroupP0 = <<GroupP:Bits/bitstring, 0:1>>,
183 | GroupP1 = <<GroupP:Bits/bitstring, 1:1>>,
184 | ok = update_group(riak_object:new(Bucket, group_name(GroupP0), Group0)),
185 | ok = update_group(riak_object:new(Bucket, group_name(GroupP1), Group1)),
186 | ok = update_group(riak_object:update_value(RObj, ?GROUP_TOMBSTONE)),
187 | {ok, [GroupP0,GroupP1], Merged};
188 |
189 | true ->
190 | update_group(riak_object:update_value(RObj, #group{entries=NewEntries})),
191 | {ok, [], Merged}
192 | end
193 |
194 | end;
195 | {error, notfound} ->
196 | ok = update_group(riak_object:new(Bucket, group_name(GroupP), #group{entries=[{RowKey, Value}]})),
197 | {ok, [], Value}
198 | end.
199 |
200 |
201 | -spec value_result(entry()) -> {ok, value()} | {error, notfound}.
202 |
203 | value_result({_,VE}) ->
204 | case VE of
205 | {_, [?VALUE_TOMBSTONE]} ->
206 | {error, notfound};
207 | {VC,Values} ->
208 | {ok, {VC, [V || V <- Values, V =/= ?VALUE_TOMBSTONE]}}
209 | end.
210 |
211 |
212 | delete(RowKey) ->
213 | case lookup(RowKey) of
214 | {ok, {VC, _}} ->
215 | THIS:put(RowKey, {VC, [?VALUE_TOMBSTONE]});
216 | {error, notfound} ->
217 | ok
218 | end.
219 |
220 |
221 | -spec bit_prefix_match(RowKey::binary()) -> fun( (GroupP::bitstring()) -> boolean() ).
222 | bit_prefix_match(RowKey) ->
223 | KeyHash = crypto:sha(RowKey),
224 | fun(GroupP) ->
225 | Bits = bit_size(GroupP),
226 | PadBits = 160-Bits,
227 | case KeyHash of
228 | <<GroupP:Bits/bitstring, _:PadBits>> ->
229 | true;
230 | _ ->
231 | false
232 | end
233 | end.
234 |
235 | -spec listfind(fun((T) -> boolean()), list(T)) -> notfound | {ok, T}.
236 | listfind(_Fun, []) ->
237 | notfound;
238 | listfind(Fun, [H|T]) ->
239 | case Fun(H) of
240 | true -> {ok, H};
241 | false -> listfind(Fun, T)
242 | end.
243 |
244 | keyzip(N,Fun,TupList1,TupList2) ->
245 | keyzip(N,Fun,TupList1,TupList2,[]).
246 |
247 | %keyzip(N,TupList1,TupList2) ->
248 | % keyzip(N,fun(E1,E2) -> {E1,E2} end,TupList1,TupList2,[]).
249 |
    | keyzip(_,_,[],[],Result) ->
    |     lists:reverse(Result);
    | keyzip(N,Fun,[],[Tup2|Rest2],Result) ->
    |     keyzip(N,Fun,[],Rest2,[Fun(undefined,Tup2)|Result]);
    | keyzip(N,Fun,[Tup1|Rest1],[],Result) ->
    |     keyzip(N,Fun,Rest1,[],[Fun(Tup1,undefined)|Result]);
250 | keyzip(N,Fun,[Tup1|Rest1]=L1,[Tup2|Rest2]=L2, Result) ->
251 | Key1 = element(N, Tup1),
252 | Key2 = element(N, Tup2),
253 | if
254 | Key1 =:= Key2 ->
255 | keyzip(N,Fun,Rest1,Rest2,[Fun(Tup1,Tup2)|Result]);
256 | Key1 < Key2 ->
257 | keyzip(N,Fun,Rest1,L2,[Fun(Tup1,undefined)|Result]);
258 | true ->
259 | keyzip(N,Fun,L1,Rest2,[Fun(undefined,Tup2)|Result])
260 | end.
261 |
262 | -spec get_main_group() -> {ok, #main_group{}, riak_object()} | {error, _}.
263 | get_main_group() ->
264 | case Storage:get(Bucket, ColumnName) of
265 | {error, notfound} ->
266 | Storage:put(riak_object:new(Bucket, ColumnName, #main_group{ grouppointers=[<<>>] }),
267 | [{returnbody, true}]),
268 | get_main_group();
269 | {error, E} ->
270 | {error, E};
271 | {ok, MainGroupObject} ->
272 | case riak_object:get_values(MainGroupObject) of
273 | [] ->
274 | {error, notfound};
275 | [MainGroup] ->
276 | {ok, MainGroup, MainGroupObject};
277 | MainGroups ->
278 | %% do read repair
279 | MergedMainGroup = lists:foldl(fun merge_main_groups/2, #main_group{}, MainGroups),
280 | {ok, RObj} = Storage:put(update_main_group_links(riak_object:update_value(MainGroupObject,
281 | MergedMainGroup)),
282 | [{returnbody, true}]),
283 | {ok, MergedMainGroup, RObj}
284 | end
285 | end.
286 |
287 | merge_main_groups(#main_group{grouppointers=Groups1},
288 | #main_group{grouppointers=Groups2}) ->
289 | #main_group{ grouppointers=merge_grouppointers(Groups1, Groups2) }.
290 |
291 | merge_grouppointers(Groups1,Groups2) ->
292 | R = lists:umerge(Groups1,Groups2),
293 | {Dead,Alive} = compute_dead_or_live(R, {[],[]}),
294 | if Dead =:= [] -> ok;
295 | true -> read_repair_dead_groups(Dead)
296 | end,
297 | Alive.
298 |
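    | %% A pointer is "dead" when a longer pointer extends it (its group was
    | %% split); dead groups have their entries re-inserted and are then
    | %% tombstoned by read_repair_dead_groups/1.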
299 | compute_dead_or_live([R1,R2|Rest], {Dead,Alive}) when bit_size(R1) < bit_size(R2) ->
300 | BS1 = bit_size(R1),
301 | case R2 of
302 | <<R1:BS1/bitstring, _/bitstring>> ->
303 | compute_dead_or_live([R2|Rest], {[R1|Dead], Alive});
304 | _ ->
305 | compute_dead_or_live([R2|Rest], {Dead, [R1|Alive]})
306 | end;
307 | compute_dead_or_live(Live, {Dead,Alive}) ->
308 | {Dead, lists:reverse(Alive, Live)}.
309 |
310 | read_repair_dead_groups([]) ->
311 | ok;
312 | read_repair_dead_groups([GroupP|Rest]) ->
313 | case get_group(GroupP) of
314 | {ok, #group{entries=Elms}, RObj} ->
315 | ok = bulk_update(Elms,binary),
316 | ok = Storage:put(riak_object:update_value(RObj,?GROUP_TOMBSTONE));
317 | {error, E} ->
318 | error_logger:info_msg("read repair failed; ignoring error: ~p", [E]),
319 | ok
320 | end,
321 | read_repair_dead_groups(Rest).
322 |
323 | %% TODO: make this work sensibly
324 | bulk_update([], _) -> ok;
325 | bulk_update([{K,BVs}|Elms], binary) ->
326 | update(K,BVs),
327 | bulk_update(Elms, binary);
328 | bulk_update([{K,VEs}|Elms], term) ->
329 | update(K,VEs),
330 | bulk_update(Elms, term).
331 |
332 | -spec merge_entries([entry()], [entry()]) -> [entry()].
333 | merge_entries(Elms1,[]) ->
334 | Elms1;
335 | merge_entries([],Elms2) ->
336 | Elms2;
337 | merge_entries(Elms1,Elms2) ->
338 | keyzip(1,
339 | fun(Elm1,undefined) ->
340 | Elm1;
341 | (undefined,Elm2) ->
342 | Elm2;
343 | (Elm1,Elm2) ->
344 | merge_entrie_pair(Elm1,Elm2)
345 | end,
346 | Elms1,
347 | Elms2).
348 |
349 | merge_entrie_pair({Key,VE1}, {Key,VE2}) ->
350 | {Key, merge_values(VE1,VE2)}.
351 |
352 | merge_values({VC1,ValueList1}=Elm1, {VC2,ValueList2}=Elm2) ->
353 | case vclock:descends(VC1,VC2) of
354 | true ->
355 | Elm1;
356 | false ->
357 | case vclock:descends(VC2,VC1) of
358 | true ->
359 | Elm2;
360 | false ->
361 | ValList = lists:umerge(lists:usort(ValueList1),lists:usort(ValueList2)),
362 | {vclock:merge(VC1,VC2),ValList}
363 | end
364 | end.
365 |
366 |
367 | -spec lookup_in_group(bitstring(),binary()) -> {ok, value()} | {error, notfound}.
368 | lookup_in_group(Groupp, RowKey) ->
369 | case get_group(Groupp) of
370 | {ok, #group{entries=Elems}, _} ->
371 | case lists:keyfind(RowKey, 1, Elems) of
372 | false ->
373 | {error, notfound};
374 | KVE ->
375 | value_result(KVE)
376 | end;
377 | {error, _}=Error ->
378 | Error
379 | end.
380 |
381 | -spec get_group(GroupP::bitstring()) -> {ok, #group{}, riak_object()} | {error, _}.
382 | get_group(GroupP) ->
383 | case Storage:get(Bucket, group_name(GroupP)) of
384 | {error, E} ->
385 | {error, E};
386 | {ok, GroupObject} ->
387 | case [ GroupData ||
388 | GroupData <- riak_object:get_values(GroupObject),
389 | GroupData =/= ?GROUP_TOMBSTONE ] of
390 |
391 | %% TODO: read-repair if sibling is deleted
392 |
393 | [] ->
394 | {error, notfound};
395 |
396 | [#group{}=Group] ->
397 | {ok, Group, GroupObject};
398 |
399 | ManyGroups ->
400 | error_logger:info_msg("ManyGroups: ~p", [ManyGroups]),
401 | NewGroup = lists:foldl(fun(#group{entries=E1},#group{entries=E2}) ->
402 | #group{entries=merge_entries(E1,E2)}
403 | end,
404 | #group{},
405 | ManyGroups),
406 |
407 | %% read repair the group
408 | {ok, RObj} =
409 | Storage:put(riak_object:update_value(GroupObject, NewGroup),
410 | [{returnbody, true}]),
411 |
412 | {ok, NewGroup, RObj}
413 | end
414 | end.
415 |
--------------------------------------------------------------------------------
/src/riak_link_index.app.src:
--------------------------------------------------------------------------------
1 | {application, riak_link_index,
2 | [{description, "Simple link-based indexing for riak"},
3 | {vsn, "0.1"},
4 | {modules, []},
5 | {registered, []},
6 | {applications, [kernel, stdlib]},
7 | {env, []}
8 | ]}.
9 |
10 |
--------------------------------------------------------------------------------
/src/riak_link_index.erl:
--------------------------------------------------------------------------------
1 | %% -------------------------------------------------------------------
2 | %%
3 | %% This file is provided to you under the Apache License,
4 | %% Version 2.0 (the "License"); you may not use this file
5 | %% except in compliance with the License. You may obtain
6 | %% a copy of the License at
7 | %%
8 | %% http://www.apache.org/licenses/LICENSE-2.0
9 | %%
10 | %% Unless required by applicable law or agreed to in writing,
11 | %% software distributed under the License is distributed on an
12 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | %% KIND, either express or implied. See the License for the
14 | %% specific language governing permissions and limitations
15 | %% under the License.
16 | %%
17 | %% -------------------------------------------------------------------
18 |
19 | -module(riak_link_index).
20 | -author("Kresten Krab Thorup ").
21 |
22 | -export([precommit/1,postcommit/1]).
23 |
24 | -define(CTYPE_ERLANG_BINARY,"application/x-erlang-binary").
25 | -define(CTYPE_JSON,"application/json").
26 | -define(MD_CTYPE,<<"content-type">>).
27 | -define(MD_LINKS,<<"Links">>).
28 | -define(MD_DELETED,<<"X-Riak-Deleted">>).
29 | -define(IDX_PREFIX,"idx@").
30 | -define(JSPOOL_HOOK, riak_kv_js_hook).
31 |
32 | -ifdef(DEBUG).
33 | -define(debug(A,B),error_logger:info_msg(A,B)).
34 | -else.
35 | -define(debug(A,B),ok).
36 | -endif.
37 |
38 | -define(ENCODE_JSON,true).
39 |
40 | precommit(Object) ->
41 |
42 | ?debug("precommit in ~p", [Object]),
43 |
44 | Bucket = riak_object:bucket(Object),
45 | Key = riak_object:key(Object),
46 |
47 | %% Indexing works in two phases: precommit will use a hook to add links as
48 | %%
49 | %%    </riak/IBucket/IKey>; riaktag="idx@Tag"
50 | %%
51 | %% to the object being stored. Then postcommit creates empty-contents
52 | %% objects named IBucket/IKey, with links to this object thus:
53 | %%
54 | %%    </riak/Bucket/Key>; riaktag="Tag"
55 | %%
56 |
57 | case is_updated(Object) of
58 | true ->
59 | OldLinksToMe = get_index_links(riak_object:get_metadatas(Object)),
60 | [{MD,_Value}] = index_contents(Bucket,
61 | [{ riak_object:get_update_metadata(Object),
62 | riak_object:get_update_value(Object) }]),
63 | IndexedObject = riak_object:update_metadata(Object, MD);
64 |
65 | false ->
66 | {ok, StorageMod} = riak:local_client(),
67 | case StorageMod:get(Bucket, Key) of
68 | {ok, OldRO} ->
69 | OldLinksToMe = get_index_links(riak_object:get_metadatas(OldRO));
70 | _ ->
71 | OldLinksToMe = []
72 | end,
73 | MDVs = index_contents(Bucket,
74 | riak_object:get_contents(Object)),
75 | IndexedObject = riak_object:set_contents(Object, MDVs)
76 | end,
77 |
78 | %% this only works in recent riak_kv master branch
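    | %% (pre- and postcommit hooks run in the same riak_kv_put_fsm process,
    | %% so the process dictionary can carry the old links across)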
79 | put(?MODULE, {old_links, OldLinksToMe}),
80 |
81 | ?debug("precommit out ~p", [IndexedObject]),
82 |
83 | IndexedObject.
84 |
85 | postcommit(Object) ->
86 | try
87 |
88 | case erlang:erase(?MODULE) of
89 | {old_links, OldLinksToMe} ->
90 | %% compute links to add/remove in postcommit
91 | NewLinksToMe = get_index_links(Object),
92 | LinksToRemove = ordsets:subtract(OldLinksToMe, NewLinksToMe),
93 | LinksToAdd = ordsets:subtract(NewLinksToMe, OldLinksToMe),
94 |
95 | ?debug("postcommit: old=~p, new=~p", [OldLinksToMe,NewLinksToMe]),
96 |
97 | {ok, StorageMod} = riak:local_client(),
98 | Bucket = riak_object:bucket(Object),
99 | Key = riak_object:key(Object),
100 | ClientID = StorageMod:get_client_id(),
101 | add_links(StorageMod, LinksToAdd, Bucket, Key, ClientID),
102 | remove_links(StorageMod, LinksToRemove, Bucket, Key, ClientID),
103 | ok;
104 | _ ->
105 | error_logger:error_msg("error in pre/postcommit interaction", []),
106 | ok
107 | end
108 |
109 | catch
110 | Class:Reason ->
111 | error_logger:error_msg("error in postcommit ~p:~p ~p", [Class,Reason,erlang:get_stacktrace()]),
112 | ok
113 | end
114 | .
115 |
116 | add_links(StorageMod, Links, Bucket, Key, ClientID) ->
117 | lists:foreach(fun({{IndexB,IndexK}, <<?IDX_PREFIX, Tag/binary>>}) ->
118 | add_link(StorageMod, IndexB, IndexK, {{Bucket,Key},Tag}, ClientID)
119 | end,
120 | Links).
121 |
122 |
123 | add_link(StorageMod, Bucket, Key, Link, ClientID) ->
124 | update_links(
125 | fun(VLinkSet) ->
126 | ?debug("adding link ~p/~p -> ~p", [Bucket, Key, Link]),
127 | riak_link_set:add(Link, ClientID, VLinkSet)
128 | end,
129 | StorageMod, Bucket, Key).
130 |
131 | remove_links(StorageMod, Links, Bucket, Key, ClientID) ->
132 | lists:foreach(fun({{IndexB,IndexK}, <<?IDX_PREFIX, Tag/binary>>}) ->
133 | remove_link(StorageMod, IndexB, IndexK, {{Bucket,Key},Tag}, ClientID)
134 | end,
135 | Links).
136 |
137 | remove_link(StorageMod, Bucket, Key, Link, ClientID) ->
138 | update_links(
139 | fun(VLinkSet) ->
140 | ?debug("removing link ~p/~p -> ~p", [Bucket, Key, Link]),
141 | riak_link_set:remove(Link, ClientID, VLinkSet)
142 | end,
143 | StorageMod, Bucket, Key).
144 |
145 | update_links(Fun,StorageMod,Bucket,Key) ->
146 | case StorageMod:get(Bucket,Key) of
147 | {ok, Object} ->
148 | ?debug("1", []),
149 | VLinkSet = decode_merge_vsets(Object),
150 | ?debug("decoded: ~p", [VLinkSet]),
151 | VLinkSet2 = Fun(VLinkSet),
152 | ?debug("transformed: ~p", [VLinkSet2]),
153 | Links = riak_link_set:values(VLinkSet2),
154 | ?debug("new links: ~p", [Links]),
155 | case ?ENCODE_JSON of
156 | true ->
157 | Data = iolist_to_binary(mochijson2:encode(riak_link_set:to_json(VLinkSet2))),
158 | CType = ?CTYPE_JSON;
159 | false ->
160 | Data = term_to_binary(VLinkSet2, [compressed]),
161 | CType = ?CTYPE_ERLANG_BINARY
162 | end,
163 | IO1 = riak_object:update_value(Object, Data),
164 | Updated = riak_object:update_metadata(IO1,
165 | dict:store(?MD_CTYPE, CType,
166 | dict:store(?MD_LINKS, Links,
167 | riak_object:get_update_metadata(IO1))));
168 | _Got ->
169 | ?debug("2: ~p from get(~p,~p)", [_Got, Bucket, Key]),
170 | VLinkSet2 = Fun(riak_link_set:new()),
171 | ?debug("new set: ~p", [VLinkSet2]),
172 | case catch (riak_link_set:values(VLinkSet2)) of
173 | Links -> ok
174 | end,
175 | ?debug("new links: ~p", [Links]),
176 | case ?ENCODE_JSON of
177 | true ->
178 | Data = iolist_to_binary(mochijson2:encode(riak_link_set:to_json(VLinkSet2))),
179 | CType = ?CTYPE_JSON;
180 | false ->
181 | Data = term_to_binary(VLinkSet2, [compressed]),
182 | CType = ?CTYPE_ERLANG_BINARY
183 | end,
184 | Updated = riak_object:new(Bucket,Key,
185 | Data,
186 | dict:from_list([{?MD_CTYPE, CType},
187 | {?MD_LINKS, Links}]))
188 | end,
189 |
190 | ?debug("storing ~p", [Updated]),
191 | ok = StorageMod:put(Updated, 1).
192 |
193 |
194 | decode_merge_vsets(Object) ->
195 | lists:foldl(fun ({MD,V},Dict) ->
196 | case dict:fetch(?MD_CTYPE, MD) of
197 | ?CTYPE_ERLANG_BINARY ->
198 | Dict2 = binary_to_term(V),
199 | riak_link_set:merge(Dict,Dict2);
200 | ?CTYPE_JSON ->
201 | Dict2 = riak_link_set:from_json(mochijson2:decode(V)),
202 | riak_link_set:merge(Dict,Dict2);
203 | _ ->
204 | Dict
205 | end
206 | end,
207 | dict:new(),
208 | riak_object:get_contents(Object)).
209 |
210 |
211 | get_index_links(MDList) ->
212 | ordsets:filter(fun({_, <<?IDX_PREFIX, _/binary>>}) ->
213 | true;
214 | (_) ->
215 | false
216 | end,
217 | get_all_links(MDList)).
218 |
219 | get_all_links(Object) when element(1,Object) =:= r_object ->
220 | get_all_links
221 | (case is_updated(Object) of
222 | true ->
223 | [riak_object:get_update_metadata(Object)]
224 | ++ riak_object:get_metadatas(Object);
225 | false ->
226 | riak_object:get_metadatas(Object)
227 | end);
228 |
229 | get_all_links(MetaDatas) when is_list(MetaDatas) ->
230 | Links = lists:foldl(fun(MetaData, Acc) ->
231 | case dict:find(?MD_LINKS, MetaData) of
232 | error ->
233 | Acc;
234 | {ok, LinksList} ->
235 | LinksList ++ Acc
236 | end
237 | end,
238 | [],
239 | MetaDatas),
240 |
241 | ordsets:from_list(Links).
242 |
243 | index_contents(Bucket, Contents) ->
244 |
245 | %% grab indexes from bucket properties
246 | {ok, IndexHooks} = get_index_hooks(Bucket),
247 |
248 | ?debug("hooks are: ~p", [IndexHooks]),
249 |
250 | lists:map
251 | (fun({MD,Value}) ->
252 | case dict:find(?MD_DELETED, MD) of
253 | {ok, "true"} ->
254 | {remove_idx_links(MD),Value};
255 | _ ->
256 | NewMD = compute_indexed_md(MD, Value, IndexHooks),
257 | {NewMD, Value}
258 | end
259 | end,
260 | Contents).
261 |
262 | remove_idx_links(MD) ->
263 | %% remove any "idx#..." links
264 | case dict:find(?MD_LINKS, MD) of
265 | error ->
266 | MD;
267 | {ok, Links} ->
268 | dict:store
269 | (?MD_LINKS,
270 | lists:filter(fun({_,<<?IDX_PREFIX, _/binary>>}) ->
271 | false;
272 | (_) ->
273 | true
274 | end,
275 | Links),
276 | MD)
277 | end.
278 |
279 |
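    | %% For each hook: drop any existing idx@Tag links for that hook's tag,
    | %% invoke the hook to obtain [{Bucket,Key}] targets, and re-add them
    | %% as idx@Tag links.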
280 | compute_indexed_md(MD, Value, IndexHooks) ->
281 | lists:foldl
282 | (fun({struct, PropList}=IndexHook, MDAcc) ->
283 | {<<"tag">>, Tag} = proplists:lookup(<<"tag">>, PropList),
284 | Links = case dict:find(?MD_LINKS, MDAcc) of
285 | error -> [];
286 | {ok, MDLinks} -> MDLinks
287 | end,
288 | IdxTag = <<?IDX_PREFIX, Tag/binary>>,
289 | KeepLinks =
290 | lists:filter(fun({{_,_}, TagValue}) -> TagValue =/= IdxTag end,
291 | Links),
292 | NewLinksSansTag =
293 | try apply_index_hook(IndexHook, MD, Value) of
294 | {erlang, _, {ok, IL}} when is_list(IL) ->
295 | IL;
296 | {js, _, {ok, IL}} when is_list(IL) ->
297 | IL;
298 | _Val ->
299 | error_logger:error_msg
300 | ("indexing function returned ~p", [_Val]),
301 | []
302 | catch
303 | _:_ ->
304 | error_logger:error_msg
305 | ("exception invoking indexing function", []),
306 | []
307 | end,
308 |
309 | ResultLinks =
310 | lists:map(fun({Bucket,Key}) when is_binary(Bucket), is_binary(Key) ->
311 | {{Bucket, Key}, IdxTag};
312 | ([Bucket, Key]) when is_binary(Bucket), is_binary(Key) ->
313 | {{Bucket, Key}, IdxTag}
314 | end,
315 | NewLinksSansTag)
316 | ++
317 | KeepLinks,
318 |
319 | dict:store(?MD_LINKS, ResultLinks, MDAcc)
320 | end,
321 | MD,
322 | IndexHooks).
323 |
324 |
325 | %%%%%% code from riak_kv_put_fsm %%%%%%
326 |
327 |
328 | get_index_hooks(Bucket) ->
329 |
330 | {ok,Ring} = riak_core_ring_manager:get_my_ring(),
331 | BucketProps = riak_core_bucket:get_bucket(Bucket, Ring),
332 |
333 | IndexHooks = proplists:get_value(link_index, BucketProps, []),
334 | case IndexHooks of
335 | <<"none">> ->
336 | {ok, []};
337 | {struct, Hook} ->
338 | {ok, [{struct, Hook}]};
339 | IndexHooks when is_list(IndexHooks) ->
340 | {ok, IndexHooks};
341 | V ->
342 | error_logger:error_msg("bad value in bucket_prop ~p:link_index: ~p", [Bucket,V]),
343 | {ok, []}
344 | end.
345 |
346 |
347 | apply_index_hook({struct, Hook}, MD, Value) ->
348 | Mod = proplists:get_value(<<"mod">>, Hook),
349 | Fun = proplists:get_value(<<"fun">>, Hook),
350 | JSName = proplists:get_value(<<"name">>, Hook),
351 | invoke_hook(Mod, Fun, JSName, MD, Value);
352 | apply_index_hook(HookDef, _, _) ->
353 | {error, {invalid_hook_def, HookDef}}.
354 |
355 | invoke_hook(Mod0, Fun0, undefined, MD, Value) when Mod0 /= undefined, Fun0 /= undefined ->
356 | Mod = binary_to_atom(Mod0, utf8),
357 | Fun = binary_to_atom(Fun0, utf8),
358 | try
359 | {erlang, {Mod, Fun}, Mod:Fun(MD, Value)}
360 | catch
361 | Class:Exception ->
362 | {erlang, {Mod, Fun}, {'EXIT', Mod, Fun, Class, Exception}}
363 | end;
364 | invoke_hook(undefined, undefined, JSName, MD, Value) when JSName /= undefined ->
365 | {js, JSName, riak_kv_js_manager:blocking_dispatch
366 | (?JSPOOL_HOOK, {{jsfun, JSName}, [jsonify_metadata(MD), Value]}, 5)};
367 | invoke_hook(_, _, _, _, _) ->
368 | {error, {invalid_hook_def, no_hook}}.
369 |
370 |
371 |
372 |
373 | %%%%% code from riak_object %%%%%%
374 |
375 | jsonify_metadata(MD) ->
376 | MDJS = fun({LastMod, Now={_,_,_}}) ->
377 | % convert Now to JS-readable time string
378 | {LastMod, list_to_binary(
379 | httpd_util:rfc1123_date(
380 | calendar:now_to_local_time(Now)))};
381 | ({<<"Links">>, Links}) ->
382 | {<<"Links">>, [ [B, K, T] || {{B, K}, T} <- Links ]};
383 | ({Name, List=[_|_]}) ->
384 | {Name, jsonify_metadata_list(List)};
385 | ({Name, Value}) ->
386 | {Name, Value}
387 | end,
388 | {struct, lists:map(MDJS, dict:to_list(MD))}.
389 |
390 | %% @doc convert strings to binaries, and proplists to JSON objects
391 | jsonify_metadata_list([]) -> [];
392 | jsonify_metadata_list(List) ->
393 | Classifier = fun({Key,_}, Type) when (is_binary(Key) orelse is_list(Key)),
394 | Type /= array, Type /= string ->
395 | struct;
396 | (C, Type) when is_integer(C), C >= 0, C =< 256,
397 | Type /= array, Type /= struct ->
398 | string;
399 | (_, _) ->
400 | array
401 | end,
402 | case lists:foldl(Classifier, undefined, List) of
403 | struct -> {struct, [ {if is_list(Key) -> list_to_binary(Key);
404 | true -> Key
405 | end,
406 | if is_list(Value) -> jsonify_metadata_list(Value);
407 | true -> Value
408 | end}
409 | || {Key, Value} <- List]};
410 | string -> list_to_binary(List);
411 | array -> List
412 | end.
413 |
414 | is_updated(O) ->
415 | M = riak_object:get_update_metadata(O),
416 | V = riak_object:get_update_value(O),
417 | case dict:find(clean, M) of
418 | error -> true;
419 | {ok,_} ->
420 | case V of
421 | undefined -> false;
422 | _ -> true
423 | end
424 | end.
425 |
--------------------------------------------------------------------------------
/src/riak_link_set.erl:
--------------------------------------------------------------------------------
1 | %% -------------------------------------------------------------------
2 | %%
3 | %% This file is provided to you under the Apache License,
4 | %% Version 2.0 (the "License"); you may not use this file
5 | %% except in compliance with the License. You may obtain
6 | %% a copy of the License at
7 | %%
8 | %% http://www.apache.org/licenses/LICENSE-2.0
9 | %%
10 | %% Unless required by applicable law or agreed to in writing,
11 | %% software distributed under the License is distributed on an
12 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | %% KIND, either express or implied. See the License for the
14 | %% specific language governing permissions and limitations
15 | %% under the License.
16 | %%
17 | %% -------------------------------------------------------------------
18 |
19 | %%@doc
20 | %% This is a temporal set abstraction, in which each add/remove
21 | %% operation is augmented with a vclock timestamp. Thus, it allows
22 | %% reordering (in physical time) of add and remove operations, while
23 | %% leaving only the vclock-time scale observable, and specifically
24 | %% it allows merging of concurrent updates in an orderly fashion.
25 | %%
26 | %% Essentially, a vset is a [vector
27 | %% map](http://www.javalimit.com/2011/02/the-beauty-of-vector-clocked-data.html)
28 | %% which uses the "contained values" as keys, and true and/or false
29 | %% as the value. @end
30 |
31 | -module(riak_link_set).
32 | -author("Kresten Krab Thorup ").
33 |
34 | -export([new/0,contains/2,add/3,remove/3,merge/1,merge/2,values/1]).
35 | -export([to_json/1,from_json/1]).
36 |
37 | -ifdef(TEST).
38 | -include_lib("eunit/include/eunit.hrl").
39 | -endif.
40 |
41 | new() ->
42 | dict:new().
43 |
44 | values(VSet) ->
45 | dict:fold(fun(Value,{_VC,Bools},Acc) ->
46 | case lists:any(fun(Bool) -> Bool end, Bools) of
47 | true -> [Value|Acc];
48 | false -> Acc
49 | end
50 | end,
51 | [],
52 | VSet).
53 |
54 | contains(Value,VSet) ->
55 |     case dict:find(Value, VSet) of
56 |         error -> false;
57 |         {ok, {_VC,Bools}} ->
58 |             lists:any(fun(Bool) -> Bool end, Bools)
59 |     end.
60 |
61 |
62 | value_merge({VC1,V1}=O1,{VC2,V2}=O2) ->
63 | case vclock:descends(VC1,VC2) of
64 | true ->
65 | O1;
66 | false ->
67 | case vclock:descends(VC2,VC1) of
68 | true ->
69 | O2;
70 | false ->
71 | {vclock:merge(VC1,VC2), lists:usort(V1 ++ V2)}
72 | end
73 | end.
74 |
75 | merge(VSet1,VSet2) ->
76 | dict:merge(fun(_Key, V1, V2) -> value_merge(V1, V2) end, VSet1, VSet2).
77 |
78 | merge([]) ->
79 | dict:new();
80 | merge([S1]) ->
81 | S1;
82 | merge([S1|Rest]) ->
83 | lists:foldl(fun(Set1,Set2) -> merge(Set1,Set2) end,
84 | S1,
85 | Rest).
86 |
87 | get_vclock(Value,VSet) ->
88 | case dict:find(Value,VSet) of
89 | error ->
90 | vclock:fresh();
91 | {ok, {VC,_}} ->
92 | VC
93 | end.
94 |
95 | add(Value,ClientID,VSet) ->
96 | VClock = get_vclock (Value, VSet),
97 | VC2 = vclock:increment(ClientID,VClock),
98 | dict:store(Value,{VC2,[true]}, VSet).
99 |
100 | remove(Value,ClientID,VSet) ->
101 | VClock = get_vclock (Value, VSet),
102 | VC2 = vclock:increment(ClientID,VClock),
103 | dict:store(Value,{VC2,[false]}, VSet).
104 |
105 | to_json(VSet) ->
106 | {struct, [{<<"links">>,
107 | dict:fold(fun(Link,{VClock,Bools}, Acc) ->
108 | [{struct,
109 | [
110 | {<<"link">>, link_to_json(Link)},
111 | {<<"vclock">>, vclock_to_json(VClock)},
112 | {<<"active">>,Bools}
113 | ]} |Acc]
114 | end,
115 | [],
116 | VSet)}]}.
117 |
118 | from_json({struct, [{<<"links">>, JSONLinks}]}) ->
119 | lists:foldl(fun({struct, Members}, Dict) ->
120 | {_, JSONLink} = lists:keyfind(<<"link">>, 1, Members),
121 | {_, JSONVClock} = lists:keyfind(<<"vclock">>, 1, Members),
122 | {_, Bools} = lists:keyfind(<<"active">>, 1, Members),
123 | dict:store(link_from_json(JSONLink),
124 | {vclock_from_json(JSONVClock), Bools},
125 | Dict)
126 | end,
127 | dict:new(),
128 | JSONLinks).
129 |
130 | link_from_json([Bucket,Key,Tag]) ->
131 | {{list_to_binary(mochiweb_util:unquote(Bucket)),
132 | list_to_binary(mochiweb_util:unquote(Key))},
133 | list_to_binary(mochiweb_util:unquote(Tag))}.
134 |
135 | link_to_json({{Bucket,Key},Tag}) ->
136 | [list_to_binary(mochiweb_util:quote_plus(Bucket)),
137 | list_to_binary(mochiweb_util:quote_plus(Key)),
138 | list_to_binary(mochiweb_util:quote_plus(Tag))].
139 |
140 | vclock_from_json(Base64Data) ->
141 | binary_to_term(zlib:unzip(base64:decode(Base64Data))).
142 |
143 | vclock_to_json(Clocks) ->
144 | base64:encode(zlib:zip(term_to_binary(Clocks))).
145 |
146 | -ifdef(TEST).
147 |
148 |
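    | %% Minimal sanity check of the add/remove/merge semantics (an
    | %% illustrative test; atoms stand in for links and client ids):
    | add_remove_merge_test() ->
    |     S0 = new(),
    |     S1 = add(a, client1, S0),
    |     SA = remove(a, client1, S1),
    |     SB = add(b, client2, S1),
    |     ?assertEqual([b], values(merge(SA, SB))).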
149 |
150 | -endif.
151 |
152 |
--------------------------------------------------------------------------------
/test/mock_kv.erl:
--------------------------------------------------------------------------------
1 | -module(mock_kv).
2 |
3 | -export([with_mock_store/5]).
4 |
5 | %%%==================== Mock store stuff:
6 | create_mock_store(Nr, ClientID, Bucket, MapRedDelay, Contents) when is_integer(Nr) ->
7 | Table1 = ets:new(content,[public]),
8 | Table2 = ets:new(meta,[public]),
9 | Instance = mock_kv_store:new(ClientID, Table1, Table2, Bucket, MapRedDelay),
10 | Instance:init(Contents),
11 | Instance.
12 |
13 | with_mock_store(Nr, Bucket, Data, Delay, Body) when is_function(Body,1) ->
14 | ClientID = list_to_binary("peer-"++integer_to_list(Nr)),
15 | MockStore = create_mock_store(Nr, ClientID, Bucket, Delay, Data),
16 | try Body(MockStore)
17 | catch
18 | Class:Reason ->
19 | All = ets:tab2list(MockStore:content_table()),
20 | error_logger:error_msg("Failed with ~p:~p~nStore= ~p", [Class,Reason,All]),
21 | erlang:raise(Class,Reason,erlang:get_stacktrace())
22 | after
23 | MockStore:stop()
24 | end.
25 |
26 |
--------------------------------------------------------------------------------
/test/mock_kv_store.erl:
--------------------------------------------------------------------------------
1 | -module(mock_kv_store, [ClientID, ContentTable, MetaTable, MainBucket, MapRedDelay]).
2 |
3 | -export([get/2, put/1, put/2, put/5, get_bucket/1, set_bucket/2, mapred_bucket_stream/3, get_client_id/0, content_table/0]).
4 |
5 | -export([init/1, stop/0]).
6 |
7 | -include_lib("eunit/include/eunit.hrl").
8 |
9 | -ifdef(TEST).
10 | -export([assertEquals/1,get_contents/0]).
11 | -endif.
12 |
13 | init(Contents) ->
14 | ets:insert(MetaTable, {{bucket_props, MainBucket}, []}),
15 |
16 | lists:foreach(fun(Obj) ->
17 | TabKey = {MainBucket, riak_object:key(Obj)},
18 | ets:insert(ContentTable, {TabKey, Obj})
19 | end,
20 | Contents),
21 | ok.
22 |
23 | content_table() ->
24 | ContentTable.
25 |
26 | get_client_id() ->
27 | ClientID.
28 |
29 | stop() ->
30 | ets:delete(ContentTable),
31 | ets:delete(MetaTable),
32 | ok.
33 |
34 | get_bucket(Bucket) ->
35 | ets:lookup_element(MetaTable, {bucket_props, Bucket}, 2).
36 |
37 | set_bucket(Bucket, NewProps) ->
38 | OldProps = get_bucket(Bucket),
39 | SumProps = lists:ukeymerge(1,
40 | lists:ukeysort(1, NewProps),
41 | lists:ukeysort(1, OldProps)),
42 | ets:insert(MetaTable, {{bucket_props, Bucket}, SumProps}),
43 | ok.
44 |
45 | get(Bucket, Key) ->
46 | case ets:lookup(ContentTable, {Bucket,Key}) of
47 | [] -> {error, notfound};
48 | [{_,Obj}] -> {ok, Obj}
49 | end.
50 |
51 | put(Obj) ->
52 | THIS:put (Obj, [{w,1},{dw,1},{timeout,1}]).
53 |
54 | put(Obj,W,DW,TimeOut,Options) ->
55 | THIS:put(Obj,[{w,W},{dw,DW},{timeout,TimeOut}|Options]).
56 |
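    | %% Mimics riak_kv's put path: apply any pending updates, bump the
    | %% vclock with this client's id, then reconcile with the stored object.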
57 | put(Obj,Options) ->
58 | Bucket = riak_object:bucket(Obj),
59 | Key = riak_object:key(Obj),
60 |
61 | % error_logger:info_msg("putting ~p", [Obj]),
62 |
63 | Updated = case is_updated(Obj) of
64 | true -> riak_object:increment_vclock(riak_object:apply_updates(Obj), ClientID);
65 | false -> Obj
66 | end,
67 |
68 | case ets:lookup(ContentTable, {Bucket,Key}) of
69 | [] ->
70 | Merged = Updated;
71 | [{_,OrigObj}] ->
72 | Merged = riak_object:reconcile([OrigObj,Updated], true)
73 | end,
74 |
75 | % error_logger:info_msg("storing ~p", [{{Bucket,Key}, Merged}]),
76 |
77 | ets:insert(ContentTable, {{Bucket,Key}, Merged}),
78 |
79 | case proplists:get_bool(returnbody, Options) of
80 | true ->
81 | {ok, Merged};
82 | false ->
83 | ok
84 | end.
85 |
86 |
87 |
88 | mapred_bucket_stream(Bucket, Query, ClientPid) ->
89 | Ref = make_ref(),
90 | spawn_link(fun() -> do_mapred_bucket_stream(Bucket, Query, ClientPid, MapRedDelay, Ref) end),
91 | {ok, Ref}.
92 |
93 | do_mapred_bucket_stream(Bucket, Query, ClientPid, MapRedDelay, Ref) ->
94 | [{map, F, none, true}] = Query,
95 | ets:foldl(fun({{ObjBucket, _}, Obj}, _) ->
96 | if ObjBucket =:= Bucket ->
97 | timer:sleep(MapRedDelay),
98 | MapResult = xapply(F, [Obj, dummyKeyData, dummyAction]),
99 | lists:foreach(fun(Res) ->
100 | ClientPid ! {flow_results, dummyPhaseID, Ref, Res}
101 | end,
102 | MapResult);
103 | true ->
104 | ok
105 | end
106 | end,
107 | dummy,
108 | ContentTable),
109 | ClientPid ! {flow_results, Ref, done}.
110 |
111 | xapply({modfun, Module, Function}, Args) ->
112 | apply(Module, Function, Args);
113 | xapply({'fun', Fun}, Args) ->
114 | apply(Fun, Args).
115 |
116 | -ifdef(TEST).
117 |
118 | assertEquals(OtherPID) ->
119 | HisObjects = OtherPID:get_contents(),
120 | MyObjects = get_contents(),
121 |
122 | length(HisObjects) == length(MyObjects).
123 |
124 |
125 | get_contents() ->
126 | mapred_bucket_stream(MainBucket,
127 | [{map, {'fun', fun(Obj,_,_) -> [Obj] end}, none, true}],
128 | self()),
129 | get_flow_contents([]).
130 |
131 | get_flow_contents(Result) ->
132 | receive
133 | {flow_results, _, _, Obj} ->
134 | get_flow_contents([Obj | Result]);
135 | {flow_results, _, done} ->
136 | Result
137 | end.
138 |
139 |
140 | -endif.
141 |
142 | is_updated(O) ->
143 | M = riak_object:get_update_metadata(O),
144 | V = riak_object:get_update_value(O),
145 | case dict:find(clean, M) of
146 | error -> true;
147 | {ok,_} ->
148 | case V of
149 | undefined -> false;
150 | _ -> true
151 | end
152 | end.
153 |
--------------------------------------------------------------------------------
/test/riak_column_tests.erl:
--------------------------------------------------------------------------------
1 | -module(riak_column_tests).
2 |
3 | -include_lib("eunit/include/eunit.hrl").
4 |
5 |
6 | simple_test() ->
7 | mock_kv:with_mock_store
8 | (1, <<"buck">>, [], 0,
9 | fun(Client) ->
10 | Column = riak_column:new(Client, <<"buck">>, <<"age">>),
11 | Column:add(<<"peter1">>, 1),
12 | Column:add(<<"peter2">>, 2),
13 | Column:add(<<"peter3">>, 3),
14 | Column:add(<<"peter4">>, 4),
15 | Column:add(<<"peter5">>, 5),
16 | Column:add(<<"peter5">>, 6),
17 | Column:add(<<"peter6">>, 6),
18 | Column:add(<<"peter7">>, 7),
19 | Column:add(<<"peter8">>, 8),
20 | Column:add(<<"peter9">>, 9),
21 | {ok, {_,[3]}} = Column:lookup(<<"peter3">>),
22 |
23 | {ok, {VClock, [6,5]}} = Column:lookup(<<"peter5">>),
24 | ok = Column:put(<<"peter5">>, {VClock, [5]}),
25 | {ok, {_, [5]}} = Column:lookup(<<"peter5">>),
26 |
27 | Values = Column:fold(fun({_Key,{_VC,[V]}}, Acc) -> [V|Acc] end, []),
28 | [1,2,3,4,5,6,7,8,9] = lists:sort(Values),
29 |
30 | All = ets:tab2list(Client:content_table()),
31 | error_logger:info_msg("Store= ~p", [All]),
32 |
33 | ok
34 | end).
35 |
36 |
--------------------------------------------------------------------------------