├── README.md ├── rebar ├── rebar.config ├── src ├── riak_column.erl ├── riak_link_index.app.src ├── riak_link_index.erl └── riak_link_set.erl └── test ├── mock_kv.erl ├── mock_kv_store.erl └── riak_column_tests.erl /README.md: -------------------------------------------------------------------------------- 1 | 2 |

Link Indexing

3 | 4 | This module allows you to create simple secondary indexes 5 | in Riak based on Riak's link model. The basic idea is thus: 6 | 7 | Assume we model persons and companies as separate buckets: 8 | 9 | /riak/person/Name 10 | /riak/company/Name 11 | 12 | When you store a `/riak/person/Kresten` object, you describe the 13 | employment relation by including this link in the Kresten object: 14 | 15 | Link: </riak/company/Trifork>; riaktag="idx@employs" 16 | 17 | The magic is that `riak_link_index` will then automatically add (and 18 | maintain) a link in the opposite direction; from `Trifork` to 19 | `Kresten`, and that link will have tag `employs`. The tag needs to 20 | start with `idx@` for `riak_link_index` to recognize it. 21 | 22 | Whenever you update or delete a person object, you can pass in new (or 23 | multiple) such links, and the old reverse links will automatically be 24 | deleted/updated as appropriate. Deleting a company object has no 25 | effect the other way around. 26 | 27 | > The objects that contain the reverse links (in this case 28 | e.g. `/riak/company/Trifork`) will have special content used to manage 29 | the links, so you cannot use them for other stuff! 30 | 31 | This module also allows you to install a `link_index` hook, which can be 32 | used to extract links from your objects. Index hooks can be written in 33 | either JavaScript or Erlang. 34 | 35 | 36 | Installation 37 | ------------ 38 | 39 | > Notice! this only works on the `master` branch of Riak; this 40 | > does not work on `riak-0.14.*` releases, because it depends on the 41 | > pre- and post-commit hooks to both run in the same internal process 42 | > (the `riak_kv_put_fsm`, if you must know). 43 | 44 | To install, you need to make the `ebin` directory containing 45 | `riak_link_index.beam` accessible to your Riak install. You can do that 46 | by adding a line like this to Riak's `etc/vm.args`: 47 | 48 |
-pz /Path/to/riak_function_contrib/other/riak_link_index/ebin
49 | 50 | If you're an Erlang wiz there are other ways, but that should work. 51 | 52 | 53 | Next, you configure a bucket to support indexing. This involves two things: 54 | 55 | 1. Install a set of commit hooks (indexing needs both a pre- and a 56 | post-commit hook). 57 | 58 | 2. (optionally) configure a function to extract index information 59 | from your bucket data. We'll do that later, and start out with 60 | the easy version. 61 | 62 | If your bucket is named `person`, it could be done thus: 63 | 64 | prompt$ cat > bucket_props.json 65 | { "props" : { 66 | "precommit" : [{"mod": "riak_link_index", "fun": "precommit"}], 67 | "postcommit" : [{"mod": "riak_link_index", "fun": "postcommit"}] 68 | }} 69 | ^D 70 | prompt$ curl -X PUT --data @bucket_props.json \ 71 | -H 'Content-Type: application/json' \ 72 | http://127.0.0.1:8091/riak/person 73 | 74 | There you go: you're ready for some action. 75 | 76 | Explicit Indexing 77 | ----------------- 78 | 79 | 80 | The simple indexer now works for the `person` bucket, by interpreting 81 | links on `/riak/person/XXX` objects that have tags starting with 82 | `idx@`. The special `idx@` prefix is recognized by the indexer, and 83 | it will create and maintain a link in the opposite direction, tagged 84 | with whatever comes after the `idx@` prefix. 
85 | 86 | Let's say we add me: 87 | 88 | curl -X PUT \ 89 | -H 'Link: </riak/company/Trifork>; riaktag="idx@employs"' \ 90 | -H 'Content-Type: application/json' \ 91 | --data '{ "name": "Kresten Krab Thorup", "employer":"Trifork" }' \ 92 | http://127.0.0.1:8091/riak/person/kresten 93 | 94 | As this gets written to Riak, the indexer will then 95 | create an object by the name of `/riak/company/Trifork`, 96 | which has a link pointing back to me: 97 | 98 | curl -v -X GET http://127.0.0.1:8091/riak/company/Trifork 99 | < 200 OK 100 | < Link: </riak/person/kresten>; riaktag="employs" 101 | < Content-Length: 0 102 | 103 | If there was already an object at `/company/Trifork`, then the indexer 104 | would leave the contents alone, but still add the reverse link. If no 105 | such object existed, then it would be created with empty contents. 106 | 107 | Link Walking 108 | ------------ 109 | 110 | The beauty of this is that you can now do link-walk queries to find 111 | your stuff. For instance, this link query should give you a list of 112 | persons employed at Trifork. Lucky them :-) 113 | 114 | curl http://localhost:8091/riak/company/Trifork/_,_,employs 115 | 116 | Using a `link_index` hook 117 | ------------------------- 118 | 119 | You can also install an index hook as a bucket property, which designates 120 | a function that can be used to decide which index records to create. This way 121 | you can keep the index creation on the server side; and also more easily 122 | generate some more indexes. 123 | 124 | You install the index hook the same way you install a pre-commit hook; and the 125 | hook can be written in either Erlang or JavaScript, just like precommits. 
126 | 127 | // Return list of [Bucket,Key] that will link to me 128 | function employmentIndexing(metaData, contents) { 129 | personData = JSON.parse(contents); 130 | if(personData.employer) { 131 | return [ ['company', personData.employer] ]; 132 | } else { 133 | return []; 134 | } 135 | } 136 | 137 | Assume you have that code in `/tmp/js_source/my_indexer.js`, and 138 | configured `{js_source_dir, "/tmp/js_source"}` in the `riak_kv` 139 | section of your `etc/app.config`. 140 | 141 | Then, to install it as an indexer, you need to install it as a 142 | bucket property in the person bucket. You can have more indexes, so 143 | it's a list of functions. Link-Index hooks can also be Erlang 144 | functions. 145 | 146 | prompt$ cat > bucket_props.json 147 | { "props" : { 148 | "link_index" : [{"name": "employmentIndexing", 149 | "tag" : "employs"}] 150 | }} 151 | ^D 152 | prompt$ curl -X PUT --data @bucket_props.json \ 153 | -H 'Content-Type: application/json' \ 154 | http://127.0.0.1:8091/riak/person 155 | 156 | Notice that the link index also needs a `tag` property. You can 157 | install multiple index functions, but they should all have separate 158 | tags. Any `idx@...` tagged links that do not correspond to a 159 | registered link index are processed as "explicit indexing". In fact, 160 | the link_index hook is just a convenient way to have code insert the 161 | `idx@`-links for you. 162 | 163 | Now, we can add objects to the person bucket *without* having to put 164 | the `idx@employs` link on the object. The index hook will do it for 165 | you. Happy you! 
166 | 167 | curl -X POST \ 168 | -H 'Content-Type: application/json' \ 169 | --data '{ "name": "Justin Sheehy", "employer":"Basho" }' \ 170 | http://127.0.0.1:8091/riak/person 171 | 172 | > While you can have multiple `link_index`'es, it is important that 173 | each `link_index` has its own distinct tag, because 174 | `riak_link_index` will process each link index hook by first deleting 175 | any links with said tag, and then recomputing them based on the new 176 | content. 177 | 178 | 179 | Consistency 180 | ----------- 181 | 182 | The indexer will handle delete/update of your records as appropriate, 183 | and should work fine with `allow_mult` buckets too. In fact, it is 184 | recommended to enable `allow_mult=true` on the buckets containing 185 | the index objects (company in my example above), otherwise 186 | conflicting updates may be lost. 187 | 188 | The indexer also manages conflicting updates to the link objects; 189 | which is pretty cool. Say, at the same time someone deletes some 190 | person object, and another process creates a new person object. In 191 | that case, the index object (in the company bucket) may end up with a 192 | conflicting update (i.e. get siblings); which would normally mean that 193 | someone has to take action on resolving the conflict. To manage this 194 | situation, `riak_link_index` stores a [vclock-backed 195 | set](src/riak_link_set.erl) in the content part of the index object (the 196 | company object), which is a set abstraction, which allows automatic 197 | merging based on each element in the set having its own vector clock. 198 | So, if someone adds a link, and someone else deletes a different link, 199 | then the result is quite easy to handle. 
200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krestenkrab/riak_link_index/4555b4c52d298d7a9f911cb629a4f66afece17cb/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [debug_info]}. 2 | {cover_enabled, true}. 3 | {deps, [ 4 | {riak_kv, "0.14.*", {git, "git://github.com/basho/riak_kv", 5 | {branch, "master"}}}, 6 | {edown, ".*", {git, "git://github.com/esl/edown.git", "HEAD"}} 7 | 8 | ]}. 9 | {edoc_opts, [{doclet, edown_doclet}, 10 | {src_path, ["src/"]}, 11 | {subpackages, true}]}. 12 | -------------------------------------------------------------------------------- /src/riak_column.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% This file is provided to you under the Apache License, 4 | %% Version 2.0 (the "License"); you may not use this file 5 | %% except in compliance with the License. You may obtain 6 | %% a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, 11 | %% software distributed under the License is distributed on an 12 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | %% KIND, either express or implied. See the License for the 14 | %% specific language governing permissions and limitations 15 | %% under the License. 16 | %% 17 | %% ------------------------------------------------------------------- 18 | 19 | -module(riak_column, [Storage,Bucket,ColumnName]). 20 | -author("Kresten Krab Thorup "). 
%%
%% Based on idea by Erik Soe Soerensen described here
%%
%% http://polymorphictypist.blogspot.com/
%%    2011/04/multi-version-collections-in-riak.html
%%

-export([lookup/1,add/2,put/2,put/3,delete/1,fold/2]).

%% The main object of a column: the list of bit-prefixes ("group pointers")
%% that partition the 160-bit SHA hash space of row keys.
-record(main_group, { grouppointers=[] :: [bitstring()] }).
%% One group object: the entries whose row-key hash starts with the
%% group's bit-prefix.
-record(group, {entries=[] :: [entry()] }).

-type vclock() :: vclock:vclock().
-type riak_object() :: riak_object:riak_object().

-type value() :: {vclock(),list()}.
-type entry() :: {binary(), value()}.


-define(GROUP_TOMBSTONE, <<"deleted_group">>).
-define(VALUE_TOMBSTONE, <<"deleted_value">>).

-ifdef(TEST).
-define(MAX_ENTRIES_PER_GROUP, 3).
-define(edbg(M,A), error_logger:info_msg(M,A)).
-else.
-define(MAX_ENTRIES_PER_GROUP, 100).
-define(edbg(M,A), ok).
-endif.

%% @private
%% Compute the storage key of the group identified by bit-prefix GroupP.
%%
%% NOTE(review): the two binary constructors in this function were lost when
%% this source was extracted (only `<>' remained). The layout below -- column
%% name, a `#' separator, the prefix zero-padded to a whole byte, then the
%% prefix length in one byte -- is an ASSUMPTION that merely satisfies the
%% visible constraints (byte-aligned result, distinct per column, per prefix
%% and per prefix length, both BitSize and Bits used). Confirm against the
%% upstream source before using it with existing data: a different layout
%% yields different keys.
group_name(GroupP) when bit_size(GroupP) =< 160 ->
    BitSize = bit_size(GroupP),
    Bits = BitSize rem 8,
    case Bits of
        0 ->
            <<ColumnName/binary, $#, GroupP/binary, BitSize:8>>;
        _ ->
            <<ColumnName/binary, $#, GroupP/bitstring, 0:(8-Bits), BitSize:8>>
    end.



%% @doc Look up the value stored under RowKey. The group to search is the
%% first group pointer that is a bit-prefix of the SHA hash of RowKey.
-spec lookup(RowKey::binary()) -> {ok, value()} | {error, notfound}.
lookup(RowKey) when is_binary(RowKey) ->
    {ok, #main_group{grouppointers=GroupPointers}, _} = get_main_group(),
    case listfind( bit_prefix_match(RowKey), GroupPointers ) of
        {ok, GroupP} ->
            ?edbg("looking up ~p (hash=~w)~nin ~p", [RowKey, crypto:sha(RowKey), group_name(GroupP)]),
            %% lookup_in_group/2 only returns {ok,_} | {error,_}; pass it through.
            lookup_in_group(GroupP,RowKey);
        notfound ->
            {error, notfound}
    end.

-spec fold(fun(({binary(), {vclock(), list()}}, term()) -> term()), term()) -> term().
%% Fold Fun over every raw entry of every group listed in the main group.
%% Groups whose object cannot be found are skipped.
fold(Fun, Acc0) ->
    {ok, #main_group{grouppointers=Pointers}, _} = get_main_group(),
    FoldGroup =
        fun(Pointer, AccIn) ->
                case get_group(Pointer) of
                    {error, notfound} ->
                        AccIn;
                    {ok, #group{ entries=GroupEntries }, _} ->
                        lists:foldl(Fun, AccIn, GroupEntries)
                end
        end,
    lists:foldl(FoldGroup, Acc0, Pointers).

%% Add Value to the values stored under RowKey. A missing row is created with
%% a fresh vclock; otherwise Value is moved to the front of the existing
%% values and the row's vclock is incremented for this client.
add(RowKey, Value) when is_binary(RowKey) ->
    case lookup(RowKey) of
        {ok, {Clock, Existing}} ->
            NextClock = vclock:increment(Storage:get_client_id(), Clock),
            Others = lists:filter(fun(Old) -> Old =/= Value end, Existing),
            update(RowKey, {NextClock, [Value | Others]});
        {error, notfound} ->
            THIS:put(RowKey, {vclock:fresh(), [Value]})
    end.

%% Store a single value under RowKey (no options).
put(RowKey, {VC, [Value]}) when is_binary(RowKey) ->
    put(RowKey, {VC, [Value]}, []).

%% Store a single value under RowKey. With the `returnvalue' option the
%% stored (possibly merged) value is returned as {ok, Stored}; otherwise ok.
put(RowKey, {VC, [Value]}, Options) ->
    NextClock = vclock:increment(Storage:get_client_id(), VC),
    {ok, Stored} = update(RowKey, {NextClock, [Value]}),
    case proplists:get_bool(returnvalue, Options) of
        false -> ok;
        true -> {ok, Stored}
    end.

%% Write {VC,Values} into the group responsible for RowKey and rewrite the
%% main group afterwards; when the group was split, its pointer is replaced
%% by the two new pointers.
-spec update(RowKey::binary(), Value::value()) -> {ok, value()}.
update(RowKey, {VC,Values}) when is_list(Values) ->
    {ok, #main_group{grouppointers=Pointers}=CurrentMain, MainObj} = get_main_group(),
    {ok, Target} = listfind( bit_prefix_match(RowKey), Pointers),
    ?edbg("storing ~p into ~p", [RowKey, group_name(Target)]),
    case update_group(Target, RowKey, {VC,Values}) of
        {ok, [GP1,GP2]=SplitPointers, Merged} when is_bitstring(GP1), is_bitstring(GP2) ->
            Remaining = lists:filter(fun(P) -> P =/= Target end, Pointers),
            SplitMain = #main_group{ grouppointers= lists:sort( SplitPointers ++ Remaining ) },
            ok = update_main_group(MainObj, SplitMain),
            {ok, Merged};
        {ok, [], Merged} ->
            %% must re-update main group to force later read repair if different
            update_main_group(MainObj, CurrentMain),
            {ok, Merged}
    end.
%% Store a group object after setting its <<"Links">> metadata to a single
%% "column"-tagged link pointing at {Bucket, ColumnName}.
update_group(RObj) ->
    MD = dict:store(<<"Links">>, [{{Bucket, ColumnName}, "column"}],
                    riak_object:get_update_metadata(RObj)),
    Storage:put(riak_object:update_metadata(RObj, MD)).


%% Return RObj with its <<"Links">> metadata rebuilt: one link per group
%% pointer found in the object's update value.
%% NOTE: the tag "colum_group" (sic) is runtime data and is kept as-is.
update_main_group_links(RObj) ->
    #main_group{grouppointers=Pointers} = riak_object:get_update_value(RObj),
    GroupLinks = lists:foldl(fun(GroupP,Links) ->
                                     [{{Bucket, group_name(GroupP)}, "colum_group"} | Links]
                             end,
                             [],
                             Pointers),
    MD = dict:store(<<"Links">>, GroupLinks, riak_object:get_update_metadata(RObj)),
    riak_object:update_metadata(RObj, MD).

%% Set MainGroup as the value of RObj, refresh its links and store it.
update_main_group(RObj,#main_group{}=MainGroup) ->
    RObj1 = riak_object:update_value(RObj, MainGroup),
    RObj2 = update_main_group_links( RObj1 ),
    Storage:put(RObj2).

%% Split a list of entries into two groups on bit number N (0-based) of each
%% row key's SHA hash: entries with a 0 bit go to the first group, entries
%% with a 1 bit to the second.
split_by_prefix(N,List) when N<160 ->
    split_by_prefix(N, List, {[], []}).

split_by_prefix(_, [], {L0,L1}) ->
    %% The accumulators are built by prepending, so reverse them to keep the
    %% input (key-sorted) order; keyzip-based merging compares keys in
    %% ascending order.
    {#group{entries=lists:reverse(L0)},#group{entries=lists:reverse(L1)}};
split_by_prefix(Bits, [{RowKey,Value}|Rest], {L0,L1}) ->
    case crypto:sha(RowKey) of
        <<_:Bits/bitstring,0:1,_/bitstring>> ->
            split_by_prefix(Bits, Rest, {[{RowKey,Value}|L0], L1});
        <<_:Bits/bitstring,1:1,_/bitstring>> ->
            split_by_prefix(Bits, Rest, {L0, [{RowKey,Value}|L1]})
    end.


%% Merge Value into the entry for RowKey in group GroupP.
%% Returns {ok, NewGroupPointers, StoredValue}: NewGroupPointers is [] unless
%% the group grew past ?MAX_ENTRIES_PER_GROUP and was split, in which case it
%% holds the two child pointers and the old group object is tombstoned.
update_group(GroupP, RowKey, Value) ->
    case get_group(GroupP) of
        {ok, #group{ entries=Elems }, RObj} ->
            case lists:keyfind(RowKey, 1, Elems) of
                {RowKey,OrigValue} ->
                    Merged = merge_values(Value, OrigValue),
                    NewEntries = lists:keyreplace(RowKey,1,Elems,{RowKey,Merged}),
                    update_group(riak_object:update_value(RObj, #group{entries=NewEntries})),
                    {ok, [], Merged};

                false ->
                    Merged = Value,
                    NewEntries = lists:keysort(1,[ {RowKey, Merged} | Elems ]),

                    %% group needs splitting?
                    if length(NewEntries) > ?MAX_ENTRIES_PER_GROUP ->
                            Bits = bit_size(GroupP),
                            {Group0,Group1} = split_by_prefix(Bits, NewEntries),
                            %% Child pointers: the parent prefix extended by
                            %% one bit (restored; the originals were lost in
                            %% extraction and match split_by_prefix/2).
                            GroupP0 = <<GroupP:Bits/bitstring, 0:1>>,
                            GroupP1 = <<GroupP:Bits/bitstring, 1:1>>,
                            ok = update_group(riak_object:new(Bucket, group_name(GroupP0), Group0)),
                            ok = update_group(riak_object:new(Bucket, group_name(GroupP1), Group1)),
                            ok = update_group(riak_object:update_value(RObj, ?GROUP_TOMBSTONE)),
                            {ok, [GroupP0,GroupP1], Merged};

                       true ->
                            update_group(riak_object:update_value(RObj, #group{entries=NewEntries})),
                            {ok, [], Merged}
                    end

            end;
        {error, notfound} ->
            ok = update_group(riak_object:new(Bucket, group_name(GroupP), #group{entries=[{RowKey, Value}]})),
            {ok, [], Value}
    end.


%% Turn a stored entry into a lookup result: an entry holding only the value
%% tombstone is reported as not found; tombstones are filtered out otherwise.
-spec value_result(entry()) -> {ok, value()} | {error, notfound}.

value_result({_,VE}) ->
    case VE of
        {_, [?VALUE_TOMBSTONE]} ->
            {error, notfound};
        {VC,Values} ->
            {ok, {VC, [V || V <- Values, V =/= ?VALUE_TOMBSTONE]}}
    end.


%% Delete RowKey by overwriting its value with the value tombstone.
%% Deleting a missing row is a no-op.
delete(RowKey) ->
    case lookup(RowKey) of
        {ok, {VC, _}} ->
            THIS:put(RowKey, {VC, [?VALUE_TOMBSTONE]});
        {error, notfound} ->
            ok
    end.


%% Return a predicate that is true for group pointers which are a bit-prefix
%% of the 160-bit SHA hash of RowKey.
-spec bit_prefix_match(RowKey::binary()) -> fun( (GroupP::bitstring()) -> boolean() ).
bit_prefix_match(RowKey) ->
    KeyHash = crypto:sha(RowKey),
    fun(GroupP) ->
            Bits = bit_size(GroupP),
            PadBits = 160-Bits,
            case KeyHash of
                %% restored pattern (lost in extraction): GroupP followed by
                %% the remaining PadBits bits of the hash
                <<GroupP:Bits/bitstring, _:PadBits/bitstring>> ->
                    true;
                _ ->
                    false
            end
    end.

%% Return the first element of the list for which Fun returns true.
-spec listfind(fun((T) -> boolean()), list(T)) -> notfound | {ok, T}.
listfind(_Fun, []) ->
    notfound;
listfind(Fun, [H|T]) ->
    case Fun(H) of
        true -> {ok, H};
        false -> listfind(Fun, T)
    end.

%% Zip two tuple lists on element N; see keyzip/5.
keyzip(N,Fun,TupList1,TupList2) ->
    keyzip(N,Fun,TupList1,TupList2,[]).
%keyzip(N,TupList1,TupList2) ->
%    keyzip(N,fun(E1,E2) -> {E1,E2} end,TupList1,TupList2,[]).

%% Merge-join two tuple lists on element N. Both lists are walked in
%% ascending key order; Fun is called with (Tup1,Tup2) for equal keys and with
%% `undefined' in place of the side that lacks the key. Results are returned
%% in the order the keys were visited.
%%
%% The original had no clauses for exhausted lists (every call ended in a
%% function_clause error) and never reversed the accumulator; both are fixed.
keyzip(_N,_Fun,[],[],Result) ->
    lists:reverse(Result);
keyzip(N,Fun,[Tup1|Rest1],[],Result) ->
    keyzip(N,Fun,Rest1,[],[Fun(Tup1,undefined)|Result]);
keyzip(N,Fun,[],[Tup2|Rest2],Result) ->
    keyzip(N,Fun,[],Rest2,[Fun(undefined,Tup2)|Result]);
keyzip(N,Fun,[Tup1|Rest1]=L1,[Tup2|Rest2]=L2, Result) ->
    Key1 = element(N, Tup1),
    Key2 = element(N, Tup2),
    if
        Key1 =:= Key2 ->
            keyzip(N,Fun,Rest1,Rest2,[Fun(Tup1,Tup2)|Result]);
        Key1 < Key2 ->
            keyzip(N,Fun,Rest1,L2,[Fun(Tup1,undefined)|Result]);
        true ->
            keyzip(N,Fun,L1,Rest2,[Fun(undefined,Tup2)|Result])
    end.

%% Fetch the column's main group, creating it (with the single empty prefix
%% <<>>) when it does not exist. Sibling values are merged and written back.
-spec get_main_group() -> {ok, #main_group{}, riak_object()} | {error, _}.
get_main_group() ->
    case Storage:get(Bucket, ColumnName) of
        {error, notfound} ->
            Storage:put(riak_object:new(Bucket, ColumnName, #main_group{ grouppointers=[<<>>] }),
                        [{returnbody, true}]),
            get_main_group();
        {error, E} ->
            {error, E};
        {ok, MainGroupObject} ->
            case riak_object:get_values(MainGroupObject) of
                [] ->
                    {error, notfound};
                [MainGroup] ->
                    {ok, MainGroup, MainGroupObject};
                MainGroups ->
                    %% do read repair
                    MergedMainGroup = lists:foldl(fun merge_main_groups/2, #main_group{}, MainGroups),
                    %% A put with returnbody is matched as {ok, Object} in
                    %% get_group/1; unwrap it here as well so callers receive
                    %% the object rather than the wrapping tuple.
                    {ok, RObj} =
                        Storage:put(update_main_group_links(riak_object:update_value(MainGroupObject,
                                                                                     MergedMainGroup)),
                                    [{returnbody, true}]),
                    {ok, MergedMainGroup, RObj}
            end
    end.

%% Merge two main-group siblings by merging their pointer lists.
merge_main_groups(#main_group{grouppointers=Groups1},
                  #main_group{grouppointers=Groups2}) ->
    #main_group{ grouppointers=merge_grouppointers(Groups1, Groups2) }.

%% Union two sorted pointer lists, trigger read repair for pointers found to
%% be dead (superseded by a longer pointer) and return the live ones.
merge_grouppointers(Groups1,Groups2) ->
    R = lists:umerge(Groups1,Groups2),
    {Dead,Alive} = compute_dead_or_live(R, {[],[]}),
    if Dead =:= [] -> ok;
       true -> read_repair_dead_groups(Dead)
    end,
    Alive.
%% Partition a sorted, duplicate-free list of group pointers into
%% {Dead, Alive}. A pointer is dead when the next pointer in the list is a
%% strictly longer bitstring that starts with it. Dead pointers are returned
%% in reverse order of discovery, live ones in input order.
%%
%% Two fixes: the prefix pattern was lost in extraction and is restored, and
%% the scan used to stop at the first adjacent pair whose sizes were not
%% increasing, so dead pointers later in the list went undetected.
compute_dead_or_live([R1,R2|Rest], {Dead,Alive}) ->
    BS1 = bit_size(R1),
    case R2 of
        <<R1:BS1/bitstring, _/bitstring>> when bit_size(R2) > BS1 ->
            compute_dead_or_live([R2|Rest], {[R1|Dead], Alive});
        _ ->
            compute_dead_or_live([R2|Rest], {Dead, [R1|Alive]})
    end;
compute_dead_or_live(Live, {Dead,Alive}) ->
    {Dead, lists:reverse(Alive, Live)}.

%% Re-insert the entries of each dead group through update/2 and tombstone
%% the dead group object. Failures to read a group are logged and ignored.
read_repair_dead_groups([]) ->
    ok;
read_repair_dead_groups([GroupP|Rest]) ->
    case get_group(GroupP) of
        {ok, #group{entries=Elms}, RObj} ->
            ok = bulk_update(Elms,binary),
            ok = Storage:put(riak_object:update_value(RObj,?GROUP_TOMBSTONE));
        {error, E} ->
            error_logger:info_msg("read repair failed; ignoring error: ~p", [E]),
            ok
    end,
    read_repair_dead_groups(Rest).

%% TODO: make this work sensibly
%% Apply update/2 to every {Key, Value} entry; the second argument is a mode
%% tag (`binary' or `term') that currently does not change the behaviour.
bulk_update([], _) -> ok;
bulk_update([{K,BVs}|Elms], binary) ->
    update(K,BVs),
    bulk_update(Elms, binary);
bulk_update([{K,VEs}|Elms], term) ->
    update(K,VEs),
    bulk_update(Elms, term).

%% Merge two entry lists by key; entries present on both sides have their
%% values merged with merge_values/2.
-spec merge_entries([entry()], [entry()]) -> [entry()].
merge_entries(Elms1,[]) ->
    Elms1;
merge_entries([],Elms2) ->
    Elms2;
merge_entries(Elms1,Elms2) ->
    keyzip(1,
           fun(Elm1,undefined) ->
                   Elm1;
              (undefined,Elm2) ->
                   Elm2;
              (Elm1,Elm2) ->
                   merge_entrie_pair(Elm1,Elm2)
           end,
           Elms1,
           Elms2).

merge_entrie_pair({Key,VE1}, {Key,VE2}) ->
    {Key, merge_values(VE1,VE2)}.

%% Merge two {VClock, Values} pairs: if one clock descends from the other the
%% descendant wins outright; concurrent clocks are merged and the value lists
%% are combined into a sorted, duplicate-free list.
merge_values({VC1,ValueList1}=Elm1, {VC2,ValueList2}=Elm2) ->
    case vclock:descends(VC1,VC2) of
        true ->
            Elm1;
        false ->
            case vclock:descends(VC2,VC1) of
                true ->
                    Elm2;
                false ->
                    ValList = lists:umerge(lists:usort(ValueList1),lists:usort(ValueList2)),
                    {vclock:merge(VC1,VC2),ValList}
            end
    end.
%% Look RowKey up inside the group identified by Groupp. Errors from reading
%% the group are returned unchanged.
-spec lookup_in_group(bitstring(),binary()) -> {ok, value()} | {error, notfound}.
lookup_in_group(Groupp, RowKey) ->
    case get_group(Groupp) of
        {error, _}=Error ->
            Error;
        {ok, #group{entries=Elems}, _} ->
            case lists:keyfind(RowKey, 1, Elems) of
                false -> {error, notfound};
                KVE -> value_result(KVE)
            end
    end.

%% Read the group object for GroupP. Tombstoned sibling values are ignored;
%% if several live siblings remain they are merged, written back and the
%% merged group is returned.
-spec get_group(GroupP::bitstring()) -> {ok, #group{}, riak_object()} | {error, _}.
get_group(GroupP) ->
    case Storage:get(Bucket, group_name(GroupP)) of
        {ok, GroupObject} ->
            IsLive = fun(GroupData) -> GroupData =/= ?GROUP_TOMBSTONE end,
            case lists:filter(IsLive, riak_object:get_values(GroupObject)) of

                %% TODO: read-repair if sibling is deleted

                [#group{}=Group] ->
                    {ok, Group, GroupObject};

                [] ->
                    {error, notfound};

                ManyGroups ->
                    error_logger:info_msg("ManyGroups: ~p", [ManyGroups]),
                    MergeTwo = fun(#group{entries=E1},#group{entries=E2}) ->
                                       #group{entries=merge_entries(E1,E2)}
                               end,
                    NewGroup = lists:foldl(MergeTwo, #group{}, ManyGroups),

                    %% read repair the group
                    {ok, RObj} =
                        Storage:put(riak_object:update_value(GroupObject, NewGroup),
                                    [{returnbody, true}]),

                    {ok, NewGroup, RObj}
            end;
        {error, E} ->
            {error, E}
    end.
-------------------------------------------------------------------------------- /src/riak_link_index.app.src: -------------------------------------------------------------------------------- 1 | {application, riak_link_index, 2 | [{description, "Simple link-based indexing for riak"}, 3 | {vsn, "0.1"}, 4 | {modules, []}, 5 | {registered, []}, 6 | {applications, [kernel, stdlib]}, 7 | {env, []} 8 | ]}. 
9 | 10 | -------------------------------------------------------------------------------- /src/riak_link_index.erl: --------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

-module(riak_link_index).
-author("Kresten Krab Thorup ").

-export([precommit/1,postcommit/1]).

-define(CTYPE_ERLANG_BINARY,"application/x-erlang-binary").
-define(CTYPE_JSON,"application/json").
-define(MD_CTYPE,<<"content-type">>).
-define(MD_LINKS,<<"Links">>).
-define(MD_DELETED,<<"X-Riak-Deleted">>).
-define(IDX_PREFIX,"idx@").
-define(JSPOOL_HOOK, riak_kv_js_hook).

-ifdef(DEBUG).
-define(debug(A,B),error_logger:info_msg(A,B)).
-else.
-define(debug(A,B),ok).
-endif.

-define(ENCODE_JSON,true).

%% Pre-commit hook: runs the bucket's link_index hooks over the object being
%% stored and remembers, in the process dictionary, the idx@ links the object
%% had before this write so that postcommit/1 can compute what changed.
%%
%% Indexing works in two phases: precommit will use a hook to add links as
%%
%%    <IBucket/IKey>; riaktag="idx@Tag"
%%
%% to the object being stored.  Then postcommit creates empty-contents
%% objects named IBucket/IKey, with links to this object thus:
%%
%%    <Bucket/Key>; riaktag="Tag"
%%
%% NOTE(review): the link targets in the two examples above were lost when
%% this source was extracted; they are filled in from the surrounding
%% sentence and should be checked against upstream.
precommit(Object) ->

    ?debug("precommit in ~p", [Object]),

    Bucket = riak_object:bucket(Object),
    Key = riak_object:key(Object),

    {OldLinksToMe, IndexedObject} =
        case is_updated(Object) of
            true ->
                %% the object carries an update: index only the update
                PrevLinks = get_index_links(riak_object:get_metadatas(Object)),
                UpdateMD = riak_object:get_update_metadata(Object),
                UpdateValue = riak_object:get_update_value(Object),
                [{MD,_Value}] = index_contents(Bucket, [{UpdateMD, UpdateValue}]),
                {PrevLinks, riak_object:update_metadata(Object, MD)};

            false ->
                %% no pending update: the old links come from the stored copy
                {ok, StorageMod} = riak:local_client(),
                StoredLinks =
                    case StorageMod:get(Bucket, Key) of
                        {ok, OldRO} ->
                            get_index_links(riak_object:get_metadatas(OldRO));
                        _ ->
                            []
                    end,
                MDVs = index_contents(Bucket, riak_object:get_contents(Object)),
                {StoredLinks, riak_object:set_contents(Object, MDVs)}
        end,

    %% this only works in recent riak_kv master branch
    put(?MODULE, {old_links, OldLinksToMe}),

    ?debug("precommit out ~p", [IndexedObject]),

    IndexedObject.
%% Post-commit hook: compares the idx@ links remembered by precommit/1 with
%% the links now on Object and adds/removes the corresponding reverse links
%% on the index objects. Always returns ok; errors are logged.
postcommit(Object) ->
    try

        case erlang:erase(?MODULE) of
            {old_links, OldLinksToMe} ->
                %% compute links to add/remove in postcommit
                NewLinksToMe  = get_index_links(Object),
                LinksToRemove = ordsets:subtract(OldLinksToMe, NewLinksToMe),
                LinksToAdd    = ordsets:subtract(NewLinksToMe, OldLinksToMe),

                ?debug("postcommit: old=~p, new=~p", [OldLinksToMe,NewLinksToMe]),

                {ok, StorageMod} = riak:local_client(),
                Bucket = riak_object:bucket(Object),
                Key = riak_object:key(Object),
                ClientID = StorageMod:get_client_id(),
                add_links(StorageMod, LinksToAdd, Bucket, Key, ClientID),
                remove_links(StorageMod, LinksToRemove, Bucket, Key, ClientID),
                ok;
            _ ->
                %% precommit did not run in this process (or ran for a
                %% different put); nothing to compare against
                error_logger:error_msg("error in pre/postcommit interaction", []),
                ok
        end

    catch
        Class:Reason ->
            error_logger:error_msg("error in postcommit ~p:~p ~p", [Class,Reason,erlang:get_stacktrace()]),
            ok
    end
        .

%% For every idx@Tag link {{IndexB,IndexK}, <<"idx@", Tag>>}, add the reverse
%% link {{Bucket,Key}, Tag} to the index object IndexB/IndexK.
%% (The binary pattern was lost in extraction and is restored here; without
%% it Tag would be unbound.)
add_links(StorageMod, Links, Bucket, Key, ClientID) ->
    lists:foreach(fun({{IndexB,IndexK}, <<?IDX_PREFIX, Tag/binary>>}) ->
                          add_link(StorageMod, IndexB, IndexK, {{Bucket,Key},Tag}, ClientID)
                  end,
                  Links).


%% Add Link to the link set stored in the object Bucket/Key.
add_link(StorageMod, Bucket, Key, Link, ClientID) ->
    update_links(
      fun(VLinkSet) ->
              ?debug("adding link ~p/~p -> ~p", [Bucket, Key, Link]),
              riak_link_set:add(Link, ClientID, VLinkSet)
      end,
      StorageMod, Bucket, Key).

%% Counterpart of add_links/5: remove the reverse link for every idx@Tag link.
remove_links(StorageMod, Links, Bucket, Key, ClientID) ->
    lists:foreach(fun({{IndexB,IndexK}, <<?IDX_PREFIX, Tag/binary>>}) ->
                          remove_link(StorageMod, IndexB, IndexK, {{Bucket,Key},Tag}, ClientID)
                  end,
                  Links).

%% Remove Link from the link set stored in the object Bucket/Key.
remove_link(StorageMod, Bucket, Key, Link, ClientID) ->
    update_links(
      fun(VLinkSet) ->
              ?debug("removing link ~p/~p -> ~p", [Bucket, Key, Link]),
              riak_link_set:remove(Link, ClientID, VLinkSet)
      end,
      StorageMod, Bucket, Key).
%% Apply Fun to the link set stored in Bucket/Key and write the result back
%% with a put of W=1. If the object cannot be read, Fun is applied to a new,
%% empty link set and a fresh object is created. The object's value holds the
%% encoded set and its Links metadata holds the set's current values.
update_links(Fun,StorageMod,Bucket,Key) ->
    Updated =
        case StorageMod:get(Bucket,Key) of
            {ok, Object} ->
                ?debug("1", []),
                Current = decode_merge_vsets(Object),
                ?debug("decoded: ~p", [Current]),
                Changed = Fun(Current),
                ?debug("transformed: ~p", [Changed]),
                Links = riak_link_set:values(Changed),
                ?debug("new links: ~p", [Links]),
                {Data, CType} = encode_link_set(Changed),
                WithValue = riak_object:update_value(Object, Data),
                WithLinks = dict:store(?MD_LINKS, Links,
                                       riak_object:get_update_metadata(WithValue)),
                riak_object:update_metadata(WithValue,
                                            dict:store(?MD_CTYPE, CType, WithLinks));
            _Got ->
                ?debug("2: ~p from get(~p,~p)", [_Got, Bucket, Key]),
                Created = Fun(riak_link_set:new()),
                ?debug("new set: ~p", [Created]),
                %% as in the original, a failure here is captured as a value
                NewLinks = (catch (riak_link_set:values(Created))),
                ?debug("new links: ~p", [NewLinks]),
                {NewData, NewCType} = encode_link_set(Created),
                riak_object:new(Bucket,Key,
                                NewData,
                                dict:from_list([{?MD_CTYPE, NewCType},
                                                {?MD_LINKS, NewLinks}]))
        end,

    ?debug("storing ~p", [Updated]),
    ok = StorageMod:put(Updated, 1).

%% Encode a link set as {Data, ContentType}: JSON when ?ENCODE_JSON is true,
%% a compressed external-term binary otherwise.
encode_link_set(VLinkSet) ->
    case ?ENCODE_JSON of
        true ->
            {iolist_to_binary(mochijson2:encode(riak_link_set:to_json(VLinkSet))),
             ?CTYPE_JSON};
        false ->
            {term_to_binary(VLinkSet, [compressed]),
             ?CTYPE_ERLANG_BINARY}
    end.
%% Decode the link set of every sibling of Object and merge them into one.
%% Siblings whose content-type is missing or unrecognised are skipped.
%% (The original used dict:fetch/2, which raises when the content-type key is
%% absent, although a catch-all clause for other content types already
%% existed.)
decode_merge_vsets(Object) ->
    lists:foldl(fun ({MD,V},Dict) ->
                        case dict:find(?MD_CTYPE, MD) of
                            {ok, ?CTYPE_ERLANG_BINARY} ->
                                Dict2 = binary_to_term(V),
                                riak_link_set:merge(Dict,Dict2);
                            {ok, ?CTYPE_JSON} ->
                                Dict2 = riak_link_set:from_json(mochijson2:decode(V)),
                                riak_link_set:merge(Dict,Dict2);
                            _ ->
                                Dict
                        end
                end,
                dict:new(),
                riak_object:get_contents(Object)).


%% Return the ordset of links whose tag starts with the idx@ prefix. Accepts
%% whatever get_all_links/1 accepts (an object or a list of metadata dicts).
%% (Binary pattern restored; it was lost in extraction.)
get_index_links(MDList) ->
    ordsets:filter(fun({_, <<?IDX_PREFIX, _/binary>>}) ->
                           true;
                      (_) ->
                           false
                   end,
                   get_all_links(MDList)).

%% Collect all links as an ordset, either from an r_object (including its
%% pending update metadata, if any) or from a list of metadata dicts.
get_all_links(Object) when element(1,Object) =:= r_object ->
    get_all_links
      (case is_updated(Object) of
           true ->
               [riak_object:get_update_metadata(Object)]
                   ++ riak_object:get_metadatas(Object);
           false ->
               riak_object:get_metadatas(Object)
       end);

get_all_links(MetaDatas) when is_list(MetaDatas) ->
    Links = lists:foldl(fun(MetaData, Acc) ->
                                case dict:find(?MD_LINKS, MetaData) of
                                    error ->
                                        Acc;
                                    {ok, LinksList} ->
                                        LinksList ++ Acc
                                end
                        end,
                        [],
                        MetaDatas),

    ordsets:from_list(Links).

%% Run the bucket's index hooks over each {Metadata, Value} pair. Contents
%% marked deleted get their idx@ links stripped instead.
index_contents(Bucket, Contents) ->

    %% grab indexes from bucket properties
    {ok, IndexHooks} = get_index_hooks(Bucket),

    ?debug("hooks are: ~p", [IndexHooks]),

    lists:map
      (fun({MD,Value}) ->
               case dict:find(?MD_DELETED, MD) of
                   {ok, "true"} ->
                       {remove_idx_links(MD),Value};
                   _ ->
                       NewMD = compute_indexed_md(MD, Value, IndexHooks),
                       {NewMD, Value}
               end
       end,
       Contents).

remove_idx_links(MD) ->
    %% remove any "idx@..." links
    case dict:find(?MD_LINKS, MD) of
        error ->
            MD;
        {ok, Links} ->
            dict:store
              (?MD_LINKS,
               lists:filter(fun({_,<<?IDX_PREFIX, _/binary>>}) ->
                                    false;
                               (_) ->
                                    true
                            end,
                            Links),
               MD)
    end.


%% For each index hook: drop the existing links tagged idx@<tag> from the
%% metadata, invoke the hook, and add one idx@<tag> link per {Bucket,Key} (or
%% [Bucket,Key]) it returns. A hook that fails or returns something
%% unexpected is logged and contributes no links.
compute_indexed_md(MD, Value, IndexHooks) ->
    lists:foldl
      (fun({struct, PropList}=IndexHook, MDAcc) ->
               {<<"tag">>, Tag} = proplists:lookup(<<"tag">>, PropList),
               Links = case dict:find(?MD_LINKS, MDAcc) of
                           error -> [];
                           {ok, MDLinks} -> MDLinks
                       end,
               %% restored: the tag binary was lost in extraction
               IdxTag = <<?IDX_PREFIX, Tag/binary>>,
               KeepLinks =
                   lists:filter(fun({{_,_}, TagValue}) -> TagValue =/= IdxTag end,
                                Links),
               NewLinksSansTag =
                   try apply_index_hook(IndexHook, MD, Value) of
                       {erlang, _, {ok, IL}} when is_list(IL) ->
                           IL;
                       {js, _, {ok, IL}} when is_list(IL) ->
                           IL;
                       _Val ->
                           error_logger:error_msg
                             ("indexing function returned ~p", [_Val]),
                           []
                   catch
                       _:_ ->
                           error_logger:error_msg
                             ("exception invoking indexing function", []),
                           []
                   end,

               ResultLinks =
                   lists:map(fun({Bucket,Key}) when is_binary(Bucket), is_binary(Key) ->
                                     {{Bucket, Key}, IdxTag};
                                ([Bucket, Key]) when is_binary(Bucket), is_binary(Key) ->
                                     {{Bucket, Key}, IdxTag}
                             end,
                             NewLinksSansTag)
                   ++
                   KeepLinks,

               dict:store(?MD_LINKS, ResultLinks, MDAcc)
       end,
       MD,
       IndexHooks).


%%%%%% code from riak_kv_put_fsm %%%%%%


%% @doc Read the `link_index' bucket property of `Bucket' and normalise it
%% to {ok, ListOfHooks}.  <<"none">> and unrecognised values yield an
%% empty list (the latter is logged); a single {struct, _} hook is wrapped
%% in a list.
get_index_hooks(Bucket) ->
    {ok, Ring} = riak_core_ring_manager:get_my_ring(),
    Props = riak_core_bucket:get_bucket(Bucket, Ring),
    case proplists:get_value(link_index, Props, []) of
        <<"none">> ->
            {ok, []};
        {struct, _} = SingleHook ->
            {ok, [SingleHook]};
        HookList when is_list(HookList) ->
            {ok, HookList};
        Bad ->
            error_logger:error_msg("bad value in bucket_prop ~p:link_index: ~p", [Bucket,Bad]),
            {ok, []}
    end.


%% @doc Look up the "mod"/"fun"/"name" members of a hook definition and
%% hand them to invoke_hook/5.  Anything that is not a {struct, _} is
%% reported as an invalid hook definition.
apply_index_hook({struct, Members}, MD, Value) ->
    invoke_hook(proplists:get_value(<<"mod">>, Members),
                proplists:get_value(<<"fun">>, Members),
                proplists:get_value(<<"name">>, Members),
                MD,
                Value);
apply_index_hook(NotAHook, _, _) ->
    {error, {invalid_hook_def, NotAHook}}.

%% @doc Run either an Erlang hook (mod+fun given, no JS name) or a
%% JavaScript hook (only a JS name given).  Exceptions raised by the
%% Erlang hook itself are returned as an 'EXIT' tuple rather than
%% propagated.
invoke_hook(ModBin, FunBin, undefined, MD, Value) when ModBin /= undefined, FunBin /= undefined ->
    M = binary_to_atom(ModBin, utf8),
    F = binary_to_atom(FunBin, utf8),
    try M:F(MD, Value) of
        Result ->
            {erlang, {M, F}, Result}
    catch
        Class:Exception ->
            {erlang, {M, F}, {'EXIT', M, F, Class, Exception}}
    end;
invoke_hook(undefined, undefined, JSName, MD, Value) when JSName /= undefined ->
    Call = {{jsfun, JSName}, [jsonify_metadata(MD), Value]},
    Reply = riak_kv_js_manager:blocking_dispatch(?JSPOOL_HOOK, Call, 5),
    {js, JSName, Reply};
invoke_hook(_, _, _, _, _) ->
    {error, {invalid_hook_def, no_hook}}.




%%%%% code from riak_object %%%%%%

%% @doc Convert a metadata dict into a mochijson2-style {struct, Props}
%% term.  Entries whose value is a 3-tuple are rendered as an RFC 1123
%% date string, the <<"Links">> entry becomes a list of [B, K, T] lists,
%% non-empty list values go through jsonify_metadata_list/1, and anything
%% else is passed through unchanged.
jsonify_metadata(MD) ->
    MDJS = fun({LastMod, Now={_,_,_}}) ->
                   % convert Now to JS-readable time string
                   {LastMod, list_to_binary(
                               httpd_util:rfc1123_date(
                                 calendar:now_to_local_time(Now)))};
              ({<<"Links">>, Links}) ->
                   {<<"Links">>, [ [B, K, T] || {{B, K}, T} <- Links ]};
              ({Name, List=[_|_]}) ->
                   {Name, jsonify_metadata_list(List)};
              ({Name, Value}) ->
                   {Name, Value}
           end,
    {struct, lists:map(MDJS, dict:to_list(MD))}.

%% @doc convert strings to binaries, and proplists to JSON objects
%%
%% The list is classified in one pass: all {Key, _} pairs with binary or
%% list keys -> struct; all byte-sized integers -> string; any mixture or
%% anything else -> array (returned as is).
jsonify_metadata_list([]) -> [];
jsonify_metadata_list(List) ->
    Classifier = fun({Key,_}, Type) when (is_binary(Key) orelse is_list(Key)),
                                          Type /= array, Type /= string ->
                         struct;
                    %% upper bound is 255, not 256: list_to_binary/1 below
                    %% raises badarg for any element outside 0..255
                    (C, Type) when is_integer(C), C >= 0, C =< 255,
                                   Type /= array, Type /= struct ->
                         string;
                    (_, _) ->
                         array
                 end,
    case lists:foldl(Classifier, undefined, List) of
        struct -> {struct, [ {if is_list(Key) -> list_to_binary(Key);
                                 true -> Key
                              end,
                              if is_list(Value) -> jsonify_metadata_list(Value);
                                 true -> Value
                              end}
                             || {Key, Value} <- List]};
        string -> list_to_binary(List);
        array -> List
    end.

%% @doc true when the object carries pending updates: either its update
%% metadata has no `clean' key, or it has one but an update value is set.
is_updated(O) ->
    M = riak_object:get_update_metadata(O),
    V = riak_object:get_update_value(O),
    case dict:find(clean, M) of
        error -> true;
        {ok,_} ->
            case V of
                undefined -> false;
                _ -> true
            end
    end.

--------------------------------------------------------------------------------
/src/riak_link_set.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%%@doc
%% This is a temporal set abstraction, in which each add/remove
%% operation is augmented with a vclock timestamp.  Thus, it allows
%% reordering (in physical time) of add and remove operations, while
%% leaving only the vclock-time scale observable, and specifically
%% it allows merging of concurrent updates in an orderly fashion.
%%
%% Essentially, a vset is a [vector
%% map](http://www.javalimit.com/2011/02/the-beauty-of-vector-clocked-data.html)
%% which uses the "contained values" as keys, and true and/or false
%% as the value. @end

-module(riak_link_set).
-author("Kresten Krab Thorup ").

-export([new/0,contains/2,add/3,remove/3,merge/1,merge/2,values/1]).
-export([to_json/1,from_json/1]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% @doc Create an empty set; the representation is a plain dict.
new() ->
    dict:new().

%% @doc List the members of `VSet': every key for which at least one of
%% the stored booleans is true.
values(VSet) ->
    dict:fold(fun(Value,{_VC,Bools},Acc) ->
                      case lists:any(fun(Bool) -> Bool end, Bools) of
                          true -> [Value|Acc];
                          false -> Acc
                      end
              end,
              [],
              VSet).

%% @doc true when `Value' is a member of `VSet', using the same rule as
%% values/1.  (The previous version applied lists:any/2 to each boolean
%% and crashed for every value present in the dict.)
contains(Value,VSet) ->
    case dict:find(Value, VSet) of
        error ->
            false;
        {ok, {_VC,Bools}} ->
            lists:any(fun(Bool) -> Bool end, Bools)
    end.


%% Resolve two {VClock, Bools} entries for the same key: the entry whose
%% clock descends the other wins; concurrent entries are combined by
%% merging the clocks and taking the union of the booleans.
value_merge({VC1,V1}=O1,{VC2,V2}=O2) ->
    case vclock:descends(VC1,VC2) of
        true ->
            O1;
        false ->
            case vclock:descends(VC2,VC1) of
                true ->
                    O2;
                false ->
                    %% riak_core's vclock exports merge/1 taking a list of
                    %% clocks; there is no exported merge/2.
                    {vclock:merge([VC1,VC2]), lists:usort(V1 ++ V2)}
            end
    end.

%% @doc Merge two sets, resolving keys present in both with
%% value_merge/2.
merge(VSet1,VSet2) ->
    %% dict:merge/3 calls its fun with (Key, Value1, Value2)
    dict:merge(fun(_Key, O1, O2) -> value_merge(O1, O2) end, VSet1, VSet2).

%% @doc Merge a list of sets into one.  An empty list yields an empty
%% set.
merge([]) ->
    new();
merge([S1]) ->
    S1;
merge([S1|Rest]) ->
    lists:foldl(fun(Set, Acc) -> merge(Set, Acc) end,
                S1,
                Rest).

%% Clock currently stored for `Value', or a fresh one if it is unknown.
get_vclock(Value,VSet) ->
    case dict:find(Value,VSet) of
        error ->
            vclock:fresh();
        {ok, {VC,_}} ->
            VC
    end.

%% @doc Mark `Value' as present, incrementing its clock for `ClientID'.
add(Value,ClientID,VSet) ->
    VClock = get_vclock (Value, VSet),
    VC2 = vclock:increment(ClientID,VClock),
    dict:store(Value,{VC2,[true]}, VSet).

%% @doc Mark `Value' as removed, incrementing its clock for `ClientID'.
%% The entry stays in the dict with [false] so the removal can be merged.
remove(Value,ClientID,VSet) ->
    VClock = get_vclock (Value, VSet),
    VC2 = vclock:increment(ClientID,VClock),
    dict:store(Value,{VC2,[false]}, VSet).

%% @doc Render the set as a mochijson2-style term:
%% {struct, [{<<"links">>, [{struct, [link, vclock, active]}, ...]}]}.
to_json(VSet) ->
    {struct, [{<<"links">>,
               dict:fold(fun(Link,{VClock,Bools}, Acc) ->
                                 [{struct,
                                   [
                                    {<<"link">>, link_to_json(Link)},
                                    {<<"vclock">>, vclock_to_json(VClock)},
                                    {<<"active">>,Bools}
                                   ]} |Acc]
                         end,
                         [],
                         VSet)}]}.

%% @doc Rebuild a set from the term produced by to_json/1 (after a JSON
%% round trip): a {struct, [{<<"links">>, Entries}]} where each entry has
%% "link", "vclock" and "active" members.
from_json({struct, [{<<"links">>, Entries}]}) ->
    Insert =
        fun({struct, Props}, Acc) ->
                {_, LinkJSON} = lists:keyfind(<<"link">>, 1, Props),
                {_, ClockJSON} = lists:keyfind(<<"vclock">>, 1, Props),
                {_, Active} = lists:keyfind(<<"active">>, 1, Props),
                Link = link_from_json(LinkJSON),
                Clock = vclock_from_json(ClockJSON),
                dict:store(Link, {Clock, Active}, Acc)
        end,
    lists:foldl(Insert, dict:new(), Entries).

%% Decode a [Bucket, Key, Tag] triple of URL-quoted parts into a
%% {{Bucket, Key}, Tag} link of binaries.
link_from_json([Bucket,Key,Tag]) ->
    [B, K, T] = [list_to_binary(mochiweb_util:unquote(Part))
                 || Part <- [Bucket, Key, Tag]],
    {{B, K}, T}.

%% Encode a {{Bucket, Key}, Tag} link as a list of three URL-quoted
%% binaries.
link_to_json({{Bucket,Key},Tag}) ->
    [list_to_binary(mochiweb_util:quote_plus(Part))
     || Part <- [Bucket, Key, Tag]].

%% Inverse of vclock_to_json/1: base64-decode, inflate, binary_to_term.
%% NOTE(review): binary_to_term/1 is applied to decoded input; only safe
%% if the JSON comes from a trusted writer.
vclock_from_json(Base64Data) ->
    Compressed = base64:decode(Base64Data),
    Serialized = zlib:unzip(Compressed),
    binary_to_term(Serialized).

%% Serialise a vclock term as base64 of the deflated external term format.
vclock_to_json(Clocks) ->
    Serialized = term_to_binary(Clocks),
    Compressed = zlib:zip(Serialized),
    base64:encode(Compressed).

-ifdef(TEST).



-endif.

--------------------------------------------------------------------------------
/test/mock_kv.erl:
--------------------------------------------------------------------------------
-module(mock_kv).

-export([with_mock_store/5]).

%%%==================== Mock store stuff:

%% Create the two public ets tables backing a mock store, instantiate
%% mock_kv_store over them and load it with `Contents'.
create_mock_store(Nr, ClientID, Bucket, MapRedDelay, Contents) when is_integer(Nr) ->
    ContentTab = ets:new(content,[public]),
    MetaTab = ets:new(meta,[public]),
    Store = mock_kv_store:new(ClientID, ContentTab, MetaTab, Bucket, MapRedDelay),
    Store:init(Contents),
    Store.

%% @doc Run `Body' (a fun of arity 1) against a freshly created mock
%% store for peer number `Nr', and always stop the store afterwards.
%% If `Body' raises, the store contents are logged and the exception is
%% re-raised with its original stacktrace.
with_mock_store(Nr, Bucket, Data, Delay, Body) when is_function(Body,1) ->
    ClientID = list_to_binary("peer-"++integer_to_list(Nr)),
    MockStore = create_mock_store(Nr, ClientID, Bucket, Delay, Data),
    try Body(MockStore)
    catch
        Class:Reason ->
            %% Grab the stacktrace before calling anything else:
            %% get_stacktrace/0 reports the most recent exception in this
            %% process, which the calls below could replace.
            Stack = erlang:get_stacktrace(),
            All = ets:tab2list(MockStore:content_table()),
            error_logger:error_msg("Failed with ~p:~p~nStore= ~p", [Class,Reason,All]),
            erlang:raise(Class,Reason,Stack)
    after
        MockStore:stop()
    end.

--------------------------------------------------------------------------------
/test/mock_kv_store.erl:
--------------------------------------------------------------------------------
-module(mock_kv_store, [ClientID, ContentTable, MetaTable, MainBucket, MapRedDelay]).

-export([get/2, put/1, put/2, put/5, get_bucket/1, set_bucket/2, mapred_bucket_stream/3, get_client_id/0, content_table/0]).

-export([init/1, stop/0]).

-include_lib("eunit/include/eunit.hrl").

-ifdef(TEST).
-export([assertEquals/1,get_contents/0]).
-endif.

%% @doc Register empty bucket properties for MainBucket and insert every
%% object of `Contents' into the content table under
%% {MainBucket, Key}.
init(Contents) ->
    ets:insert(MetaTable, {{bucket_props, MainBucket}, []}),

    lists:foreach(fun(Obj) ->
                          TabKey = {MainBucket, riak_object:key(Obj)},
                          ets:insert(ContentTable, {TabKey, Obj})
                  end,
                  Contents),
    ok.

%% @doc The ets table holding the stored objects.
content_table() ->
    ContentTable.

%% @doc The client id this store instance was created with.
get_client_id() ->
    ClientID.

%% @doc Delete both backing ets tables.
stop() ->
    ets:delete(ContentTable),
    ets:delete(MetaTable),
    ok.

%% @doc Properties stored for `Bucket'.  Uses ets:lookup_element/3, so a
%% bucket that was never registered raises badarg.
get_bucket(Bucket) ->
    ets:lookup_element(MetaTable, {bucket_props, Bucket}, 2).

%% @doc Merge `NewProps' into the properties of `Bucket'; on a key
%% present in both, the entry from `NewProps' is kept.
set_bucket(Bucket, NewProps) ->
    OldProps = get_bucket(Bucket),
    SumProps = lists:ukeymerge(1,
                               lists:ukeysort(1, NewProps),
                               lists:ukeysort(1, OldProps)),
    ets:insert(MetaTable, {{bucket_props, Bucket}, SumProps}),
    ok.

%% @doc Fetch the object stored under {Bucket, Key}, or
%% {error, notfound}.
get(Bucket, Key) ->
    case ets:lookup(ContentTable, {Bucket,Key}) of
        [{_, Found}] -> {ok, Found};
        [] -> {error, notfound}
    end.

%% @doc put/2 with w=1, dw=1, timeout=1.
put(Obj) ->
    THIS:put(Obj, [{w,1},{dw,1},{timeout,1}]).

%% @doc put/2 with the quorum/timeout arguments folded into the option
%% list.
put(Obj,W,DW,TimeOut,Options) ->
    Quorum = [{w,W},{dw,DW},{timeout,TimeOut}],
    THIS:put(Obj, Quorum ++ Options).

%% @doc Store `Obj'.  Pending updates are applied and the vclock is
%% incremented for this store's client id; the result is reconciled with
%% any object already stored under the same bucket/key.  Returns
%% {ok, StoredObject} when `returnbody' is set in `Options', else ok.
put(Obj,Options) ->
    TabKey = {riak_object:bucket(Obj), riak_object:key(Obj)},

    Incoming =
        case is_updated(Obj) of
            false ->
                Obj;
            true ->
                Applied = riak_object:apply_updates(Obj),
                riak_object:increment_vclock(Applied, ClientID)
        end,

    ToStore =
        case ets:lookup(ContentTable, TabKey) of
            [{_, Existing}] ->
                riak_object:reconcile([Existing, Incoming], true);
            [] ->
                Incoming
        end,

    ets:insert(ContentTable, {TabKey, ToStore}),

    case proplists:get_bool(returnbody, Options) of
        false -> ok;
        true -> {ok, ToStore}
    end.



%% @doc Start a linked process that streams map results for `Bucket' to
%% `ClientPid'; returns {ok, Ref} where Ref tags the result messages.
mapred_bucket_stream(Bucket, Query, ClientPid) ->
    Ref = make_ref(),
    Worker = fun() ->
                     do_mapred_bucket_stream(Bucket, Query, ClientPid, MapRedDelay, Ref)
             end,
    spawn_link(Worker),
    {ok, Ref}.

%% Walk the content table; for every object in `Bucket' sleep
%% MapRedDelay ms, apply the single map phase and send each result as
%% {flow_results, dummyPhaseID, Ref, Res}.  Ends with
%% {flow_results, Ref, done}.
do_mapred_bucket_stream(Bucket, Query, ClientPid, MapRedDelay, Ref) ->
    [{map, F, none, true}] = Query,
    Emit = fun(Res) ->
                   ClientPid ! {flow_results, dummyPhaseID, Ref, Res}
           end,
    Visit = fun({{ObjBucket, _}, Obj}, _) ->
                    case ObjBucket =:= Bucket of
                        true ->
                            timer:sleep(MapRedDelay),
                            MapResult = xapply(F, [Obj, dummyKeyData, dummyAction]),
                            lists:foreach(Emit, MapResult);
                        false ->
                            ok
                    end
            end,
    ets:foldl(Visit, dummy, ContentTable),
    ClientPid ! {flow_results, Ref, done}.

%% Apply either a {modfun, M, F} or a {'fun', Fun} map spec to `Args'.
xapply({'fun', Fun}, Args) ->
    erlang:apply(Fun, Args);
xapply({modfun, Module, Function}, Args) ->
    erlang:apply(Module, Function, Args).

-ifdef(TEST).

%% @doc Compare this store with another store instance.  Only the number
%% of objects is compared; returns a boolean.
assertEquals(OtherPID) ->
    Theirs = OtherPID:get_contents(),
    Mine = get_contents(),
    length(Theirs) == length(Mine).


%% @doc All objects in MainBucket, gathered through
%% mapred_bucket_stream/3 with an identity map phase.
get_contents() ->
    Identity = fun(Obj,_,_) -> [Obj] end,
    Query = [{map, {'fun', Identity}, none, true}],
    mapred_bucket_stream(MainBucket, Query, self()),
    get_flow_contents([]).

%% Collect flow_results messages (newest first) until the `done' marker
%% arrives.
get_flow_contents(Collected) ->
    receive
        {flow_results, _, _, Item} ->
            get_flow_contents([Item | Collected]);
        {flow_results, _, done} ->
            Collected
    end.


-endif.

%% true when the object carries pending updates: its update metadata has
%% no `clean' key, or it has one but an update value is set.
is_updated(O) ->
    UpdMD = riak_object:get_update_metadata(O),
    UpdValue = riak_object:get_update_value(O),
    case dict:is_key(clean, UpdMD) of
        false -> true;
        true -> UpdValue =/= undefined
    end.

--------------------------------------------------------------------------------
/test/riak_column_tests.erl:
--------------------------------------------------------------------------------
-module(riak_column_tests).

-include_lib("eunit/include/eunit.hrl").


%% Exercise riak_column against a mock store: add values (including two
%% for the same key), look them up, resolve the two-valued key with put,
%% and fold over the whole column.
simple_test() ->
    Additions = [{<<"peter1">>, 1},
                 {<<"peter2">>, 2},
                 {<<"peter3">>, 3},
                 {<<"peter4">>, 4},
                 {<<"peter5">>, 5},
                 {<<"peter5">>, 6},
                 {<<"peter6">>, 6},
                 {<<"peter7">>, 7},
                 {<<"peter8">>, 8},
                 {<<"peter9">>, 9}],
    Scenario =
        fun(Client) ->
                Column = riak_column:new(Client, <<"buck">>, <<"age">>),
                lists:foreach(fun({Name, Age}) -> Column:add(Name, Age) end,
                              Additions),
                {ok, {_,[3]}} = Column:lookup(<<"peter3">>),

                %% peter5 was added twice, so both values are present
                {ok, {VClock, [6,5]}} = Column:lookup(<<"peter5">>),
                ok = Column:put(<<"peter5">>, {VClock, [5]}),
                {ok, {_, [5]}} = Column:lookup(<<"peter5">>),

                Collect = fun({_Key,{_VC,[V]}}, Acc) -> [V|Acc] end,
                Values = Column:fold(Collect, []),
                [1,2,3,4,5,6,7,8,9] = lists:sort(Values),

                All = ets:tab2list(Client:content_table()),
                error_logger:info_msg("Store= ~p", [All]),

                ok
        end,
    mock_kv:with_mock_store(1, <<"buck">>, [], 0, Scenario).

--------------------------------------------------------------------------------