├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── config └── config.exs ├── lib ├── adap.ex ├── joiner.ex ├── piper.ex ├── stream.ex └── unit.ex ├── mix.exs ├── mix.lock └── test ├── adap_test.exs └── test_helper.exs /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /deps 3 | erl_crash.dump 4 | *.ez 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: elixir 2 | elixir: 3 | - 1.0.4 4 | notifications: 5 | recipients: 6 | - arnaud.wetzel@kbrwadventure.com 7 | otp_release: 8 | - 17.3 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Arnaud Wetzel 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ADAP 2 | ## Awesome (big) Data Augmentation Pipeline 3 | 4 | Create a data stream across your information systems to query, 5 | augment and transform data according to Elixir matching rules. 6 | 7 | [![Build Status](https://travis-ci.org/kbrw/adap.svg?branch=master)](https://travis-ci.org/kbrw/adap) 8 | 9 | See the [generated documentation](http://hexdocs.pm/adap) for more detailed explanations. 10 | 11 | The principle is: 12 | 13 | - to make each element hop from node to node in order to be processed 14 | using the locally present data. 15 | - that any node at any time can emit new elements in the pipeline stream 16 | - to construct processing units on each node on demand. They can die at any time 17 | to free memory or because of an exception: they will be restarted on demand. 18 | - to pull elements by chunk in order to allow long processing time 19 | without the need of any back-pressure mechanism. 
20 | 21 | Let's see a processing pipe example: 22 | 23 | - the input is a product stream : stream of `{:product,%{field1: value1, field2: value2}}` 24 | - `user@jsonserver1` contains a json file "/color.json" containing a COLOR mapping 25 | - `user@jsonserver2` contains a json file "/size.json" containing a SIZE mapping 26 | - you want to map product color and size according to these mappings 27 | - you want to add a field "deleted" when the mapped color is red 28 | 29 | ```elixir 30 | Adap.Piper.defpipe ColorPipe, [{ColorPipe.Rules,[]}] 31 | defmodule JSONMap do 32 | use Adap.Unit.Simple, ttl: 1_000 33 | def init(mapping), do: 34 | {:ok,File.read!("/#{mapping}.json") |> JSON.decode!} 35 | def node("color"), do: :"user@jsonserver1" 36 | def node("size"), do: :"user@jsonserver2" 37 | end 38 | defmodule ColorPipe.Rules do 39 | use Adap.Piper, for: :product 40 | defrule map_color(%{color: color}=prod,_) do 41 | {JSONMap,"color"},color_map-> 42 | %{prod| color: color_map[color]} 43 | end 44 | defrule map_size(%{size: size}=prod,_) do 45 | {JSONMap,"size"},size_map-> 46 | %{prod| size: size_map[size]} 47 | end 48 | defrule red_is_deleted(%{color: "red"}=prod,_) do 49 | Dict.put(prod,:deleted,true) 50 | end 51 | end 52 | result = [ 53 | {:product,%{gender: "male", category: "ipad"}}, 54 | {:product,%{color: "carmine", category: "shirt"}}, 55 | {:product,%{color: "periwinkle", size: "xxl"}} 56 | ] |> Adap.Stream.new(ColorPipe) |> Enum.to_list 57 | assert result == [ 58 | {:product,%{gender: "male", category: "ipad"}}, 59 | {:product,%{color: "red", category: "shirt", deleted: true}}, 60 | {:product,%{color: "blue", size: "large"}} 61 | ] 62 | ``` 63 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | -------------------------------------------------------------------------------- /lib/adap.ex: 
--------------------------------------------------------------------------------
defmodule Adap do
  @moduledoc """
  # ADAP: the Awesome Data Augmentation Pipeline

  This library allows you to create a data processing stream where elements
  will go across nodes applying data processing rules to include data, emit
  new elements, or modify it according to locally present data.

  - `Adap.Stream` will create the stream taking an input stream and a
    module implementing `do_emit/2` to define the processing pipeline of each
    element.
  - `Adap.Unit` are processes started on demand where elements can be routed to
    use locally constructed data
  - `Adap.Piper` allows to create a data processing pipeline (module implementing `do_emit/2`) as :
    - successive matching rules
    - external dependencies for each rule as a `Adap.Unit` spec

  See an example usage in `Adap.Piper`.
  """
  use Application
  import Supervisor.Spec

  # Application callback: supervises the single `Adap.Unit.Router` worker.
  @doc false
  def start(_, _) do
    Supervisor.start_link([worker(Adap.Unit.Router, [])], strategy: :one_for_one)
  end
end

--------------------------------------------------------------------------------
/lib/joiner.ex:
--------------------------------------------------------------------------------
defmodule Adap.Joiner do

  @doc """
  Make a stream which reduces input elements joining them according to specified key pattern.
  The principle is to keep a fixed length queue of elements waiting to
  receive joined elements.

  This is for stream of elements where order is unknown, but elements to join
  are supposed to be close.

  - each element of `enum` must be like `{:sometype,elem}`
  - you want to merge elements of type `from_type` into element of type `to_type`
  - elements of any other type are passed through untouched

  Options:

  - `opts[:fk_from]`: anonymous function taking a `from_type` element and
    returning the key of the `to_type` element it must be joined to
    (default: `&(&1[to_type])`). When it returns `nil` or `false` the element
    is emitted as is.
  - `opts[:fk_to]`: anonymous function taking a `to_type` element and returning
    its key (default: `&(&1.id)`).
  - `opts[:keep]`: when true, joined `from_type` elements are also emitted in
    the output stream (default: `false`).
  - `opts[:reducer]`: `fn from_obj, to_obj -> new_to_obj end` merging one
    `from_type` element into its target (default: prepend it to the list stored
    under the key `:"\#{from_type}s"`).
  - `opts[:queue_len]`: number of `to_type` elements kept waiting before the
    oldest one is merged and emitted (default: `10`).

  When the input is exhausted, the elements still waiting in the queue are
  merged and emitted in their arrival order, and a line is printed with the
  number of keys that never found their target.
  """
  def join(enum, from_type, to_type, opts \\ []) do
    opts = set_default_opts(opts, from_type, to_type)

    # accumulator: {pending from-elements indexed by key, waiting to-elements, queue length}
    # `:last` is a sentinel appended to the input to trigger the final flush
    enum
    |> Stream.concat([:last])
    |> Stream.transform({%{}, :queue.new(), 0}, fn
      :last, {tolink, queue, _} ->
        # map_reduce (instead of prepending in a reduce) keeps the queue order
        {elems, tolink} =
          Enum.map_reduce(:queue.to_list(queue), tolink, fn e, tolink ->
            {e, tolink} = merge(e, tolink, opts)
            {{to_type, e}, tolink}
          end)

        IO.puts "end join, #{Enum.count(tolink)} elements failed to join and are ignored"
        {elems, nil}

      {type, obj_from}, {tolink, queue, count} when type == from_type ->
        case opts.fk_from.(obj_from) do
          fk when fk in [nil, false] ->
            # no foreign key: nothing to join, let the element go through
            {[{from_type, obj_from}], {tolink, queue, count}}

          fk ->
            emitted = if opts.keep, do: [{from_type, obj_from}], else: []
            {emitted, {Map.update(tolink, fk, [obj_from], &[obj_from | &1]), queue, count}}
        end

      {type, obj_to}, {tolink, queue, count} when type == to_type ->
        {queue, count} = {:queue.in(obj_to, queue), count + 1}

        if count > opts.queue_len do
          # queue is full: the oldest waiting element is merged and released
          {{:value, obj_to_merge}, queue} = :queue.out(queue)
          {obj, tolink} = merge(obj_to_merge, tolink, opts)
          {[{to_type, obj}], {tolink, queue, count - 1}}
        else
          {[], {tolink, queue, count}}
        end

      {type, obj}, acc ->
        {[{type, obj}], acc}
    end)
  end

  # Turns the option keyword list into a map where every option has a value.
  defp set_default_opts(opts, from_type, to_type) do
    from_types = :"#{from_type}s"

    %{fk_from: opts[:fk_from] || &(&1[to_type]),
      fk_to: opts[:fk_to] || &(&1.id),
      keep: opts[:keep] || false,
      # Dict is kept here on purpose: the joined objects belong to the caller
      reducer: opts[:reducer] || fn from_obj, to_obj -> Dict.update(to_obj, from_types, [from_obj], &[from_obj | &1]) end,
      queue_len: opts[:queue_len] || 10}
  end

  # Folds every pending element whose key is the one of `obj` into `obj`,
  # and removes them from the pending index.
  defp merge(obj, tolink, opts) do
    {objs_tolink, tolink} = Map.pop(tolink, opts.fk_to.(obj), [])
    {Enum.reduce(objs_tolink, obj, opts.reducer), tolink}
  end
end

--------------------------------------------------------------------------------
/lib/piper.ex:
--------------------------------------------------------------------------------
defmodule Adap.Piper do
  @moduledoc ~S"""
  Piper proposes an implementation of `Adap.Stream.Emitter` where
  the distributed processing of each element is defined as a
  succession of matching rules.

  Each rule can use external data to process the element or emit new
  ones. When external data is needed, a process is spawned on the node
  containing it, will receive the element and continue to apply rules.

  The principle is to make each element hop from node to node in
  order to be processed using the locally present data.

  The element will go to the stream sink when no more rule matches.

  The `Adap.Stream` stream data by chunk, so that the
  construction of the external state server can take as much time as
  necessary without congestion: never more than the chunk size number
  of elements will be queued.

  Let's see a processing pipe example:

  - the input is a product stream : stream of `{:product,%{field1: value1, field2: value2}}`
  - `user@jsonserver1` contains a json file "/color.json" containing a COLOR mapping
  - `user@jsonserver2` contains a json file "/size.json" containing a SIZE mapping
  - you want to map product color and size according to these mappings
  - you want to add a field "deleted" when the mapped color is red

  This can be implemented using:

      iex> Adap.Piper.defpipe ColorPipe, [{ColorPipe.Rules,[]}]
      iex> defmodule JSONMap do
      iex>   use Adap.Unit.Simple, ttl: 1_000
      iex>   def init(mapping) do
      iex>     {:ok,File.read!("/#{mapping}.json") |> JSON.decode!}
      iex>   end
      iex>   def node("color") do :"user@jsonserver1" end
      iex>   def node("size") do :"user@jsonserver2" end
      iex> end
      iex> defmodule ColorPipe.Rules do
      iex>   use Adap.Piper, for: :product
      iex>   defrule map_color(%{color: color}=prod,_) do
      iex>     {JSONMap,"color"},color_map->
      iex>       %{prod| color: color_map[color]}
      iex>   end
      iex>   defrule map_size(%{size: size}=prod,_) do
      iex>     {JSONMap,"size"},size_map->
      iex>       %{prod| size: size_map[size]}
      iex>   end
      iex>   defrule red_is_deleted(%{color: "red"}=prod,_) do
      iex>     Dict.put(prod,:deleted,true)
      iex>   end
      iex> end
      iex> [
      iex>   {:product,%{gender: "male", category: "ipad"}},
      iex>   {:product,%{color: "carmine", category: "shirt"}},
      iex>   {:product,%{color: "periwinkle", size: "xxl"}}
      iex> ] |> Adap.Stream.new(ColorPipe) |> Enum.to_list
      [{:product,%{gender: "male", category: "ipad"}},
       {:product,%{color: "red", category: "shirt", deleted: true}},
       {:product,%{color: "blue", size: "large"}}]
  """

  # Hands the element to the next rule module of the pipe, or sends it to the
  # stream sink when the list of rule modules is exhausted.
  @doc false
  def next(type, elem, [{next, args} | nexts], sink), do:
    next.pipe(type, elem, args, nexts, sink)
  def next(type, elem, [], sink), do:
    Adap.Stream.done(sink, {type, elem})

  # Normalizes the value returned by a rule body into `{elem, pipe_state}`:
  #   {:emit, elems}              -> emit `elems`, keep element and state
  #   {:emit, elems, elem}        -> emit `elems`, replace the element
  #   {:emit, elems, elem, state} -> emit `elems`, replace element and state
  #   {:newstate, state}          -> replace the state only
  #   {:newstate, state, elem}    -> replace state and element
  #   anything else               -> is the new element
  @doc false
  def wrap_result(sink, {:emit, elems}, prev_elem, prev_state), do:
    (Adap.Stream.emit(sink, elems); {prev_elem, prev_state})
  def wrap_result(sink, {:emit, elems, elem}, _prev_elem, prev_state), do:
    (Adap.Stream.emit(sink, elems); {elem, prev_state})
  def wrap_result(sink, {:emit, elems, elem, state}, _prev_elem, _prev_state), do:
    (Adap.Stream.emit(sink, elems); {elem, state})
  def wrap_result(_sink, {:newstate, state}, prev_elem, _prev_state), do:
    {prev_elem, state}
  def wrap_result(_sink, {:newstate, state, elem}, _prev_elem, _prev_state), do:
    {elem, state}
  def wrap_result(_sink, elem, _prev_elem, prev_state), do:
    {elem, prev_state}

  @doc """
  Defines the module `alias` as an `Adap.Stream.Emitter` whose `do_emit/2`
  makes each `{type,elem}` go through `pipers`, a list of `{rules_module,args}`.
  """
  defmacro defpipe(alias, pipers) do
    quote do
      defmodule unquote(alias) do
        use Adap.Stream.Emitter
        def do_emit(sink, {type, elem}), do:
          Adap.Piper.next(type, elem, unquote(pipers), sink)
      end
    end
  end

  defmacro __using__(opts) do
    quote do
      import Adap.Piper
      @behaviour Adap.Piper
      # names of the rules of this module, accumulated by `defrule`
      @rules []
      # element type handled by the rules of this module
      @rules_for unquote(opts[:for])
      @before_compile Adap.Piper

      # entry point called by `Adap.Piper.next/4`: every rule starts "not applied"
      def pipe(type, elem, args, nexts, sink) do
        {elem, pipe_state} = init(elem, args)
        pipe(type, init_apply_map(), elem, pipe_state, nexts, sink)
      end

      def init(e, arg), do: {e, arg}

      defoverridable [init: 2]
    end
  end
  use Behaviour
  defcallback init(elem :: term, args :: term) :: {elem :: term, pipe_state :: term}

  defmacro __before_compile__(_env) do # add to the end of your module (after parsing so before compilation)
    quote do
      # last `pipe/6` clause: no rule matches anymore, go to the next rule module
      def pipe(type, _apply_map, elem, _pipe_state, nexts, sink) do
        Adap.Piper.next(type, elem, nexts, sink)
      end

      def init_apply_map, do:
        (@rules |> Enum.map(&{&1, false}) |> Enum.into(%{}))
    end
  end

  @doc """
  Defines a rule as a `pipe/6` clause matching the element and the pipe state
  given in the rule signature, only while the rule has not been applied yet.

  The body is either an expression, or a single `unit_spec, unit_state -> expr`
  clause which is evaluated in a process spawned by the unit `unit_spec` (see
  `Adap.Unit.Router.cast/2`). See the module documentation for the values the
  body can return.
  """
  defmacro defrule(sig, blocks) do
    {name, [elem_q, pipestate_q], guards_q} = sig_normalizer(sig)
    quote do
      @rules [unquote(name) | @rules]
      def pipe(@rules_for, %{unquote(name) => false} = apply_map, unquote(elem_q) = prev_elem, unquote(pipestate_q) = prev_state, nexts, sink) when unquote(guards_q) do
        unquote(rule_body(blocks, name))
      end
    end
  end

  # {rule name, parameters, guard} of a rule signature, the guard being `true` when absent
  defp sig_normalizer({:when, _, [{name, _, params}, guards]}), do: {name, params, guards}
  defp sig_normalizer({name, _, params}), do: {name, params, true}

  # rule with an external dependency: the body is sent to the unit as a function
  # of the unit state, which spawns a process to run the body and go on piping
  defp rule_body([do: [{:->, _, [[server_spec | args], body]}]], name) do
    quote do
      Adap.Unit.Router.cast(unquote(server_spec), fn unquote_splicing(args) ->
        spawn(fn ->
          {elem, state} = Adap.Piper.wrap_result(sink, unquote(body), prev_elem, prev_state)
          pipe(@rules_for, %{apply_map | unquote(name) => true}, elem, state, nexts, sink)
        end)
      end)
    end
  end
  # local rule: run the body in place, flag the rule as applied and go on piping
  defp rule_body([do: body], name) do
    quote do
      {elem, state} = Adap.Piper.wrap_result(sink, unquote(body), prev_elem, prev_state)
      pipe(@rules_for, %{apply_map | unquote(name) => true}, elem, state, nexts, sink)
    end
  end
end

--------------------------------------------------------------------------------
/lib/stream.ex:
--------------------------------------------------------------------------------
defmodule Adap.Stream do
  @moduledoc """
  `Adap.Stream.new/3` create a stream, it takes a source enumerable, an emitter module and a chunk size.

  - each element from the source is emitted and processed across processes/nodes by `emitter.do_emit/2`
  - these element processing (which may take place on any node) can
    - append elements to the source using : `Adap.Stream.emit/2`, which will be emitted in turn
    - send processed element to the stream output using `Adap.Stream.done/2`
  - the streamed elements are pulled, emitted and received by `chunk_size` in
    order to avoid message congestion if element processing is too slow.

  Let's see an example:

      defmodule MyEmitter do
        use Adap.Stream.Emitter
        # if augment_from_local_data(elem) returns a modified elem according to local data
        # and new_from_local_data(elem) returns new elements taken from local data and elem
        def do_emit(sink,elem) do
          Node.spawn(n1,fn->
            elem = augment_from_local_data(elem)
            emit(sink,new_from_local_data(elem))
            Node.spawn(n2,fn->
              elem = append_local_data(elem)
              done(sink,elem)
            end)
          end)
        end
      end
      Adap.Stream.new(initial_elems,MyEmitter,200)
  """
  alias Adap.Stream.Emitter

  @doc """
  Creates the output stream: a sink process is started when the stream is
  consumed, and is asked for the processed elements chunk after chunk.
  """
  def new(stream, emit_mod, chunk_size \\ 200) do
    Stream.resource(
      fn -> start!(stream, emit_mod, chunk_size) end,
      fn sink -> {next(sink), sink} end,
      &halt/1
    )
  end

  @doc """
  Adds `elems` to the elements to be emitted. A list is sent to the sink, for
  any other enumerable an emitter is started by the caller and its pid is
  sent to the sink.
  """
  def emit(sink, elems) when is_list(elems), do:
    GenServer.cast(sink, {:new_elems, elems})
  def emit(sink, elems), do:
    GenServer.cast(sink, {:new_emitter, Emitter.start!(elems, sink)})

  @doc "Sends the processed element `elem` to the stream output."
  def done(sink, elem), do:
    GenServer.cast(sink, {:done, elem})

  defp start!(elems, emit_mod, chunk_size) do
    {:ok, pid} = GenServer.start_link(__MODULE__, {elems, emit_mod, chunk_size})
    pid
  end

  defp next(sink), do: GenServer.call(sink, :next, :infinity)
  defp halt(sink), do: GenServer.cast(sink, :halt)

  ###### Stream Sink GenServer callbacks ####
  use GenServer

  # state: the running emitters, the buffered processed elements with their
  # count, and the pending `:next` request
  def init({elems, emit_mod, chunk_size}), do:
    {:ok, %{emitters: [Emitter.start!(elems, self())], elems: [], count: 0, req: nil, chunk_size: chunk_size, emit_mod: emit_mod}}

  ## when no more chunk source available, wait done_timeout to ensure a time
  ## window when you have received your last chunk elem but one of its emitted emitter arrived afterward
  @done_timeout 200
  @doc false
  def handle_info(:try_done, %{emitters: [], req: req} = state), do:
    (GenServer.reply(req, :halt); {:stop, :normal, state})
  def handle_info(:try_done, %{req: req} = state), do:
    (GenServer.reply(req, []); {:noreply, state})

  @doc false
  def handle_call(:next, reply_to, %{emitters: []} = state) do
    Process.send_after(self(), :try_done, @done_timeout)
    {:noreply, %{state | req: reply_to}}
  end
  ## make sure that chsize elems are emitted
  def handle_call(:next, reply_to, %{chunk_size: chsize, count: count} = state) do
    case emit_chunk(%{state | req: reply_to}, chsize) do
      %{emitters: [], count: c} = drained when c - count == chsize ->
        ## every emitter was exhausted without emitting a single element (empty
        ## source, or source size multiple of the chunk size): no :done will ever
        ## complete this chunk, so end the stream the same way as when no
        ## emitter is left instead of waiting forever
        Process.send_after(self(), :try_done, @done_timeout)
        {:noreply, %{drained | count: count}}
      emitting ->
        {:noreply, emitting}
    end
  end

  ## when sink receives an elem: reply if chunk count is reached, else buffer it
  @doc false
  def handle_cast({:done, elem}, %{count: c, chunk_size: chsize} = state) when c + 1 == chsize, do:
    (GenServer.reply(state.req, [elem | state.elems]); {:noreply, %{state | count: 0, elems: []}})
  def handle_cast({:done, elem}, %{count: count, elems: elems} = state), do:
    {:noreply, %{state | count: count + 1, elems: [elem | elems]}}

  ## for small emitter (list): make it local to sink (:new_elems), else create a remote Emitter and send its pid (:new_emitter)
  def handle_cast({:new_emitter, pid}, state), do:
    {:noreply, %{state | emitters: [pid | state.emitters]}}
  def handle_cast({:new_elems, elems}, state), do:
    {:noreply, %{state | emitters: [Emitter.start!(elems, self()) | state.emitters]}}

  def handle_cast(:halt, %{emitters: emitters} = state) do
    for emitter <- emitters, do: Emitter.halt(emitter)
    {:stop, :normal, state}
  end

  ## ask the emitters, one after the other, to emit `remaining` elements; an
  ## emitter which emits less is exhausted and dropped. When no emitter is left,
  ## the missing elements are counted as received so that the chunk can complete
  defp emit_chunk(%{emitters: [], count: c} = state, remaining), do:
    %{state | count: c + remaining}
  defp emit_chunk(%{emitters: [emitter | rest], emit_mod: emit_mod} = state, remaining) do
    case Emitter.next(emitter, remaining, emit_mod) do
      ^remaining -> state
      emitted -> emit_chunk(%{state | emitters: rest}, remaining - emitted)
    end
  end
end

defmodule Adap.Stream.Emitter do
  use GenServer

  # An emitter holds a suspended reduction of its enumerable: each `next`
  # resumes it in order to spawn `do_emit/2` for at most `n` elements.

  def start!(elems, sink) do
    {:ok, pid} = GenServer.start_link(__MODULE__, {elems, sink})
    pid
  end
  # returns the number of emitted elements, less than `n` when the emitter is exhausted
  def next(emitter, n, emit_mod), do:
    GenServer.call(emitter, {:next, n, emit_mod}, :infinity)
  def halt(emitter), do:
    GenServer.cast(emitter, :halt)

  def handle_call({:next, n, emit_mod}, _, cont) do
    case cont.({:cont, {n, emit_mod}}) do
      {:suspended, _, newcont} -> {:reply, n, newcont}
      # enumerable exhausted with `rem` elements still to emit: reply and stop
      {:done, {rem, _}} -> {:stop, :normal, n - rem, []}
    end
  end
  def handle_cast(:halt, cont) do
    cont.({:halt, []})
    {:stop, :normal, []}
  end
  def init({elems, sink}), do:
    {:ok, reduce_fn(elems, sink)}

  # the reduction accumulator is {elements still to emit, emitter module}:
  # the reduction suspends itself once the requested count is reached
  defp reduce_fn(elems, sink) do
    &Enumerable.reduce(elems, &1, fn
      elem, {1, emit} -> spawn_link(fn -> emit.do_emit(sink, elem) end); {:suspend, {0, emit}}
      elem, {rem, emit} -> spawn_link(fn -> emit.do_emit(sink, elem) end); {:cont, {rem - 1, emit}}
    end)
  end

  use Behaviour
  defcallback do_emit(sink :: pid, elem :: term) :: :ok
  defmacro __using__(_) do
    quote do
      @behaviour Adap.Stream.Emitter
      import Adap.Stream, only: [done: 2, emit: 2]
    end
  end
end

--------------------------------------------------------------------------------
/lib/unit.ex:
--------------------------------------------------------------------------------
defmodule Adap.Unit do
  @moduledoc "Behaviour describing an ADAP distributed processing unit"
  use Behaviour
  defcallback start_link(args :: term) :: {:ok,pid}
  defcallback cast(pid,fun) :: :ok
  defcallback node(args :: term) :: node
end

defmodule Adap.Unit.Router do
  @moduledoc """
  Route element to a node/process started on demand: `Adap.Unit.Router.cast({mod,arg}=unit_spec,elem)` will:

  - route the query to `mod.node(arg)`
  - see if a process for the spec `{mod,arg}` is running locally
  - if not start a process tree with `mod.start_link(arg)`
  - route the query to existing or newly created process with `mod.cast(pid,elem)`

  Processes are monitored in order to restart them on demand when they die.

  A process specification is defined as a tuple `{module,args}`: module must
  implement behaviour `Adap.Unit` with previously described callbacks.

  A Unit can represent : a GenServer, a pool of GenServers, a pool of
  node of GenServer, etc. The reference unit is a simple GenServer:

  - which dies itself after a given "time to live"
  - where the routed element is an anonymous function with one parameter
  - casting the function on server and apply it with the server state as parameter

  You can `use Adap.Unit.Simple` to take the default implementation for this
  kind of processing unit.
  """

  use GenServer
  def start_link, do: GenServer.start_link(__MODULE__, [], name: __MODULE__)

  @doc "Sends `fun` to the unit `{m,a}`, through the router of the node `m.node(a)`."
  def cast({m, a}, fun), do:
    GenServer.cast({__MODULE__, m.node(a)}, {:route, {m, a}, fun})

  # state: the unit pid of each spec (`pids`) and the reverse index (`specs`)
  def init(_) do
    # units are linked to the router by their `start_link`: exits must be
    # trapped so that the router receives `{:EXIT,pid,reason}` when a unit
    # dies (including the normal stop at the end of its time to live) and
    # forgets it, and so that `terminate/2` is called on shutdown
    Process.flag(:trap_exit, true)
    {:ok, %{pids: %{}, specs: %{}}}
  end

  def handle_cast({:route, {m, a} = spec, fun}, %{pids: pids, specs: specs} = state) do
    case Map.get(pids, spec) do
      nil ->
        {:ok, pid} = m.start_link(a)
        m.cast(pid, fun)
        {:noreply, %{state | pids: Map.put(pids, spec, pid), specs: Map.put(specs, pid, spec)}}
      pid ->
        m.cast(pid, fun)
        {:noreply, state}
    end
  end

  # no need to supervise backends, since they will be restarted by next query
  def handle_info({:EXIT, pid, _}, %{pids: pids, specs: specs} = state) do
    case Map.pop(specs, pid) do
      # exit of a process which is not a known unit: nothing to forget
      {nil, _} -> {:noreply, state}
      {spec, specs} -> {:noreply, %{state | pids: Map.delete(pids, spec), specs: specs}}
    end
  end
  def handle_info(_, state), do: {:noreply, state}

  def terminate(_, %{pids: pids}), do:
    Enum.each(pids, fn {_, pid} -> Process.exit(pid, :shutdown) end)
end

defmodule Adap.Unit.Simple do
  @moduledoc """
  Default `Adap.Unit` implementation, to be used with
  `use Adap.Unit.Simple, ttl: milliseconds`: a GenServer applying the casted
  functions to its state, and stopping itself when it receives nothing during
  `ttl` (never when `ttl` is not given).

  `start_link/1`, `cast/2` and `node/1` can be redefined by the using module,
  for instance to run the unit on the node containing its data (`node/1`
  returns the local node by default).
  """
  defmacro __using__(opts) do
    quote do
      @behaviour Adap.Unit
      use GenServer
      def start_link(arg), do: GenServer.start_link(__MODULE__, arg)
      def cast(pid, fun), do: GenServer.cast(pid, {:apply, fun})
      def node(_), do: Kernel.node()
      def handle_cast({:apply, fun}, state), do:
        (fun.(state); {:noreply, state, unquote(opts[:ttl] || :infinity)})
      def handle_info(:timeout, state), do:
        {:stop, :normal, state}

      # without this, the catch-all `node(_)` above would shadow any
      # `node/1` clause written in the using module
      defoverridable [start_link: 1, cast: 2, node: 1]
    end
  end
end

--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
defmodule Adap.Mixfile do
  use Mix.Project

  def project do
    [app: :adap,
     version: "0.0.1",
     elixir: "~> 1.0",
     docs: [
       main: "Adap",
       source_url: "https://github.com/awetzel/adap",
       source_ref: "master"
     ],
     description: """
     Create a data stream across your information systems to query,
     augment and transform data according to Elixir matching rules.
     """,
     package: [links: %{"Source"=>"http://github.com/awetzel/adap",
                        "Doc"=>"http://hexdocs.pm/adap"},
               contributors: ["Arnaud Wetzel"],
               licenses: ["MIT"]],
     deps: [{:ex_doc, only: :dev}]]
  end

  def application do
    [mod: {Adap,[]},applications: [:logger]]
  end
end

--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
%{"ex_doc": {:hex, :ex_doc, "0.7.2"}}

--------------------------------------------------------------------------------
/test/adap_test.exs:
--------------------------------------------------------------------------------
defmodule StreamTest do
  use ExUnit.Case
  import Adap.Piper

  defmodule EmitterMock do
    use Adap.Stream.Emitter
    def do_emit(sink,{:t1,e}) do
      :random.seed(:erlang.now)
      Stream.cycle([
        fn->emit(sink,[{:t2,e}]) end,
        fn->:timer.sleep(:random.uniform(200)) end,
        fn->done(sink,{:t1,e}) end,
        fn->:timer.sleep(:random.uniform(200)) end
      ]) |> Enum.slice(:random.uniform(4),4) |> Enum.each(& &1.())
    end
    def do_emit(sink,{:t2,1000}) do
      emit(sink,Stream.map(1001..1500,&{:t2,&1}))
      done(sink,{:t2,1000})
    end
    def do_emit(sink,{:t2,e}) do
      :random.seed(:erlang.now)
      :timer.sleep(:random.uniform(200))
      done(sink,{:t2,e})
    end
  end

  defmodule Source1 do
    use Adap.Unit.Simple, ttl: 1_000
    def init(arg), do: {:ok,arg}
  end

  defmodule Rules1 do
    use Adap.Piper, for: :product
    def init(elem,_args), do: {elem,[]}

    defrule has_provider(%{"provider"=>provider}=e,_), do:
      %{e| "provider"=>"#{provider}XXX"}
    defrule provider_a(%{"provider"=>"a"<>_}=e,_), do:
      Dict.put(e,"starts_with","a")
    defrule provider_b(%{"provider"=>"b"<>_}=e,_), do:
      Dict.put(e,"starts_with","b")
  end

  defmodule Rules2 do
    use Adap.Piper, for: :product
    def init(elem,_args), do: {elem,[]}

    defrule add_f1(e,_), do:
      Dict.put(e,"f1","v1")
    defrule add_f2(e,_), do:
      Dict.put(e,"f2","v2")
    defrule from_server1(%{"with_remote"=>true}=e,_) do
      {Source1,"d1"}, source_data-> Dict.put(e,"source_data",source_data)
    end
    defrule from_server2(%{"source_data"=> prev_data}=e,_) do
      {Source1,"d2"}, source_data-> %{e | "source_data"=>[prev_data,source_data]}
    end
  end

  defpipe EmitterPipe, [{Rules1,[]},{Rules2,[]}]

  @tag timeout: 30_000_000
  test "sink stream test" do
    out = Enum.map(0..1000, &{:t1,&1}) |> Adap.Stream.new(EmitterMock) |> Enum.to_list |> Enum.sort
    expected = Enum.concat(Enum.map(0..1000, &{:t1,&1}),Enum.map(0..1500, &{:t2,&1}))
    assert out == expected
  end

  test "sink stream with an empty source halts" do
    assert [] == [] |> Adap.Stream.new(EmitterMock) |> Enum.to_list
  end

  test "sink stream halts when the source size is a multiple of the chunk size" do
    source = Enum.map(1..4, &{:t2,&1})
    assert source == source |> Adap.Stream.new(EmitterMock,2) |> Enum.sort
  end

  test "sink stream with rules" do
    res = [{:product,%{"provider"=>"casto"}},{:product,%{"provider"=>"berenice"}}]
          |> Adap.Stream.new(EmitterPipe)
          |> Enum.sort
    assert res == [
      {:product,%{"provider"=>"castoXXX","f1"=>"v1","f2"=>"v2"}},
      {:product,%{"provider"=>"bereniceXXX","starts_with"=>"b","f1"=>"v1","f2"=>"v2"}}
    ]
  end

  test "sink stream with remote rules" do
    res = [{:product,%{"provider"=>"casto","with_remote"=>true}}]
          |> Adap.Stream.new(EmitterPipe)
          |> Enum.at(0)
    assert res == {:product,%{"provider"=>"castoXXX","f1"=>"v1","f2"=>"v2","source_data"=>["d1","d2"],"with_remote"=>true}}
  end
end

--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
ExUnit.start()

--------------------------------------------------------------------------------