├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── rebar
├── rebar.config
├── src
    ├── statman.app.src
    ├── statman.erl
    ├── statman_aggregator.erl
    ├── statman_app.erl
    ├── statman_benchmark.erl
    ├── statman_counter.erl
    ├── statman_decorators.erl
    ├── statman_gauge.erl
    ├── statman_histogram.erl
    ├── statman_merger.erl
    ├── statman_poller.erl
    ├── statman_poller_sup.erl
    ├── statman_poller_worker.erl
    ├── statman_server.erl
    ├── statman_sup.erl
    └── statman_vm_metrics.erl
└── test
    └── statman_tests.erl


/.gitignore:
--------------------------------------------------------------------------------
1 | .eunit
2 | deps
3 | priv
4 | ebin
5 | *.o
6 | *.beam
7 | *.plt
8 | *~
9 | *#


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: erlang
 2 | notifications:
 3 |   email: knutin@gmail.com
 4 | otp_release:
 5 |    - 17.0
 6 |    - R16B03-1
 7 |    - R16B03
 8 |    - R16B02
 9 |    - R16B01
10 |    - R16B
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2013 Knut Nesheim <knutin@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # statman - Statistics man to the rescue!
  2 | 
  3 | Statman makes it possible to instrument and collect statistics from
  4 | your high-traffic production Erlang systems with very low
  5 | overhead. The collected data points are aggregated in the VM and can
  6 | be sent to services like Graphite, Munin, New Relic, etc.
  7 | 
  8 | Statman uses in-memory ETS tables for low overhead logging and to
  9 | avoid single process bottlenecks. See "How does it work" below.
 10 | 
 11 | Integration options:
 12 | 
 13 |  * [statman_elli][]: real-time (mobile friendly) web
 14 |    dashboard. Exposes a small web app and a HTTP API where external
 15 |    tools like Munin(plugin included), Librato, etc, can pull
 16 |    aggregated stats.
 17 | 
 18 |  * [newrelic-erlang][]: Track web transactions happening in any Erlang
 19 |    webserver in New Relic, a hosted application monitoring service.
 20 | 
 21 |  * [statman_graphite][]: Push data to a Graphite instance, also works
 22 |    with hostedgraphite.com.
 23 | 
 24 |  * [hatman][]: Push data to stathat
 25 | 
 26 | 
 27 | ## Usage
 28 | 
 29 | Add `statman_server` to one of your supervisors with the following
 30 | child specification. You can adjust the poll interval to your liking,
 31 | it determines how frequently metrics will be pushed to the
 32 | subscribers:
 33 | 
 34 | 
 35 | ```erlang
 36 |     {statman_server, {statman_server, start_link, [1000]},
 37 |      permanent, 5000, worker, []}.
 38 | ```
 39 | 
 40 | Statman offers three data types. Here's how to use them:
 41 | 
 42 | ```erlang
 43 | %% Counters measure the frequency of an event
 44 | statman_counter:incr(my_queue_in).
 45 | 
 46 | %% A gauge is a point in time snapshot of a value
 47 | statman_gauge:set(queue_size, N).
 48 | 
 49 | %% Histograms show you the distribution of values
 50 | Result = statman:run({foo, bar}, fun () -> do_something() end).
 51 | ```
 52 | 
 53 | Updates to counters, gauges and histograms involve one atomic write
 54 | in ETS.
 55 | 
 56 | 
 57 | ## Decorators
 58 | 
 59 | You can instrument a function using one of the supplied decorators:
 60 | 
 61 | ```erlang
 62 | -decorate({statman_decorators, call_rate}).
 63 | my_function(A, B) ->
 64 |     A + B.
 65 | 
 66 | -decorate({statman_decorators, runtime, [{key, {statman, key}}]}).
 67 | other_function(foo) ->
 68 |     bar.
 69 | ```
 70 | 
 71 | ## `statman_poller`
 72 | 
 73 | It's quite common to want to poll something at an interval, like
 74 | memory usage, reduction counts, etc. To this end, Statman includes
 75 | `statman_poller` which can run functions at intervals on your
 76 | behalf. Add the supervisor to your supervision tree with the following
 77 | child specification:
 78 | 
 79 | ```erlang
 80 |     {statman_poller_sup, {statman_poller_sup, start_link, []},
 81 |         permanent, 5000, supervisor, []}.
 82 | ```
 83 | 
 84 | In your app startup, you can then create pollers, which will be
 85 | restarted if they crash and shut down together with your application:
 86 | 
 87 | ```erlang
 88 | queue_sizes() ->
 89 |     [{my_queue_size, my_queue:get_size()},
 90 |      {other_queue, foo:queue_size()}].
 91 | 
 92 | app_setup() ->
 93 |     ok = statman_poller:add_gauge(fun ?MODULE:queue_sizes/0, 1000).
 94 | ```
 95 | 
 96 | A polling function can also be "stateful", allowing you to measure the
 97 | rate of change in an absolute number. If the function has arity 1, it is
 98 | passed the state (initially `undefined`) and returns `{NewState, Metrics}`:
 99 | 
100 | ```erlang
101 | widget_rate(undefined) ->
102 |     TotalWidgets = count_total_widgets(),
103 |     {TotalWidgets, []};
104 | widget_rate(PrevTotalWidgets) ->
105 |     TotalWidgets = count_total_widgets(),
106 |     {TotalWidgets, [{created_widgets, TotalWidgets - PrevTotalWidgets}]}.
107 | 
108 | app_setup() ->
109 |     ok = statman_poller:add_counter(fun ?MODULE:widget_rate/1, 1000).
110 | ```
111 | 
112 | It's important to pass an external function reference (`fun M:F/A`)
113 | rather than a local or anonymous fun, to make code upgrades smoother.
114 | 
115 | ## How does it work
116 | 
117 | Using `ets:update_counter/3` we get very efficient atomic increments /
118 | decrements of counters. With this primitive, counters, gauges and
119 | histograms become very efficient.
120 | 
121 | A histogram is really a frequency table of values. By keeping a count
122 | (weight) of how many times we have seen the different values, we have
123 | enough information to calculate the mean, min, max, standard deviation
124 | and percentiles.
125 | 
126 | Now, from this we can build something really cool:
127 | 
128 |  * The space required is proportional to how many different values we
129 |    have seen, not to the total number of observations. Binning values
130 |    requires even less space.
131 |  * Basic aggregation is done very early in the process. Binning also
132 |    helps with this.
133 |  * The frequency tables can easily be merged together, either to
134 |    create an aggregate of multiple nodes to create a cluster view or
135 |    aggregate over time to create for example 5 minute summaries.
136 | 
137 | 
138 | ## Clusters
139 | 
140 | In a single node application, you can collect, aggregate and push out
141 | metrics from that single node. In bigger applications it might be
142 | helpful to collect metrics inside of each node, but aggregate together
143 | and view metrics for the whole cluster in one place. Having an "ops
144 | dashboard" showing message queues in key processes, node throughput,
145 | cluster throughput, request latency per node, request latency as a
146 | whole, etc, is extremely useful.
147 | 
148 | ## Setup
149 | 
150 | Statman has two parts, `statman_server` and `statman_aggregator`. The
151 | server owns the ETS-tables and periodically forwards the changes to
152 | any interested aggregator. The aggregator keeps a moving window of
153 | metrics coming from one or more servers. You can ask the aggregator
154 | for the stats collected in the last N seconds.
155 | 
156 | You need to run one server under a supervisor in each node. If you
157 | have a cluster of nodes, you can run the aggregator on just one of
158 | them, collecting stats for the whole cluster.
159 | 
160 | 
161 | [statman_elli]: https://github.com/knutin/statman_elli
162 | [newrelic-erlang]: https://github.com/wooga/newrelic-erlang
163 | [statman_graphite]: https://github.com/chrisavl/statman_graphite
164 | [hatman]: https://github.com/chrisavl/hatman
165 | 


--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knutin/statman/e6ff10815eac1e613b619e69db06d635f1a4a488/rebar


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {erl_opts, [debug_info]}.
2 | 
3 | %% https, not git://: GitHub has switched off the unauthenticated git protocol.
4 | {deps,
5 |  [{decorators, "",
6 |    {git, "https://github.com/chrisavl/erlang_decorators.git", {branch, "master"}}}]
7 | }.
8 | 


--------------------------------------------------------------------------------
/src/statman.app.src:
--------------------------------------------------------------------------------
 1 | {application, statman,
 2 |  [
 3 |   {description, "Statman to the rescue!"},
 4 |   {vsn, "0.5"},
 5 |   {registered, []},
 6 |   {applications, [
 7 |                   kernel,
 8 |                   stdlib
 9 |                  ]},
10 |   {mod, { statman_app, []}},
11 |   {env, []}
12 |  ]}.
13 | 


--------------------------------------------------------------------------------
/src/statman.erl:
--------------------------------------------------------------------------------
 1 | -module(statman).
 2 | -export([
 3 |          incr/1,
 4 |          incr/2,
 5 |          set_gauge/2,
 6 |          incr_gauge/1,
 7 |          decr_gauge/1,
 8 |          run/2,
 9 |          run/3,
10 |          run/4,
11 |          time/2
12 |         ]).
13 | %% Counters: thin wrappers around statman_counter.
14 | incr(Key)             -> statman_counter:incr(Key).
15 | incr(Key, Increment)  -> statman_counter:incr(Key, Increment).
16 | %% Gauges: thin wrappers around statman_gauge.
17 | set_gauge(Key, Value) -> statman_gauge:set(Key, Value).
18 | incr_gauge(Key)       -> statman_gauge:incr(Key).
19 | decr_gauge(Key)       -> statman_gauge:decr(Key).
20 | %% Histograms: hand F (or M:F) and its Args to statman_histogram:run under Key.
21 | run(Key, F)           -> statman_histogram:run(Key, F).
22 | run(Key, F, Args)     -> statman_histogram:run(Key, F, Args).
23 | run(Key, M, F, Args)  -> statman_histogram:run(Key, M, F, Args).
24 | %% Record Value under Key after passing it through statman_histogram:bin/1.
25 | time(Key, Value)      -> statman_histogram:record_value(
26 |                            Key, statman_histogram:bin(Value)).
27 | 
28 |     
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/src/statman_aggregator.erl:
--------------------------------------------------------------------------------
  1 | %% @doc  Aggregate statman samples
  2 | %%
  3 | %% statman_aggregator receives metrics from statman_servers running in
  4 | %% your cluster, picks them apart and keeps a moving window of the raw
  5 | %% values. On demand, the samples are aggregated together. Metrics
  6 | %% with the same key, but from different nodes are also merged.
  7 | -module(statman_aggregator).
  8 | -behaviour(gen_server).
  9 | 
 10 | -export([start_link/0,
 11 |          get_window/1,
 12 |          get_window/2,
 13 |          get_merged_window/1,
 14 |          get_merged_window/2,
 15 |          get_keys/0]).
 16 | 
 17 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
 18 |          terminate/2, code_change/3]).
 19 | 
 20 | -ifdef(TEST).
 21 | -include_lib("eunit/include/eunit.hrl").
 22 | -endif.
 23 | 
 24 | -record(state, {
 25 |           subscribers = [],
 26 |           last_sample = [],
 27 |           metrics = dict:new()
 28 | 
 29 |          }).
 30 | 
 31 | %%%===================================================================
 32 | %%% API
 33 | %%%===================================================================
 34 | 
 35 | start_link() ->
 36 |     gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
 37 | 
 38 | %% Per-node window of the last Size seconds; default call timeout is 5000 ms.
 39 | get_window(Size) -> get_window(Size, 5000).
 40 | get_window(Size, Timeout) -> request_window(Size, false, Timeout).
 41 | 
 42 | %% Same as get_window, but the request asks the server to merge nodes too.
 43 | get_merged_window(Size) -> get_merged_window(Size, 5000).
 44 | get_merged_window(Size, Timeout) -> request_window(Size, true, Timeout).
 45 | 
 46 | %% Ask the server which metrics it holds (default gen_server call timeout).
 47 | get_keys() -> gen_server:call(?MODULE, get_keys).
 48 | 
 49 | %% Shared synchronous request behind both window calls.
 50 | request_window(Size, MergeNodes, Timeout) ->
 51 |     gen_server:call(?MODULE, {get_window, Size, MergeNodes}, Timeout).
 52 | 
 53 | %%%===================================================================
 54 | %%% gen_server callbacks
 55 | %%%===================================================================
 56 | %% Start a 10 s `push' interval timer; the record defaults give an empty state.
 57 | init([]) ->
 58 |     timer:send_interval(10000, push),
 59 |     {ok, #state{}}.
 60 | 
 61 | %% Subscriber bookkeeping: add prepends Ref, remove deletes its first match.
 62 | handle_call({add_subscriber, Ref}, _From, State) ->
 63 |     Subscribers = [Ref | State#state.subscribers],
 64 |     {reply, ok, State#state{subscribers = Subscribers}};
 65 | handle_call({remove_subscriber, Ref}, _From, State) ->
 66 |     Subscribers = lists:delete(Ref, State#state.subscribers),
 67 |     {reply, ok, State#state{subscribers = Subscribers}};
 68 | 
 69 | %% Purge old samples, keep the purged dict as the new state, and let a
 70 | %% spawned process build the answer and reply to Caller via do_reply/4.
 71 | handle_call({get_window, Size, MergeNodes}, Caller, State) ->
 72 |     Purge = fun (_NodeKey, {Type, Samples}) -> {Type, purge(Samples)} end,
 73 |     Fresh = dict:map(Purge, State#state.metrics),
 74 |     spawn(fun() -> do_reply(Caller, Fresh, Size, MergeNodes) end),
 75 |     {noreply, State#state{metrics = Fresh}};
 76 | 
 77 | %% Reply {ok, [{NodeKey, Type}]} for every metric currently held.
 78 | handle_call(get_keys, _From, #state{metrics = Metrics} = State) ->
 79 |     Strip = fun ({NodeKey, {Type, _Samples}}) -> {NodeKey, Type} end,
 80 |     {reply, {ok, lists:map(Strip, dict:to_list(Metrics))}, State}.
 81 | 
 82 | %% Fold every sample in NewSamples into the metrics dict via insert/2.
 83 | handle_cast({statman_update, NewSamples}, #state{metrics = Metrics} = State) ->
 84 |     NewMetrics = lists:foldl(fun insert/2, Metrics, NewSamples),
 85 |     {noreply, State#state{metrics = NewMetrics}}.
 86 | %% `push' fires every 10 s (see init/1): drop samples get_window would purge
 87 | %% anyway, so an aggregator nobody queries does not grow without bound.
 88 | handle_info(push, #state{metrics = Metrics} = State) ->
 89 |     Purged = dict:map(fun (_, {Type, Samples}) -> {Type, purge(Samples)} end, Metrics),
 90 |     {noreply, State#state{metrics = Purged}};
 91 | handle_info(_, State) ->
 92 |     {noreply, State}.
 93 | 
 94 | terminate(_Reason, _State) -> ok.
 95 | code_change(_OldVsn, State, _Extra) -> {ok, State}.
 96 | %%%===================================================================
 97 | %%% Internal functions
 98 | %%%===================================================================
 99 | %% Aggregate each metric over the last Size seconds and answer Client via
100 | %% gen_server:reply/2. With MergeNodes the cross-node merge is appended.
101 | do_reply(Client, Metrics, Size, MergeNodes) ->
102 |     PerNode = lists:map(fun ({{Node, Key}, {Type, Samples}}) ->
103 |                                 Windowed = window(Size, Samples),
104 |                                 {Node, Key, Type, merge_samples(Type, Windowed)}
105 |                         end, dict:to_list(Metrics)),
106 |     gen_server:reply(Client, {ok, reply_body(MergeNodes, Size, PerNode)}).
107 | 
108 | %% Formatted per-node rows, followed by the merged rows when requested.
109 | reply_body(false, Size, PerNode) ->
110 |     format(Size, PerNode);
111 | reply_body(true, Size, PerNode) ->
112 |     format(Size, PerNode) ++ format(Size, merge(PerNode)).
113 | 
114 | %% Prepend a timestamped sample for Metric under its {Node, Key} entry,
115 | %% creating the entry when the key is new. The entry's type is always
116 | %% overwritten with the incoming metric's type.
117 | insert(Metric, Metrics) ->
118 |     Type = type(Metric),
119 |     Sample = {now_to_seconds(), value(Metric)},
120 |     %% FIXME: this breaks if you have the same key for different types of metrics
121 |     Prepend = fun ({_OldType, Samples}) -> {Type, [Sample | Samples]} end,
122 |     dict:update(nodekey(Metric), Prepend, {Type, [Sample]}, Metrics).
123 | 
124 | %% Values (timestamps stripped) of the samples within the last Size seconds.
125 | %% A Size of 1 returns just the first sample in the list, whatever its age.
126 | window(_, []) -> [];
127 | window(1, [{_Ts, Latest} | _]) -> [Latest];
128 | window(Size, Samples) ->
129 |     Recent = samples_after(now_to_seconds() - Size, Samples),
130 |     [Value || {_Ts, Value} <- Recent].
131 | 
132 | %% Drop samples older than 300 seconds.
133 | purge(Samples) -> samples_after(now_to_seconds() - 300, Samples).
134 | 
135 | %% Leading run of samples whose timestamp is >= Threshold.
136 | samples_after(Threshold, Samples) ->
137 |     lists:takewhile(fun ({Ts, _}) -> Ts >= Threshold end, Samples).
138 | 
139 | %% Cross-node merge of {Node, Key, Type, Value} rows into {Nodes, Key, Type,
140 | %% Value} rows: gauges are skipped, counters summed, others merge_samples/2'd.
141 | merge(Metrics) ->
142 |     {_, Merged} =
143 |         lists:unzip(
144 |           orddict:to_list(
145 |             lists:foldl(
146 |               fun ({_, _, gauge, _}, Acc) ->
147 |                       Acc;
148 | 
149 |                   ({Node, Key, counter, Sample}, Acc) ->
150 |                       case orddict:find(Key, Acc) of
151 |                           {ok, {Nodes, Key, counter, OtherSample}} ->
152 |                               orddict:store(Key, {[Node | Nodes], Key, counter,
153 |                                                   Sample + OtherSample}, Acc);
154 |                           error ->
155 |                               orddict:store(Key, {[Node], Key, counter, Sample}, Acc)
156 |                       end;
157 | 
158 |                   ({Node, Key, Type, Samples}, Acc) ->
159 |                       case orddict:find(Key, Acc) of
160 |                           {ok, {Nodes, Key, Type, OtherSamples}} ->
161 |                               Merged = merge_samples(Type, [Samples, OtherSamples]),
162 |                               orddict:store(Key, {[Node | Nodes], Key, Type, Merged}, Acc);
163 |                           error ->
164 |                               orddict:store(Key, {[Node], Key, Type, Samples}, Acc)
165 |                       end
166 |               end, orddict:new(), Metrics))),
167 |     %% Keep only the entries that were built from more than one node.
168 |     lists:filter(fun ({Nodes, _, _, _}) -> length(Nodes) > 1 end, Merged).
169 | 
170 | 
171 | %% Combine the samples of one metric into a single value.
172 | %%   histogram: frequency tables merged key-wise, adding the counts
173 | %%   counter:   sum of all samples
174 | %%   gauge:     head of the sample list, or 0 when the list is empty
175 | merge_samples(histogram, Tables) ->
176 |     AddCounts = fun (_Bucket, CountA, CountB) -> CountA + CountB end,
177 |     MergeTable = fun (Table, Acc) ->
178 |                          orddict:merge(AddCounts, orddict:from_list(Table), Acc)
179 |                  end,
180 |     lists:foldl(MergeTable, orddict:new(), Tables);
181 | 
182 | merge_samples(counter, Counts) ->
183 |     lists:sum(Counts);
184 | merge_samples(gauge, []) ->
185 |     0;
186 | merge_samples(gauge, [First | _]) ->
187 |     First.
188 | 
189 | 
190 | 
191 | %% Render {Nodes, Key, Type, Value} rows as proplists. Size is given in
192 | %% seconds and reported under `window' in milliseconds.
193 | format(Size, Rows) ->
194 |     lists:map(fun ({Nodes, Key, Type, Value}) ->
195 |                       [{key, Key},
196 |                        {node, Nodes},
197 |                        {type, Type},
198 |                        {value, Value},
199 |                        {window, Size * 1000}]
200 |               end, Rows).
201 | 
202 | 
203 | 
204 | 
205 | %% Field accessors for an incoming sample, which is a proplist.
206 | type(Metric)  -> proplists:get_value(type, Metric).
207 | value(Metric) -> proplists:get_value(value, Metric).
208 | %% Dict key for a sample: the {Node, Key} pair taken from its proplist.
209 | nodekey(Metric) ->
210 |     {proplists:get_value(node, Metric),
211 |      proplists:get_value(key, Metric)}.
212 | 
213 | %% os:timestamp/0 reduced to whole seconds (the microsecond part is dropped).
214 | now_to_seconds() ->
215 |     {MegaSeconds, Seconds, _} = os:timestamp(),
216 |     MegaSeconds * 1000000 + Seconds.
217 | 
218 | 
219 | %%
220 | %% TESTS
221 | %%
222 | 
223 | -ifdef(TEST).
224 | aggregator_test_() ->
225 |     {foreach,
226 |      fun setup/0, fun teardown/1,
227 |      [?_test(expire()),
228 |       ?_test(window()),
229 |       ?_test(merged_window())
230 |      ]
231 |     }.
232 | %% Start an aggregator and unlink it from the test process.
233 | setup() ->
234 |     {ok, Pid} = start_link(),
235 |     true = unlink(Pid),
236 |     Pid.
237 | %% Kill the aggregator, wait a second, then assert that it is gone.
238 | teardown(Pid) ->
239 |     exit(Pid, kill),
240 |     timer:sleep(1000),
241 |     false = is_process_alive(Pid).
242 | 
243 | expire() ->
244 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
245 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
246 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
247 |     gen_server:cast(?MODULE, {statman_update, [sample_gauge('a@knutin', 1)]}),
248 |     gen_server:cast(?MODULE, {statman_update, [sample_gauge('a@knutin', 3)]}),
249 | 
250 |     ?assert(lists:all(fun (M) ->
251 |                               V = proplists:get_value(value, M, 0),
252 |                               V =/= 0 andalso V =/= []
253 |                       end, element(2, get_window(2)))),
254 | 
255 |     timer:sleep(3000),
256 | 
257 |     ?assert(lists:all(fun (M) ->
258 |                               V = proplists:get_value(value, M),
259 |                               V == 0 orelse V =:= []
260 |                       end, element(2, get_window(2)))).
261 | 
262 | window() ->
263 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
264 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
265 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
266 |     gen_server:cast(?MODULE, {statman_update, [sample_gauge('a@knutin', 1)]}),
267 |     gen_server:cast(?MODULE, {statman_update, [sample_gauge('a@knutin', 3)]}),
268 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('b@knutin')]}),
269 | 
270 |     timer:sleep(1000),
271 | 
272 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
273 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
274 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
275 |     gen_server:cast(?MODULE, {statman_update, [sample_gauge('a@knutin', 2)]}),
276 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('b@knutin')]}),
277 | 
278 |     {ok, Aggregated} = get_window(60),
279 | 
280 |     [MergedCounter, MergedGauge, MergedHistogramA, MergedHistogramB] = lists:sort(Aggregated),
281 | 
282 | 
283 |     ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
284 |                   {node, 'a@knutin'},
285 |                   {type, histogram},
286 |                   {value, [{1, 3}, {2, 6}, {3, 9}]},
287 |                   {window, 60000}], MergedHistogramA),
288 | 
289 |     ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
290 |                   {node, 'b@knutin'},
291 |                   {type, histogram},
292 |                   {value, [{1, 2}, {2, 4}, {3, 6}]},
293 |                   {window, 60000}], MergedHistogramB),
294 | 
295 |     ?assertEqual([{key, {foo, bar}},
296 |                   {node, 'a@knutin'},
297 |                   {type, counter},
298 |                   {value, 90},
299 |                   {window, 60000}], MergedCounter),
300 | 
301 |     ?assertEqual([{key, {foo, baz}},
302 |                   {node, 'a@knutin'},
303 |                   {type, gauge},
304 |                   {value, 2},
305 |                   {window, 60000}], MergedGauge).
306 | 
307 | merged_window() ->
308 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
309 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('b@knutin')]}),
310 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
311 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('b@knutin')]}),
312 | 
313 |     timer:sleep(1000),
314 | 
315 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('a@knutin')]}),
316 |     gen_server:cast(?MODULE, {statman_update, [sample_histogram('b@knutin')]}),
317 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('a@knutin')]}),
318 |     gen_server:cast(?MODULE, {statman_update, [sample_counter('b@knutin')]}),
319 | 
320 |     ?assertEqual([
321 |                   {nodekey(sample_counter('a@knutin')), counter},
322 |                   {nodekey(sample_histogram('a@knutin')), histogram},
323 |                   {nodekey(sample_counter('b@knutin')), counter},
324 |                   {nodekey(sample_histogram('b@knutin')), histogram}
325 |                  ], lists:sort(element(2, get_keys()))),
326 | 
327 |     {ok, Aggregated} = get_merged_window(60),
328 | 
329 |     [_CounterA, _CounterB, MergedCounter,
330 |      _HistogramA, _HistogramB, MergedHistogram] = lists:sort(Aggregated),
331 | 
332 | 
333 |     ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
334 |                   {node, ['a@knutin', 'b@knutin']},
335 |                   {type, histogram},
336 |                   {value, [{1, 4}, {2, 8}, {3, 12}]},
337 |                   {window, 60000}], MergedHistogram),
338 | 
339 |     ?assertEqual([{key, {foo, bar}},
340 |                   {node, ['a@knutin', 'b@knutin']},
341 |                   {type, counter},
342 |                   {value, 120},
343 |                   {window, 60000}], MergedCounter).
344 | 
345 | 
346 | 
347 | sample_histogram(Node) ->
348 |     [{key,{<<"/highscores">>,db_a_latency}},
349 |      {node,Node},
350 |      {type,histogram},
351 |      {value,[{1,1},
352 |              {2,2},
353 |              {3,3}]},
354 |      {window,1000}].
355 | 
356 | sample_counter(Node) ->
357 |     [{key,{foo, bar}},
358 |      {node,Node},
359 |      {type,counter},
360 |      {value,30},
361 |      {window,1000}].
362 | 
363 | sample_gauge(Node, Value) ->
364 |     [{key,{foo, baz}},
365 |      {node,Node},
366 |      {type,gauge},
367 |      {value,Value},
368 |      {window,1000}].
369 | -endif.
370 | 


--------------------------------------------------------------------------------
/src/statman_app.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_app).
 2 | 
 3 | -behaviour(application).
 4 | 
 5 | %% Application callbacks
 6 | -export([start/2, stop/1]).
 7 | 
 8 | %% ===================================================================
 9 | %% Application callbacks
10 | %% ===================================================================
11 | %% Start the supervision tree; [1000] is handed to statman_sup:start_link/1.
12 | start(_StartType, _StartArgs) ->
13 |     statman_sup:start_link([1000]).
14 | %% Nothing to clean up on stop.
15 | stop(_State) ->
16 |     ok.
17 | 


--------------------------------------------------------------------------------
/src/statman_benchmark.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_benchmark).
 2 | -compile([export_all]).
 3 | 
 4 | -define(PARETO_SHAPE, 1.5).
 5 | -define(MAX_VALUE, 1000).
 6 | %% Time Writes histogram inserts, then one statman_histogram:summary(foo) call.
 7 | %% NOTE(review): `start_time' holds the summary timing - likely meant `stat_time'.
 8 | histogram_run(Writes) ->
 9 |     {InsertTime, _} = timer:tc(?MODULE, do_histogram_run, [Writes]),
10 |     {StatTime, Stats} = timer:tc(statman_histogram, summary, [foo]),
11 |     [{insert_time, InsertTime}, {start_time, StatTime}, {stats, Stats}].
12 | %% Record Writes values drawn from pareto/2 under the histogram key foo.
13 | do_histogram_run(0) ->
14 |     ok;
15 | do_histogram_run(Writes) ->
16 |     Value = pareto(trunc(?MAX_VALUE * 0.2), ?PARETO_SHAPE),
17 |     statman_histogram:record_value(foo, Value),
18 |     do_histogram_run(Writes - 1).
19 | %% Inverse-CDF draw from a Lomax (shifted Pareto) distribution, truncated to an integer; its mean is Mean for Shape > 1.
20 | pareto(Mean, Shape) ->
21 |     S1 = (-1 / Shape),
22 |     S2 = Mean * (Shape - 1),
23 |     U = 1 - random:uniform(),
24 |     trunc((math:pow(U, S1) - 1) * S2).
25 | 


--------------------------------------------------------------------------------
/src/statman_counter.erl:
--------------------------------------------------------------------------------
  1 | -module(statman_counter).
  2 | -export([init/0, counters/0, get/1, get_all/0, reset/2]).
  3 | -export([incr/1, incr/2, decr/1, decr/2, set/2]).
  4 | -compile([{no_auto_import, [get/1]}]).
  5 | -include_lib("eunit/include/eunit.hrl").
  6 | 
  7 | 
  8 | -define(TABLE, statman_counters).
  9 | 
 10 | 
 11 | %%
 12 | %% API
 13 | %%
 14 | %% Create the public, named ETS set that stores the {Key, Count} rows.
 15 | init() ->
 16 |     ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
 17 |     ok.
 18 | 
 19 | %% Return the integer stored under Key; raises error(badarg) when the key
 20 | %% is absent. ets:lookup/2 is used instead of ets:match/2 so that keys
 21 | %% containing '_' or '$1' are looked up literally, not as wildcards.
 22 | get(Key) ->
 23 |     case ets:lookup(?TABLE, Key) of
 24 |         [{_, N}] when is_integer(N) -> N;
 25 |         [] -> error(badarg)
 26 |     end.
 27 | %% Every two-element row ({Key, Count}) currently in the table.
 28 | get_all() ->
 29 |     ets:select(?TABLE, [{{'_', '_'}, [], ['$_']}]).
 30 | 
 31 | 
 32 | incr(Key) -> incr(Key, 1).
 33 | 
 34 | decr(Key) -> decr(Key, 1).
 35 | decr(Key, Incr) -> incr(Key, -Incr).
 36 | 
 37 | %% Keys of all counters currently in the table.
 38 | counters() ->
 39 |     ets:select(?TABLE, [{{'$1', '_'}, [], ['$1']}]).
 40 | %% Subtracts Value instead of zeroing, so increments made after Value was read are kept.
 41 | reset(Key, Value) ->
 42 |     decr(Key, Value).
 43 | 
 44 | %%
 45 | %% INTERNAL HELPERS
 46 | %%
 47 | 
 48 | %% Store Value under Key, replacing any previous value; always returns ok.
 49 | %% A plain insert is used on purpose: ets:update_element/3 takes a
 50 | %% {Pos, Value} spec, not a bare value, and never creates a missing key.
 51 | %% Errors (e.g. the table does not exist) are deliberately swallowed, in
 52 | %% line with incr/2.
 53 | set(Key, Value) ->
 54 |     (catch ets:insert(?TABLE, {Key, Value})),
 55 |     ok.
 56 | %% Add integer Incr to the counter at Key, creating it if missing; returns ok.
 57 | incr(Key, Incr) when is_integer(Incr) ->
 58 |     %% If lock contention on the single key becomes a problem, we can
 59 |     %% use multiple keys and try to snapshot a value across all
 60 |     %% subkeys. See
 61 |     %% https://github.com/boundary/high-scale-lib/blob/master/src/main/java/org/cliffc/high_scale_lib/ConcurrentAutoTable.java
 62 |     case catch ets:update_counter(?TABLE, Key, Incr) of
 63 |         {'EXIT', {badarg, _}} ->
 64 |             %% insert_new/2 never overwrites a row created by a concurrent
 65 |             %% first increment; if we lost that race, add to the new row.
 66 |             case catch ets:insert_new(?TABLE, {Key, Incr}) of
 67 |                 false -> (catch ets:update_counter(?TABLE, Key, Incr)), ok;
 68 |                 _ -> ok
 69 |             end;
 70 |         _ -> ok
 71 |     end;
 72 | incr(_Key, Float) when is_float(Float) -> error(badarg).
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | %%
 79 | %% TESTS
 80 | %%
 81 | 
 82 | counter_test_() ->
 83 |     {foreach,
 84 |      fun setup/0, fun teardown/1,
 85 |      [
 86 |       ?_test(test_operations()),
 87 |       ?_test(find_counters()),
 88 |       {timeout, 100, ?_test(benchmark())},
 89 |       ?_test(test_reset()),
 90 |       ?_test(floats())
 91 |      ]
 92 |     }.
 93 | 
 94 | setup() ->
 95 |     init(),
 96 |     [?TABLE].
 97 | 
 98 | teardown(Tables) ->
 99 |     lists:map(fun ets:delete/1, Tables).
100 | 
101 | 
102 | test_operations() ->
103 |     ?assertError(badarg, get(key)),
104 | 
105 |     ?assertEqual(ok, incr(key)),
106 |     ?assertEqual(1, get(key)),
107 | 
108 |     ?assertEqual(ok, decr(key)),
109 |     ?assertEqual(0, get(key)),
110 | 
111 |     ?assertEqual(ok, decr(key)),
112 |     ?assertEqual(-1, get(key)),
113 | 
114 |     ?assertEqual(ok, set(key, 5)),
115 |     ?assertEqual(5, get(key)),
116 | 
117 |     ?assertEqual(ok, decr(key)),
118 |     ?assertEqual(4, get(key)).
119 | 
120 | 
121 | find_counters() ->
122 |     ?assertEqual([], counters()),
123 |     ?assertEqual([], get_all()),
124 | 
125 |     ?assertEqual(ok, incr(foo)),
126 |     ?assertEqual(ok, incr(bar)),
127 |     ?assertEqual(lists:sort([bar, foo]), lists:sort(counters())),
128 |     ?assertEqual(lists:sort([{bar, 1}, {foo, 1}]), lists:sort(get_all())).
129 | 
130 | 
131 | 
132 | test_reset() ->
133 |     ?assertEqual([], counters()),
134 | 
135 |     ok = incr(foo, 5),
136 |     ?assertEqual(5, get(foo)),
137 | 
138 |     [{foo, Count}] = get_all(),
139 |     incr(foo, 3),
140 |     ?assertEqual(8, get(foo)),
141 | 
142 |     ok = reset(foo, Count),
143 |     ?assertEqual(3, get(foo)).
144 | 
145 | 
146 | floats() ->
147 |     ?assertError(badarg, get(foo)),
148 |     ?assertError(badarg, incr(foo, 2.5)).
149 | 
150 | 
151 | 
152 | benchmark() ->
153 |     do_benchmark(4, 100000),
154 |     do_benchmark(8, 100000),
155 |     do_benchmark(32, 100000).
156 | %% Log how long Processes concurrent writers take to do Writes increments each.
157 | do_benchmark(Processes, Writes) ->
158 |     Start = os:timestamp(), %% not erlang:now/0, which is deprecated
159 |     Parent = self(),
160 |     Pids = [spawn(fun() ->
161 |                           benchmark_incrementer(foo, Writes),
162 |                           Parent ! {self(), done}
163 |                   end) || _ <- lists:seq(1, Processes)],
164 |     receive_all(Pids, done),
165 |     Elapsed = timer:now_diff(os:timestamp(), Start) / 1000,
166 |     error_logger:info_msg("~p processes, ~p writes in ~p ms~n", [Processes, Writes, Elapsed]),
167 |     ok.
168 | 
%% Blocks until a `{Pid, Tag}' message has been received for every pid
%% in Pending. Each matching message removes its sender from the list;
%% returns ok once the list is empty.
receive_all(Pending, Tag) ->
    case Pending of
        [] ->
            ok;
        _ ->
            receive
                {Sender, Tag} ->
                    receive_all(lists:delete(Sender, Pending), Tag)
            end
    end.
176 | 
%% Increments the counter Key N times, counting N down to 0.
benchmark_incrementer(Key, Remaining) when Remaining =/= 0 ->
    incr(Key),
    benchmark_incrementer(Key, Remaining - 1);
benchmark_incrementer(_Key, 0) ->
    ok.
182 | 


--------------------------------------------------------------------------------
/src/statman_decorators.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_decorators).
 2 | -include_lib("eunit/include/eunit.hrl").
 3 | -compile([{parse_transform, decorators}]).
 4 | 
 5 | 
 6 | -export([runtime/3, reductions/3, memory/3, call_rate/3]).
 7 | 
 8 | %%
 9 | %% DECORATORS
10 | %%
11 | 
%% @doc: Decorator that applies Fun to Args and hands the timestamp
%% taken just before the call to statman_histogram:record_value/2.
%% The histogram key is the `key' option, defaulting to the name of Fun.
%% Returns the result of the decorated call.
runtime(Fun, Args, Options) ->
    HistogramKey = proplists:get_value(key, Options, name(Fun)),
    StartedAt = os:timestamp(),
    Outcome = apply(Fun, Args),
    statman_histogram:record_value(HistogramKey, StartedAt),
    Outcome.
18 | 
%% @doc: Decorator that records how much the `reductions' figure of the
%% calling process changed while Fun ran.
reductions(Fun, Args, Options) ->
    InfoKey = reductions,
    process_info_decorator(Fun, Args, Options, InfoKey).
21 | 
%% @doc: Decorator that records how much the `memory' figure of the
%% calling process changed while Fun ran.
memory(Fun, Args, Options) ->
    InfoKey = memory,
    process_info_decorator(Fun, Args, Options, InfoKey).
24 | 
%% @doc: Decorator that bumps a statman counter once per call and then
%% applies Fun to Args. The counter key is the `key' option, defaulting
%% to the name of Fun. Returns the result of the decorated call.
call_rate(Fun, Args, Options) ->
    CounterKey = proplists:get_value(key, Options, name(Fun)),
    statman_counter:incr(CounterKey),
    erlang:apply(Fun, Args).
29 | 
30 | 
31 | %%
32 | %% INTERNAL
33 | %%
34 | 
%% Shared implementation of the reductions/3 and memory/3 decorators:
%% reads process_info(self(), InfoKey) right before and right after
%% applying Fun to Args and records the difference in the histogram.
%% The histogram key is the `key' option, defaulting to the name of Fun.
process_info_decorator(Fun, Args, Options, InfoKey) ->
    HistogramKey = proplists:get_value(key, Options, name(Fun)),
    {InfoKey, Before} = erlang:process_info(self(), InfoKey),
    Outcome = apply(Fun, Args),
    {InfoKey, After} = erlang:process_info(self(), InfoKey),
    statman_histogram:record_value(HistogramKey, After - Before),
    Outcome.
42 | 
%% Returns the name that erlang:fun_info/2 reports for Fun; used as the
%% default metric key when a decorator is given no `key' option.
name(Fun) ->
    element(2, erlang:fun_info(Fun, name)).
46 | 
47 | 
48 | 
49 | -ifdef(TEST).
%% Test subject wrapped by all four decorators, each with an explicit
%% metric key so decorators_test/0 can look the metrics up.
-decorate({statman_decorators, runtime, [{key, runtime_key}]}).
-decorate({statman_decorators, memory, [{key, memory_key}]}).
-decorate({statman_decorators, reductions, [{key, reductions_key}]}).
-decorate({statman_decorators, call_rate, [{key, rate_key}]}).
decorated_function(A, B) ->
    Sum = A + B,
    Sum.
56 | 
%% Test subject decorated without options, so call_rate/3 has to fall
%% back to the name of the wrapped fun as counter key.
-decorate({statman_decorators, call_rate}).
no_key(A, B) ->
    Sum = A + B,
    Sum.
60 | 
%% One call to decorated_function/2 must bump the rate counter once and
%% create one histogram per measuring decorator.
decorators_test() ->
    ok = delete_tables(), %% remove leftovers from other tests
    ok = create_tables(),

    3 = decorated_function(1, 2),

    ExpectedHistograms = [memory_key, reductions_key, runtime_key],
    ?assertEqual(1, statman_counter:get(rate_key)),
    ?assertEqual(ExpectedHistograms, lists:sort(statman_histogram:keys())),

    ok = delete_tables().
69 | 
%% Without a `key' option the counter is stored under the generated
%% name of the decorated fun.
no_key_test() ->
    ok = delete_tables(),
    ok = create_tables(),

    3 = no_key(1, 2),

    ExpectedCounters = [{'-no_key_decorator1___/2-fun-0-', 1}],
    ?assertEqual(ExpectedCounters, statman_counter:get_all()),

    ok = delete_tables().
77 | 
78 | 
%% Creates the histogram and counter tables used by the decorator
%% tests; fails with badmatch if either init/0 does not return ok.
create_tables() ->
    ok = statman_histogram:init(),
    ok = statman_counter:init(),
    ok.
82 | 
%% Deletes the histogram and counter tables. Failures (for instance a
%% table that does not exist) are deliberately ignored; always ok.
delete_tables() ->
    [(catch ets:delete(Table))
     || Table <- [statman_histograms, statman_counters]],
    ok.
87 | -endif.
88 | 


--------------------------------------------------------------------------------
/src/statman_gauge.erl:
--------------------------------------------------------------------------------
  1 | -module(statman_gauge).
  2 | -export([init/0, expire/0, get_all/0]).
  3 | -export([set/2, incr/1, incr/2, decr/1, decr/2]).
  4 | -include_lib("eunit/include/eunit.hrl").
  5 | 
  6 | -define(TABLE, statman_gauges).
  7 | 
  8 | init() ->
  9 |     ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
 10 |     ok.
 11 | 
 12 | set(Key, Value) when is_integer(Value) orelse is_float(Value) ->
 13 |     set(Key, Value, now_to_seconds()).
 14 | 
 15 | set(Key, Value, Timestamp) ->
 16 |     (catch ets:insert(?TABLE, {Key, Timestamp, Value})),
 17 |     ok.
 18 | 
 19 | incr(Key) -> incr(Key, 1).
 20 | 
 21 | decr(Key) -> incr(Key, -1).
 22 | decr(Key, Decr) -> incr(Key, -Decr).
 23 | 
 24 | 
 25 | incr(Key, Incr) ->
 26 |     case catch ets:update_counter(?TABLE, Key, {3, Incr}) of
 27 |         {'EXIT', {badarg, _}} ->
 28 |             set(Key, Incr),
 29 |             ok;
 30 |         _ ->
 31 |             ets:update_element(?TABLE, Key, {2, now_to_seconds()}),
 32 |             ok
 33 |     end.
 34 | 
 35 | expire() ->
 36 |     expire(now_to_seconds() - 60).
 37 | 
 38 | %% @doc: Deletes any gauges that has not been updated since the given
 39 | %% threshold.
 40 | expire(Threshold) ->
 41 |     ets:select_delete(?TABLE, [{{'_', '$1', '_'}, [{'<', '$1', Threshold}], [true]}]).
 42 | 
 43 | get_all() ->
 44 |     ets:select(?TABLE, [{ {'$1', '_', '$2'}, [], [{{'$1', '$2'}}]}]).
 45 | 
 46 | now_to_seconds() ->
 47 |     {MegaSeconds, Seconds, _} = os:timestamp(),
 48 |     MegaSeconds * 1000000 + Seconds.
 49 | 
 50 | 
 51 | %%
 52 | %% TESTS
 53 | %%
 54 | 
 55 | gauge_test_() ->
 56 |     {foreach,
 57 |      fun setup/0, fun teardown/1,
 58 |      [
 59 |       ?_test(test_expire()),
 60 |       ?_test(test_expire_incr_decr()),
 61 |       ?_test(test_set_incr())
 62 |      ]
 63 |     }.
 64 | 
 65 | setup() ->
 66 |     init(),
 67 |     [?TABLE].
 68 | 
 69 | teardown(Tables) ->
 70 |     lists:map(fun ets:delete/1, Tables).
 71 | 
 72 | 
 73 | test_expire() ->
 74 |     ?assertEqual([], get_all()),
 75 |     set(foo, 30, now_to_seconds() - 3),
 76 |     ?assertEqual([{foo, 30}], get_all()),
 77 |     ?assertEqual(0, expire(now_to_seconds() - 5)),
 78 |     ?assertEqual(1, expire(now_to_seconds() - 0)),
 79 |     ?assertEqual([], get_all()).
 80 | 
 81 | test_expire_incr_decr() ->
 82 |     ?assertEqual([], get_all()),
 83 | 
 84 |     ok = set(problems, 100, now_to_seconds() - 3),
 85 |     ok = decr(problems),
 86 |     ?assertEqual([{problems, 99}], get_all()),
 87 |     ?assertEqual(0, expire(now_to_seconds()-1)),
 88 |     ?assertEqual([{problems, 99}], get_all()).
 89 | 
 90 | 
 91 | test_set_incr() ->
 92 |     incr(foo, 2),
 93 |     ?assertEqual([{foo, 2}], get_all()),
 94 | 
 95 |     set(foo, 10),
 96 |     incr(foo),
 97 |     incr(foo),
 98 |     ?assertEqual([{foo, 12}], get_all()),
 99 | 
100 |     decr(foo),
101 |     ?assertEqual([{foo, 11}], get_all()).
102 | 


--------------------------------------------------------------------------------
/src/statman_histogram.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: Histogram backed by ETS and ets:update_counter/3.
  2 | %%
  3 | %% Calculation of statistics is borrowed from basho_stats_histogram
  4 | %% and basho_stats_sample.
  5 | 
  6 | -module(statman_histogram).
  7 | -export([init/0,
  8 |          record_value/2,
  9 |          run/2,
 10 |          run/3,
 11 |          run/4,
 12 |          clear/1,
 13 |          keys/0,
 14 |          get_data/1,
 15 |          summary/1,
 16 |          reset/2,
 17 |          gc/0]).
 18 | 
 19 | -export([bin/1]).
 20 | 
 21 | -compile([native]).
 22 | 
 23 | -define(TABLE, statman_histograms).
 24 | 
 25 | 
 26 | %%
 27 | %% API
 28 | %%
 29 | 
 30 | init() ->
 31 |     ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
 32 |     ok.
 33 | 
 34 | record_value(UserKey, {MegaSecs, Secs, MicroSecs}) when
 35 |       is_integer(MegaSecs) andalso MegaSecs >= 0 andalso
 36 |       is_integer(Secs) andalso Secs >=0 andalso
 37 |       is_integer(MicroSecs) andalso MicroSecs >= 0 ->
 38 |     record_value(UserKey,
 39 |                  bin(timer:now_diff(now(), {MegaSecs, Secs, MicroSecs})));
 40 | 
 41 | record_value(UserKey, Value) when is_integer(Value) ->
 42 |     histogram_incr({UserKey, Value}, 1),
 43 |     ok.
 44 | 
 45 | 
 46 | run(Key, F) ->
 47 |     Start = os:timestamp(),
 48 |     Result = F(),
 49 |     record_value(Key, Start),
 50 |     Result.
 51 | 
 52 | run(Key, F, Args) ->
 53 |     Start = os:timestamp(),
 54 |     Result = erlang:apply(F, Args),
 55 |     record_value(Key, Start),
 56 |     Result.
 57 | 
 58 | run(Key, M, F, Args) ->
 59 |     Start = os:timestamp(),
 60 |     Result = erlang:apply(M, F, Args),
 61 |     record_value(Key, Start),
 62 |     Result.
 63 | 
 64 | 
 65 | keys() ->
 66 |     %% TODO: Maybe keep a special table of all used keys?
 67 |     lists:usort(ets:select(?TABLE, [{ { {'$1', '_'}, '_' }, [], ['$1'] }])).
 68 | 
 69 | gc() ->
 70 |     ets:select_delete(?TABLE, [{ {{'_', '_'}, 0}, [], [true] }]).
 71 | 
 72 | clear(UserKey) ->
 73 |     ets:select_delete(?TABLE, [{{{UserKey, '_'}, '_'}, [], [true]  }]).
 74 | 
 75 | 
 76 | %% @doc: Returns the raw histogram recorded by record_value/2,
 77 | %% suitable for passing to summary/1 and reset/2
 78 | get_data(UserKey) ->
 79 |     Query = [{{{UserKey, '$1'}, '$2'}, [{'>', '$2', 0}], [{{'$1', '$2'}}]}],
 80 |     lists:sort(
 81 |       ets:select(?TABLE, Query)).
 82 | 
 83 | 
 84 | %% @doc: Returns summary statistics from the raw data
 85 | summary([]) ->
 86 |     [];
 87 | summary(Data) ->
 88 |     {N, Sum, Sum2, Max} = scan(Data),
 89 | 
 90 |     [{observations, N},
 91 |      {min, find_quantile(Data, 0)},
 92 |      {median, find_quantile(Data, 0.50 * N)},
 93 |      {mean, Sum / N},
 94 |      {max, Max},
 95 |      {sd, sd(N, Sum, Sum2)},
 96 |      {sum, Sum},
 97 |      {sum2, Sum2},
 98 |      {p25, find_quantile(Data, 0.25 * N)},
 99 |      {p75, find_quantile(Data, 0.75 * N)},
100 |      {p95, find_quantile(Data, 0.95 * N)},
101 |      {p99, find_quantile(Data, 0.99 * N)},
102 |      {p999, find_quantile(Data, 0.999 * N)}
103 |     ].
104 | 
105 | 
106 | 
%% @doc: Subtracts the given frequencies from the histogram UserKey.
%% Passing the data a summary was calculated from resets the histogram
%% while keeping whatever was recorded during the calculation.
%% Returns ok.
reset(UserKey, Data) ->
    lists:foreach(fun({Value, Frequency}) ->
                          ets:update_counter(?TABLE, {UserKey, Value},
                                             -Frequency)
                  end, Data).
115 | 
116 | 
117 | %%
118 | %% INTERNAL HELPERS
119 | %%
%% @doc: Rounds N down to the start of its histogram bin.
%%
%%   * 0 stays 0.
%%   * Below 10000 the bins are 1000 wide; the lowest bin is reported
%%     as 1 so it does not collide with the exact value 0.
%%   * From 10000 upwards only the two most significant digits are
%%     kept, e.g. 12345678 becomes 12000000.
%%
%% The bin width is found with integer arithmetic only, so the result
%% is exact for arbitrarily large integers (a float power of ten is
%% not exactly representable once it gets large enough).
-spec bin(integer()) -> integer().
bin(0) ->
    0;
bin(N) when N < 10000 ->
    case (N div 1000) * 1000 of
        0 ->
            1;
        Bin ->
            Bin
    end;
bin(N) when is_integer(N) ->
    Width = bin_width(N, 1000),
    (N div Width) * Width.

%% Largest power of ten that leaves N with two significant digits,
%% i.e. the Width for which 10 * Width =< N < 100 * Width.
%% Expects N >= 10000 and starts the search at Width = 1000.
bin_width(N, Width) when N < 100 * Width ->
    Width;
bin_width(N, Width) ->
    bin_width(N, Width * 10).
135 | 
%% Makes a single pass over {Value, Weight} pairs and returns
%% {N, Sum, Sum2, Max}: the total weight, the weighted sum of the
%% values, the weighted sum of their squares and the largest value
%% (0 when no value exceeds it).
scan(Data) ->
    lists:foldl(fun({Value, Weight}, {N, Sum, Sum2, Max}) ->
                        {N + Weight,
                         Sum + Value * Weight,
                         Sum2 + ((Value * Value) * Weight),
                         max(Max, Value)}
                end, {0, 0, 0, 0}, Data).
148 | 
149 | 
%% Sample standard deviation computed from the number of observations
%% N, their sum and the sum of their squares. With fewer than two
%% observations the atom 'NaN' is returned.
sd(N, Sum, Sum2) when N >= 2 ->
    SquaredSum = Sum * Sum,
    Variance = (Sum2 - (SquaredSum / N)) / (N - 1),
    math:sqrt(Variance);
sd(N, _Sum, _Sum2) when N < 2 ->
    'NaN'.
155 | 
156 | 
%% Adds Incr to the frequency stored under Key, creating the row when
%% it does not exist yet. Best effort: always returns ok, also when
%% the table is missing.
%%
%% ets:update_counter/3 raises badarg for a missing row. The row is
%% then created with ets:insert_new/2 instead of ets:insert/2: if
%% another process created it in the meantime, insert_new returns
%% false and the increment is retried, where a plain insert would have
%% overwritten (and thereby lost) the other process' count.
histogram_incr(Key, Incr) ->
    try ets:update_counter(?TABLE, Key, Incr) of
        _ ->
            ok
    catch
        error:badarg ->
            case catch ets:insert_new(?TABLE, {Key, Incr}) of
                false ->
                    %% Lost the creation race; the row exists now.
                    (catch ets:update_counter(?TABLE, Key, Incr)),
                    ok;
                _ ->
                    ok
            end
    end.
165 | 
%% Returns the first value in the sorted {Value, Frequency} list at
%% which the running total of frequencies reaches NeededSamples.
find_quantile(Freqs, NeededSamples) ->
    find_quantile(Freqs, 0, NeededSamples).

%% Samples is the running total of the frequencies seen so far. The
%% last element is returned unconditionally, so a NeededSamples beyond
%% the total yields the largest value.
find_quantile([{Value, _Freq}], _Samples, _NeededSamples) ->
    Value;
find_quantile([{Value, Freq} | Rest], Samples, NeededSamples) ->
    Seen = Samples + Freq,
    case Seen < NeededSamples of
        true ->
            find_quantile(Rest, Seen, NeededSamples);
        false ->
            Value
    end.
179 | 
180 | 
181 | 
182 | %%
183 | %% TESTS
184 | %%
185 | 
186 | -ifdef(TEST).
187 | -include_lib("eunit/include/eunit.hrl").
188 | 
%% EUnit generator: runs every histogram test against a fresh table
%% that setup/0 creates and teardown/1 deletes.
histogram_test_() ->
    Cases = [?_test(test_stats()),
             ?_test(test_histogram()),
             ?_test(test_samples()),
             ?_test(test_reset()),
             ?_test(test_gc()),
             ?_test(test_keys()),
             ?_test(test_binning()),
             ?_test(test_run())],
    {foreach, fun setup/0, fun teardown/1, Cases}.
203 | 
%% Fixture setup: creates the histogram table through init/0 and
%% returns the list of tables that teardown/1 has to delete afterwards.
setup() ->
    Tables = [?TABLE],
    init(),
    Tables.
207 | 
%% Fixture teardown: deletes every ETS table handed over by setup/0.
teardown(Tables) ->
    [ets:delete(Table) || Table <- Tables].
210 | 
%% summary/1 on hand-built data: each value in 1..100 observed three
%% times. Does not touch the ETS table.
test_stats() ->
    Data = [{N, 3} || N <- lists:seq(1, 100)],
    Expected = [{observations, 300},
                {min, 1},
                {median, 50},
                {mean, 50.5},
                {max, 100},
                {sd, 28.914300774835606}, %% Checked with R
                {sum, 15150},
                {sum2, 1015050},
                {p25, 25},
                {p75, 75},
                {p95, 95},
                {p99, 99},
                {p999, 100}],
    ?assertEqual(Expected, summary(Data)).
226 | 
%% Records each value in 1..100 three times, checks the summary, clears
%% the histogram and checks that the same recording gives the same
%% summary again.
test_histogram() ->
    RecordThreePasses =
        fun() ->
                [record_value(key, N) || _Pass <- [1, 2, 3],
                                         N <- lists:seq(1, 100)]
        end,
    Expected = [{observations, 300},
                {min, 1},
                {median, 50},
                {mean, 50.5},
                {max, 100},
                {sd, 28.914300774835606}, %% Checked with R
                {sum, 15150},
                {sum2, 1015050},
                {p25, 25},
                {p75, 75},
                {p95, 95},
                {p99, 99},
                {p999, 100}],

    RecordThreePasses(),
    ?assertEqual(Expected, summary(get_data(key))),

    %% One row per distinct value is removed.
    ?assertEqual(100, clear(key)),
    ?assertEqual([], summary(get_data(key))),

    RecordThreePasses(),
    ?assertEqual(Expected, summary(get_data(key))).
255 | 
%% gc/0 only removes rows whose frequency has dropped to 0: nothing
%% before a reset, all 100 rows of `key' after it, and never the rows
%% of another key.
test_gc() ->
    [record_value(key, N) || N <- lists:seq(1, 100)],
    Summary = summary(get_data(key)),
    ?assertEqual(100, proplists:get_value(observations, Summary)),

    ?assertEqual([{{key, 5}, 1}], ets:lookup(?TABLE, {key, 5})),
    ?assertEqual(0, gc()),

    record_value(other_key, 42),

    reset(key, get_data(key)),
    ?assertEqual(100, gc()),
    ?assertEqual(0, gc()),

    ?assertEqual([], get_data(key)),
    ?assertEqual([{42, 1}], get_data(other_key)),
    ok.
272 | 
%% After reset/2 with the current data, the frequencies stored for
%% `key' add up to 0.
test_reset() ->
    [record_value(key, N) || N <- lists:seq(1, 100)],

    %% Sum of all frequencies currently stored for `key'.
    TotalFrequency =
        fun() ->
                Row = {{key, '_'}, '$1'},
                lists:sum(ets:select(?TABLE, [{Row, [], ['$1']}]))
        end,

    ?assertEqual(100, TotalFrequency()),
    reset(key, get_data(key)),
    ?assertEqual(0, TotalFrequency()).
282 | 
283 | 
%% Standard deviation of the values 1..100, each recorded once.
test_samples() ->
    %% In R: sd(1:100) = 29.01149
    [record_value(key, N) || N <- lists:seq(1, 100)],
    Summary = summary(get_data(key)),
    ?assertEqual(29.011491975882016, proplists:get_value(sd, Summary)),

    %% ?assertEqual(103, clear(key)),
    %% ?assertEqual('NaN', sd(key)).
    ok.
293 | 
294 | 
%% keys/0 is empty on a fresh table and returns the recorded keys in
%% sorted order, whatever order they were recorded in.
test_keys() ->
    ?assertEqual([], keys()),

    lists:foreach(fun(Key) -> record_value(Key, 1) end, [foo, bar, baz]),

    ?assertEqual([bar, baz, foo], keys()).
303 | 
304 | 
%% Smoke test: 1000 pseudo-random values can be recorded and
%% summarised both raw and binned. The summaries are not compared.
test_binning() ->
    random:seed({1, 2, 3}),
    Samples = [random:uniform(1000000) || _ <- lists:seq(1, 1000)],

    %% Raw values first ...
    lists:foreach(fun(V) -> record_value(foo, V) end, Samples),
    _NormalSummary = summary(get_data(foo)),
    reset(foo, get_data(foo)),

    %% ... then the same values after binning.
    lists:foreach(fun(V) -> record_value(foo, bin(V)) end, Samples),
    _BinnedSummary = summary(get_data(foo)),

    ok.
317 | 
318 | 
%% Table-driven check of bin/1 as {Input, ExpectedBin} pairs.
bin_test() ->
    Cases = [{0,         0},
             {1,         1},
             {999,       1},
             {1000,      1000},
             {1001,      1000},
             {2000,      2000},
             {1010,      1000},
             {1100,      1000},
             {10001,     10000},
             {10010,     10000},
             {10010,     10000},
             {10100,     10000},
             {11000,     11000},
             {12345678,  12000000},
             {123456789, 120000000}],
    lists:foreach(fun({Input, Expected}) ->
                          ?assertEqual(Expected, bin(Input))
                  end, Cases).
335 | 
336 | 
%% run/2 and run/3 return the result of the wrapped fun and create a
%% histogram under the given key.
test_run() ->
    ?assertEqual([], keys()),

    NoArgs = fun () -> 1 + 1 end,
    2 = run(foo, NoArgs),
    ?assertEqual([foo], keys()),

    Add = fun (A, B) -> A + B end,
    2 = run(bar, Add, [1, 1]),
    ?assertEqual([bar, foo], keys()).
344 | 
345 | -endif. %% TEST
346 | 


--------------------------------------------------------------------------------
/src/statman_merger.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: Merges multiple streams
  2 | %%
  3 | %% statman_merger merges the raw data pushed from statman_server into
  4 | %% an aggregated view per metric.
  5 | -module(statman_merger).
  6 | -behaviour(gen_server).
  7 | -include_lib("eunit/include/eunit.hrl").
  8 | 
  9 | -export([start_link/0, add_subscriber/1, remove_subscriber/1, merge/1]).
 10 | 
 11 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
 12 |          terminate/2, code_change/3]).
 13 | 
%% Server state.
%%   subscribers - processes that receive the merged metrics as a
%%                 {statman_update, Metrics} cast on every report.
%%   metrics     - orddict from {Node, Key} to the latest update
%%                 received for that node and key.
-record(state, {subscribers = [],
                metrics = orddict:new()
}).
 18 | %%%===================================================================
 19 | %%% API
 20 | %%%===================================================================
 21 | 
 22 | start_link() ->
 23 |     gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
 24 | 
 25 | add_subscriber(Ref) ->
 26 |     gen_server:call(?MODULE, {add_subscriber, Ref}).
 27 | 
 28 | remove_subscriber(Ref) ->
 29 |     gen_server:call(?MODULE, {remove_subscriber, Ref}).
 30 | 
 31 | 
 32 | %%%===================================================================
 33 | %%% gen_server callbacks
 34 | %%%===================================================================
 35 | 
 36 | init([]) ->
 37 |     erlang:send_after(1000, self(), report),
 38 |     {ok, #state{subscribers = [], metrics = orddict:new()}}.
 39 | 
 40 | handle_call({add_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
 41 |     {reply, ok, State#state{subscribers = [Ref | Sub]}};
 42 | handle_call({remove_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
 43 |     {reply, ok, State#state{subscribers = lists:delete(Ref, Sub)}}.
 44 | 
 45 | 
 46 | handle_cast({statman_update, Updates}, #state{metrics = Metrics} = State) ->
 47 |     NewMetrics = lists:foldl(fun (Update, Acc) ->
 48 |                                      Key = {proplists:get_value(node, Update),
 49 |                                             proplists:get_value(key, Update)},
 50 |                                      orddict:store(Key, Update, Acc)
 51 |                              end, Metrics, Updates),
 52 | 
 53 |     {noreply, State#state{metrics = NewMetrics}}.
 54 | 
 55 | handle_info(report, State) ->
 56 |     erlang:send_after(1000, self(), report),
 57 |     Merged = merge(State#state.metrics),
 58 | 
 59 |     KeyedMetrics = Merged ++ orddict:to_list(State#state.metrics),
 60 |     {_, Metrics} = lists:unzip(KeyedMetrics),
 61 | 
 62 |     lists:foreach(fun (S) ->
 63 |                           gen_server:cast(S, {statman_update, Metrics})
 64 |                   end, State#state.subscribers),
 65 | 
 66 |     {noreply, State}.
 67 | 
 68 | terminate(_Reason, _State) ->
 69 |     ok.
 70 | 
 71 | code_change(_OldVsn, State, _Extra) ->
 72 |     {ok, State}.
 73 | 
 74 | %%%===================================================================
 75 | %%% Internal functions
 76 | %%%===================================================================
 77 | 
 78 | merge(Metrics) ->
 79 |     %% Find metrics with the same key
 80 |     %% Merge values if the type allows it
 81 |     %% Change node atom to node list
 82 | 
 83 |     orddict:fold(
 84 |       fun (_Key, Metric, Acc) ->
 85 |               case type(Metric) =:= histogram of
 86 |                   true ->
 87 |                       case orddict:find(key(Metric), Acc) of
 88 |                           {ok, OtherMetric} ->
 89 |                               orddict:store(key(Metric),
 90 |                                             do_merge(Metric, OtherMetric),
 91 |                                             Acc);
 92 |                           error ->
 93 |                               orddict:store(key(Metric),
 94 |                                             Metric,
 95 |                                             Acc)
 96 |                       end;
 97 |                   false ->
 98 |                       Acc
 99 |               end
100 |       end, orddict:new(), Metrics).
101 | 
102 | 
%% Value of the `type' property of a metric, or undefined if absent.
type(Metric) ->
    proplists:get_value(type, Metric, undefined).
%% Value of the `key' property of a metric, or undefined if absent.
key(Metric) ->
    proplists:get_value(key, Metric, undefined).
105 | 
%% Merges two metrics that share a key, treating both as orddicts:
%%   node  - collected into a list of nodes,
%%   value - histograms merged by adding the frequencies per bucket,
%%   other - the value from Left wins.
do_merge(Left, Right) ->
    AddFrequencies = fun (_Bucket, FreqLeft, FreqRight) ->
                             FreqLeft + FreqRight
                     end,
    MergeField =
        fun (node, Node, Nodes) when is_list(Nodes) ->
                %% Right already holds a list of nodes
                [Node | Nodes];
            (node, NodeLeft, NodeRight) ->
                [NodeLeft, NodeRight];
            (value, HistogramLeft, HistogramRight) ->
                orddict:merge(AddFrequencies, HistogramLeft, HistogramRight);
            (_Other, ValueLeft, _ValueRight) ->
                ValueLeft
        end,
    orddict:merge(MergeField, Left, Right).
122 | 
123 | 
124 | %%
125 | %% TESTS
126 | %%
127 | 
%% Test data: the updates a single node might report - one counter,
%% two histograms with identical buckets and one gauge - all tagged
%% with Node.
example_nodedata(Node) ->
    Buckets = [{2,3},
               {3,4},
               {4,1},
               {5,1}],
    Histogram = fun (Name) ->
                        [{key,{<<"/highscores">>,Name}},
                         {node,Node},
                         {type,histogram},
                         {value,Buckets}]
                end,
    Windowed = fun (Key, Type, Value) ->
                       [{key,Key},
                        {node,Node},
                        {type,Type},
                        {value,Value},
                        {window,1000}]
               end,
    [Windowed({db,hits}, counter, 6240),
     Histogram(db_a_latency),
     Histogram(db_b_latency),
     Windowed({db,connections}, gauge, 7)].
156 | 
157 | 
158 | %% merge_test() ->
159 | %%     ?assertEqual([{histograms, [{{foo, bar},
160 | %%                                  [{1,2}, {2,2}, {3,2}]}]},
161 | %%                   {nodes, [node2, node1]}],
162 | %%                  merge(orddict:from_list(
163 | %%                          [{node1, example_nodedata(node1)},
164 | %%                           {node2, example_nodedata(node2)}]))).
165 | 
166 | 
%% Feeds the example data of three nodes through handle_cast/2 and
%% checks that merge/1 combines the two histograms across all nodes.
report_test() ->
    {ok, Init} = init([]),
    Feed = fun (Node, State) ->
                   Update = {statman_update, example_nodedata(Node)},
                   {noreply, NewState} = handle_cast(Update, State),
                   NewState
           end,
    Final = lists:foldl(Feed, Init, [foo, bar, quux]),

    MergedHistogram =
        fun (Name) ->
                {{<<"/highscores">>,Name},
                 [{key,{<<"/highscores">>,Name}},
                  {node,[quux,foo,bar]},
                  {type,histogram},
                  {value,[{2,9},{3,12},{4,3},{5,3}]}]}
        end,

    ?assertEqual([MergedHistogram(db_a_latency),
                  MergedHistogram(db_b_latency)],
                 merge(Final#state.metrics)).
184 | 
185 | 


--------------------------------------------------------------------------------
/src/statman_poller.erl:
--------------------------------------------------------------------------------
 1 | %% @doc: Poller backwards compatibility API helper
 2 | -module(statman_poller).
 3 | 
 4 | %% API
 5 | -export([add_gauge/1, add_gauge/2,
 6 |          add_counter/1, add_counter/2,
 7 |          add_histogram/1, add_histogram/2
 8 |         ]).
 9 | -export([remove_gauge/1, remove_counter/1, remove_histogram/1]).
10 | 
11 | 
12 | %%%===================================================================
13 | %%% API
14 | %%%===================================================================
15 | 
%% @doc: Starts a poller that feeds the results of F into gauges,
%% using the default interval of 10000.
-spec add_gauge(fun()) -> ok.
add_gauge(F) ->
    add_gauge(F, 10000).

%% @doc: Same as add_gauge/1 with an explicit polling interval.
-spec add_gauge(fun(), pos_integer()) -> ok.
add_gauge(F, Interval) ->
    TypedF = {gauge, F},
    add_worker(TypedF, Interval).
21 | 
%% @doc: Starts a poller that feeds the results of F into counters,
%% using the default interval of 10000.
-spec add_counter(fun()) -> ok.
add_counter(F) ->
    add_counter(F, 10000).

%% @doc: Same as add_counter/1 with an explicit polling interval.
-spec add_counter(fun(), pos_integer()) -> ok.
add_counter(F, Interval) ->
    TypedF = {counter, F},
    add_worker(TypedF, Interval).
27 | 
%% @doc: Starts a poller that feeds the results of F into histograms,
%% using the default interval of 10000.
-spec add_histogram(fun()) -> ok.
add_histogram(F) ->
    add_histogram(F, 10000).

%% @doc: Same as add_histogram/1 with an explicit polling interval.
-spec add_histogram(fun(), pos_integer()) -> ok.
add_histogram(F, Interval) ->
    TypedF = {histogram, F},
    add_worker(TypedF, Interval).
33 | 
%% @doc: Stops the gauge poller that was added for F.
-spec remove_gauge(fun()) -> ok.
remove_gauge(F) ->
    TypedF = {gauge, F},
    remove_worker(TypedF).
36 | 
%% @doc: Stops the counter poller that was added for F.
-spec remove_counter(fun()) -> ok.
remove_counter(F) ->
    TypedF = {counter, F},
    remove_worker(TypedF).
39 | 
%% @doc: Stops the histogram poller that was added for F.
-spec remove_histogram(fun()) -> ok.
remove_histogram(F) ->
    TypedF = {histogram, F},
    remove_worker(TypedF).
42 | 
43 | 
44 | %%%===================================================================
45 | %%% Internal functionality
46 | %%%===================================================================
47 | 
%% Delegates to statman_poller_sup:add_worker/2 and hides the worker
%% pid, so this API keeps returning a plain ok.
add_worker(TypedF, Interval) ->
    {ok, _WorkerPid} = statman_poller_sup:add_worker(TypedF, Interval),
    ok.
51 | 
%% Delegates to statman_poller_sup:remove_worker/1 and returns its
%% result.
remove_worker(TypedF) ->
    Result = statman_poller_sup:remove_worker(TypedF),
    Result.
54 | 


--------------------------------------------------------------------------------
/src/statman_poller_sup.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: Poller supervisor provides API for starting poller
  2 | -module(statman_poller_sup).
  3 | -behaviour(supervisor).
  4 | 
  5 | %% API
  6 | -export([start_link/0]).
  7 | -export([init/1]).
  8 | -export([add_gauge/1, add_gauge/2,
  9 |          add_counter/1, add_counter/2,
 10 |          add_histogram/1, add_histogram/2
 11 |         ]).
 12 | -export([get_workers/0]).
 13 | -export([remove_gauge/1, remove_counter/1, remove_histogram/1]).
 14 | -export([add_worker/2, remove_worker/1]).
 15 | 
 16 | %% Types
 17 | -type types() :: gauge | counter | histogram.
 18 | -type typed_fun() :: {types(), fun()}.
 19 | -export_type([typed_fun/0, types/0]).
 20 | 
 21 | 
 22 | %%%===================================================================
 23 | %%% API
 24 | %%%===================================================================
 25 | 
 26 | -spec add_gauge(fun()) -> {ok, pid()}.
 27 | add_gauge(F) -> add_worker({gauge, F}, 10000).
 28 | 
 29 | -spec add_gauge(fun(), pos_integer()) -> {ok, pid()}.
 30 | add_gauge(F, Interval) -> add_worker({gauge, F}, Interval).
 31 | 
 32 | -spec add_counter(fun()) -> {ok, pid()}.
 33 | add_counter(F) -> add_worker({counter, F}, 10000).
 34 | 
 35 | -spec add_counter(fun(), pos_integer()) -> {ok, pid()}.
 36 | add_counter(F, Interval) -> add_worker({counter, F}, Interval).
 37 | 
 38 | -spec add_histogram(fun()) -> {ok, pid()}.
 39 | add_histogram(F) -> add_worker({histogram, F}, 10000).
 40 | 
 41 | -spec add_histogram(fun(), pos_integer()) -> {ok, pid()}.
 42 | add_histogram(F, Interval) -> add_worker({histogram, F}, Interval).
 43 | 
 44 | -spec remove_gauge(fun()) -> ok.
 45 | remove_gauge(F) -> remove_worker({gauge, F}).
 46 | 
 47 | -spec remove_counter(fun()) -> ok.
 48 | remove_counter(F) -> remove_worker({counter, F}).
 49 | 
 50 | -spec remove_histogram(fun()) -> ok.
 51 | remove_histogram(F) -> remove_worker({histogram, F}).
 52 | 
 53 | -spec get_workers() -> list().
 54 | get_workers() ->
 55 |     supervisor:which_children(?MODULE).
 56 | 
 57 | -spec start_link() -> ignore | {error, any()} | {ok, pid()}.
 58 | start_link() ->
 59 |     supervisor:start_link({local, ?MODULE}, ?MODULE, []).
 60 | 
 61 | -spec add_worker(typed_fun(), pos_integer()) -> {ok, pid()}.
 62 | add_worker(TypedF, Interval) ->
 63 |     Id = get_unique_id(TypedF),
 64 |     ChildSpec = get_worker_spec(Id, TypedF, Interval),
 65 |     case supervisor:start_child(?MODULE, ChildSpec) of
 66 |         {error, {already_started, Pid}} ->
 67 |             {ok, Pid};
 68 |         {error, Reason} ->
 69 |             throw({unable_to_start_worker, Id, Reason});
 70 |         {ok, Pid} ->
 71 |             {ok, Pid}
 72 |     end.
 73 | 
 74 | -spec remove_worker(typed_fun()) -> ok.
 75 | remove_worker(TypedF) ->
 76 |     Name = get_worker_name(get_unique_id(TypedF)),
 77 |     ok = supervisor:terminate_child(?MODULE, Name),
 78 |     ok = supervisor:delete_child(?MODULE, Name),
 79 |     ok.
 80 | 
 81 | %%%===================================================================
 82 | %%% Supervisor callbacks
 83 | %%%===================================================================
 84 | 
 85 | init([]) ->
 86 |     {ok, {{one_for_one, 5, 10}, []}}.
 87 | 
 88 | 
 89 | %%%===================================================================
 90 | %%% Internal functionality
 91 | %%%===================================================================
 92 | 
 93 | get_worker_spec(Id, TypedF, Interval) ->
 94 |     Name = get_worker_name(Id),
 95 |     {Name,
 96 |      {statman_poller_worker, start_link, [Name, TypedF, Interval]},
 97 |      transient, 5000, worker, [statman_poller_worker]
 98 |     }.
 99 | 
%% Returns the atom `statman_poller_worker_<Id>' for the integer Id.
get_worker_name(Id) ->
    Prefix = atom_to_list(statman_poller_worker),
    Suffix = integer_to_list(Id),
    list_to_atom(Prefix ++ "_" ++ Suffix).
104 | 
%% Derives the worker id from a hash of the typed fun, so the same
%% typed fun always maps to the same id.
get_unique_id(TypedF) ->
    Hash = erlang:phash2(TypedF),
    Hash.
107 | 


--------------------------------------------------------------------------------
/src/statman_poller_worker.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_poller_worker).
 2 | -behaviour(gen_server).
 3 | 
 4 | -include_lib("eunit/include/eunit.hrl").
 5 | 
 6 | %% API
 7 | -export([start_link/3]).
 8 | 
 9 | %% gen_server callbacks
10 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
11 |          terminate/2, code_change/3]).
12 | 
13 | 
%% Worker state.
%%   typed_fun - {Type, F}: the polled fun and the kind of metric its
%%               results are recorded as.
%%   fun_state - state threaded through F when F takes one argument;
%%               starts out as undefined.
%%   timer_ref - reference of the timer for the next poll.
%%   interval  - polling interval handed to start_timer/1.
-record(state, {typed_fun :: tuple(),
                fun_state :: any(),
                timer_ref :: term(),
                interval  :: integer()
               }).
19 | 
20 | %%%===================================================================
21 | %%% API
22 | %%%===================================================================
23 | 
%% @doc: Starts a poller worker for TypedF that is registered locally
%% as Name and polls with the given Interval.
-spec start_link(atom(), statman_poller_sup:typed_fun(), pos_integer())
                -> ignore | {error, any()} | {ok, pid()}.
start_link(Name, TypedF, Interval) ->
    ServerName = {local, Name},
    InitArgs = [TypedF, Interval],
    gen_server:start_link(ServerName, ?MODULE, InitArgs, []).
28 | 
29 | 
30 | %%%===================================================================
31 | %%% gen_server callbacks
32 | %%%===================================================================
33 | 
%% @doc gen_server callback. Arms the first poll timer and stores the
%% poll configuration; the poll fun's own state starts as `undefined'.
init([TypedF, Interval]) ->
    TimerRef = start_timer(Interval),
    InitialState = #state{typed_fun = TypedF,
                          fun_state = undefined,
                          timer_ref = TimerRef,
                          interval  = Interval},
    {ok, InitialState}.
39 | 
%% @doc gen_server callback. Every call is answered with `unknown_call'
%% and leaves the state untouched.
handle_call(_Request, _From, State) ->
    Reply = unknown_call,
    {reply, Reply, State}.
42 | 
%% @doc gen_server callback. Casts are ignored; the state is unchanged.
handle_cast(_Request, State) ->
    {noreply, State}.
45 | 
%% @doc gen_server callback.
%%
%% On `poll': re-arm the timer, run the poll fun and record every
%% `{Key, Value}' pair it produced according to the poller type (gauge,
%% counter or histogram). Elements of the update list that are not
%% 2-tuples are skipped.
%%
%% Any other message is ignored.
%%
%%TODO: do we need to spawn here?
handle_info(poll, #state{typed_fun = {Type, F},
                         fun_state = FunState,
                         interval  = Interval} = State) ->
    %% The next poll is scheduled before the poll fun runs.
    NewTimer = start_timer(Interval),

    {NewFunState, Updates} = run_poll_fun(F, FunState),

    Record = case Type of
                 gauge ->
                     fun (K, V) -> statman_gauge:set(K, V) end;
                 counter ->
                     fun (K, V) -> statman_counter:incr(K, V) end;
                 histogram ->
                     fun (K, V) ->
                             statman_histogram:record_value(
                               K, statman_histogram:bin(V))
                     end
             end,
    _ = [Record(K, V) || {K, V} <- Updates],

    {noreply, State#state{fun_state = NewFunState, timer_ref = NewTimer}};

handle_info(_Other, State) ->
    %% Ignore unknown messages, might come from gen calls that timed
    %% out, but response got sent anyway..
    {noreply, State}.

%% Invoke the poll fun according to its arity. Arity-0 funs return only
%% the updates and leave the fun state as it was; arity-1 funs receive
%% the previous fun state and return `{NewFunState, Updates}'.
run_poll_fun(F, FunState) ->
    case erlang:fun_info(F, arity) of
        {arity, 0} -> {FunState, F()};
        {arity, 1} -> F(FunState)
    end.
70 | 
%% @doc gen_server callback. Cancels the pending poll timer, if there is
%% one.
terminate(_Reason, #state{timer_ref = TRef}) ->
    case TRef of
        undefined ->
            ok;
        _ ->
            erlang:cancel_timer(TRef),
            ok
    end.
76 | 
%% @doc gen_server callback. The state is carried over unchanged on a
%% code upgrade.
code_change(_FromVsn, State, _Extra) ->
    {ok, State}.
79 | 
80 | 
81 | %%%===================================================================
82 | %%% Internal functionality
83 | %%%===================================================================
84 | 
%% @doc Schedule a `poll' message to the calling process after `Interval'
%% milliseconds and return the timer reference.
start_timer(Interval) ->
    Self = self(),
    erlang:send_after(Interval, Self, poll).
87 | 


--------------------------------------------------------------------------------
/src/statman_server.erl:
--------------------------------------------------------------------------------
  1 | %% @doc: Statman server, sends reports and owns the ETS tables
  2 | %%
  3 | %% Every ReportInterval milliseconds this gen_server sends a summary and the raw
  4 | %% data of all available statistics to the installed subscribers, which can
  5 | %% further aggregate, summarize or publish the statistics.
  6 | %%
  7 | -module(statman_server).
  8 | -behaviour(gen_server).
  9 | 
 10 | -export([start_link/1, start_link/2, start_link/3,
 11 |          add_subscriber/1, remove_subscriber/1, report/0]).
 12 | 
 13 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
 14 |          terminate/2, code_change/3]).
 15 | 
 16 | -record(state, {counters, subscribers = [], report_interval}).
 17 | -define(COUNTERS_TABLE, statman_server_counters).
 18 | 
 19 | %%%===================================================================
 20 | %%% API
 21 | %%%===================================================================
 22 | 
 23 | start_link(ReportInterval) ->
 24 |     start_link(ReportInterval, []).
 25 | 
 26 | start_link(ReportInterval, StartSubscribers) ->
 27 |     start_link(ReportInterval, StartSubscribers, infinity).
 28 | 
 29 | start_link(ReportInterval, StartSubscribers, GcInterval) ->
 30 |     gen_server:start_link({local, ?MODULE}, ?MODULE,
 31 |                           [ReportInterval, StartSubscribers, GcInterval], []).
 32 | 
 33 | add_subscriber(Ref) ->
 34 |     gen_server:call(?MODULE, {add_subscriber, Ref}).
 35 | 
 36 | remove_subscriber(Ref) ->
 37 |     gen_server:call(?MODULE, {remove_subscriber, Ref}).
 38 | 
 39 | report() ->
 40 |     ?MODULE ! report.
 41 | 
 42 | %%%===================================================================
 43 | %%% gen_server callbacks
 44 | %%%===================================================================
 45 | 
 46 | init([ReportInterval, StartSubscribers, GcInterval]) ->
 47 |     ok = statman_counter:init(),
 48 |     ok = statman_gauge:init(),
 49 |     ok = statman_histogram:init(),
 50 | 
 51 |     erlang:send_after(ReportInterval, self(), report),
 52 |     case GcInterval of
 53 |         infinity -> ok;
 54 |         N when is_integer(N) ->
 55 |             erlang:send_after(GcInterval, self(), {gc, GcInterval})
 56 |     end,
 57 | 
 58 |     {ok, #state{counters = dict:new(),
 59 |                 subscribers = StartSubscribers,
 60 |                 report_interval = ReportInterval}}.
 61 | 
 62 | handle_call({add_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
 63 |     {reply, ok, State#state{subscribers = [Ref | Sub]}};
 64 | handle_call({remove_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
 65 |     {reply, ok, State#state{subscribers = lists:delete(Ref, Sub)}}.
 66 | 
 67 | 
 68 | handle_cast(_Msg, State) ->
 69 |     {noreply, State}.
 70 | 
 71 | handle_info(report, #state{report_interval = Window} = State) ->
 72 |     erlang:send_after(State#state.report_interval, self(), report),
 73 | 
 74 |     Stats = counters(Window) ++ histograms(Window) ++ gauges(Window),
 75 |     lists:foreach(fun (S) ->
 76 |                           gen_server:cast(S, {statman_update, Stats})
 77 |                   end, State#state.subscribers),
 78 | 
 79 |     {noreply, State};
 80 | 
 81 | handle_info({gc, GcInterval} = GcMsg, State) ->
 82 |     erlang:send_after(GcInterval, self(), GcMsg),
 83 | 
 84 |     _NumGCed = statman_histogram:gc(),
 85 | 
 86 |     {noreply, State}.
 87 | 
 88 | terminate(_Reason, _State) ->
 89 |     ok.
 90 | 
 91 | code_change(_OldVsn, State, _Extra) ->
 92 |     {ok, State}.
 93 | 
 94 | %%%===================================================================
 95 | %%% Internal functions
 96 | %%%===================================================================
 97 | 
 98 | counters(Window) ->
 99 |     lists:map(fun ({K, V}) ->
100 |                       statman_counter:reset(K, V),
101 |                       [{key, K}, {node, node()}, {type, counter},
102 |                        {value, V}, {window, Window}]
103 |               end, statman_counter:get_all()).
104 | 
%% @doc Build one report entry (a proplist) per histogram key. The data
%% of each key is fetched and then passed to statman_histogram:reset/2.
histograms(Window) ->
    ToEntry = fun (HistKey) ->
                      Samples = statman_histogram:get_data(HistKey),
                      statman_histogram:reset(HistKey, Samples),
                      [{key, HistKey}, {node, node()}, {type, histogram},
                       {value, Samples}, {window, Window}]
              end,
    lists:map(ToEntry, statman_histogram:keys()).
112 | 
%% @doc Call statman_gauge:expire/0, then build one report entry (a
%% proplist) per gauge returned by statman_gauge:get_all/0.
gauges(Window) ->
    statman_gauge:expire(),
    ToEntry = fun ({GaugeKey, Reading}) ->
                      [{key, GaugeKey}, {node, node()}, {type, gauge},
                       {value, Reading}, {window, Window}]
              end,
    lists:map(ToEntry, statman_gauge:get_all()).
119 | 
120 | 


--------------------------------------------------------------------------------
/src/statman_sup.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_sup).
 2 | -behaviour(supervisor).
 3 | 
 4 | %% API
 5 | -export([start_link/1]).
 6 | -export([init/1]).
 7 | 
 8 | -define(CHILD(I, Type, Args),
 9 |         {I, {I, start_link, Args}, permanent, 5000, Type, [I]}).
10 | 
11 | %%%===================================================================
12 | %%% API
13 | %%%===================================================================
14 | 
%% @doc Start the top-level supervisor, registered locally under the
%% module name. An empty argument list selects a report interval of 1000;
%% otherwise the single list element is used as the report interval.
start_link([]) ->
    start_link([1000]);
start_link([ReportInterval]) ->
    InitArgs = [ReportInterval],
    supervisor:start_link({local, ?MODULE}, ?MODULE, InitArgs).
19 | 
20 | 
21 | %%%===================================================================
22 | %%% Supervisor callbacks
23 | %%%===================================================================
24 | 
%% @doc Supervisor callback. Supervises the statman server and the poller
%% supervisor under a one_for_one strategy (at most 5 restarts within 10
%% seconds).
%%
%% The poller supervisor is itself a supervisor, so its shutdown value is
%% `infinity' rather than a fixed timeout: OTP warns that a finite
%% shutdown time for a child of type `supervisor' can kill it before it
%% has terminated its own children.
init([ReportInterval]) ->
    Server = {statman_server,
              {statman_server, start_link, [ReportInterval]},
              permanent, 5000, worker, [statman_server]},
    PollerSup = {statman_poller_sup,
                 {statman_poller_sup, start_link, []},
                 permanent, infinity, supervisor, [statman_poller_sup]},
    {ok, {{one_for_one, 5, 10}, [Server, PollerSup]}}.
31 | 


--------------------------------------------------------------------------------
/src/statman_vm_metrics.erl:
--------------------------------------------------------------------------------
 1 | %% @doc: Collection of functions for sending statistics from the Erlang VM.
 2 | 
 3 | -module(statman_vm_metrics).
 4 | -compile([export_all]).
 5 | 
 6 | 
 7 | get_counters() ->
 8 |     {{input, InputBytes}, {output, OutputBytes}} = erlang:statistics(io),
 9 |     [{{vm, io_in_bytes}, InputBytes}, {{vm, io_out_bytes}, OutputBytes}].
10 | 
11 | 
%% @doc Collect VM gauges: run queue length, process count, one entry per
%% erlang:memory/0 category, message queue statistics and the total
%% number of ETS objects.
get_gauges() ->
    Memory = [{{vm_memory, Kind}, Amount}
              || {Kind, Amount} <- erlang:memory()],

    RunQueue     = {{vm, run_queue}, erlang:statistics(run_queue)},
    ProcessCount = {{vm, process_count}, erlang:system_info(process_count)},

    lists:append([[RunQueue, ProcessCount],
                  Memory,
                  message_stats(),
                  ets_stats()]).
21 | 
22 | 
%% @doc Count the processes that currently have a non-empty message queue
%% and sum the lengths of those queues. Processes that have exited by
%% the time they are inspected (process_info/2 returns `undefined') are
%% skipped.
message_stats() ->
    QueueLens = [Len || Pid <- processes(),
                        {message_queue_len, Len}
                            <- [process_info(Pid, message_queue_len)],
                        Len =/= 0],

    [{{vm, processes_with_queues}, length(QueueLens)},
     {{vm, messages_in_queue}, lists:sum(QueueLens)}].
39 | 
%% @doc Sum the number of objects over all ETS tables. A table whose
%% size comes back as `undefined' contributes nothing.
ets_stats() ->
    TotalSize = lists:foldl(
                  fun (Tab, Acc) ->
                          case ets:info(Tab, size) of
                              undefined ->
                                  Acc;
                              Size when is_integer(Size) ->
                                  Acc + Size
                          end
                  end, 0, ets:all()),
    [{{vm_ets, objects}, TotalSize}].
51 | 
52 | 
%% @doc Stateful poll fun reporting how many garbage collections happened
%% since the previous call.
%%
%% The argument is the GC count returned by the previous call, or
%% `undefined' on the first call. Returns `{NumGCs, Updates}': `NumGCs'
%% is the current total to pass to the next call, and `Updates' is empty
%% on the first call and `[{{vm, gcs}, Delta}]' afterwards.
%%
%% Only the first element of erlang:statistics(garbage_collection) is
%% used. Both clauses ignore the other two elements, so the function
%% does not depend on their exact values.
gc(undefined) ->
    {NumGCs, _, _} = erlang:statistics(garbage_collection),
    {NumGCs, []};
gc(PrevNumGCs) ->
    {NumGCs, _, _} = erlang:statistics(garbage_collection),
    {NumGCs, [{{vm, gcs}, NumGCs - PrevNumGCs}]}.
59 | 


--------------------------------------------------------------------------------
/test/statman_tests.erl:
--------------------------------------------------------------------------------
 1 | -module(statman_tests).
 2 | 
 3 | -compile(export_all).
 4 | 
 5 | -include_lib("eunit/include/eunit.hrl").
 6 | 
 7 | %% =============================================================================
 8 | statman_test_() ->
 9 |     {foreach,
10 |         fun setup/0, fun teardown/1,
11 |         [
12 |          {timeout, 200, {"Add/remove pollers", fun test_start_remove_pollers/0}},
13 |          {timeout, 200, {"Stateful pollers", fun test_stateful_pollers/0}}
14 |         ]
15 |     }.
16 | 
17 | %% =============================================================================
%% Fixture setup: start the poller supervisor, initialise the counter,
%% gauge and histogram modules, and hand the supervisor pid to teardown/1.
setup() ->
    {ok, SupPid} = statman_poller_sup:start_link(),

    statman_counter:init(),
    statman_gauge:init(),
    statman_histogram:init(),

    SupPid.
25 | 
%% Fixture teardown: delete the three statman ETS tables, then kill the
%% poller supervisor and wait for its exit signal. Exits are trapped so
%% that killing the linked supervisor does not take the test process
%% down with it.
teardown(Pid) ->
    Tables = [statman_counters, statman_gauges, statman_histograms],
    lists:foreach(fun (Table) -> ets:delete(Table) end, Tables),

    process_flag(trap_exit, true),
    exit(Pid, kill),
    receive
        {'EXIT', Pid, killed} -> ok
    end,
    ok.
33 | 
%% Registers one poller of each type, checks that registering the same
%% fun again yields the same worker pid, waits for a few polls and
%% verifies the recorded keys, then removes the pollers and checks that
%% no workers are left.
test_start_remove_pollers() ->
    PollGauge     = fun() -> [{gauge, 5}] end,
    PollCounter   = fun() -> [{counter, 5}] end,
    PollHistogram = fun() -> [{histogram, 5}, {histogram, 10}] end,

    %% Nothing has been recorded before any poller runs.
    ?assertEqual([], statman_gauge:get_all()),
    ?assertEqual([], statman_counter:get_all()),
    ?assertEqual([], statman_histogram:keys()),

    {ok, GaugeWorker}     = statman_poller_sup:add_gauge(PollGauge, 100),
    {ok, CounterWorker}   = statman_poller_sup:add_counter(PollCounter, 100),
    {ok, HistogramWorker} = statman_poller_sup:add_histogram(PollHistogram, 100),

    %% Adding the same fun a second time returns the existing worker.
    ?assertEqual({ok, GaugeWorker},
                 statman_poller_sup:add_gauge(PollGauge, 100)),
    ?assertEqual({ok, CounterWorker},
                 statman_poller_sup:add_counter(PollCounter, 100)),
    ?assertEqual({ok, HistogramWorker},
                 statman_poller_sup:add_histogram(PollHistogram, 100)),

    %% Long enough for at least two polls at a 100 ms interval.
    timer:sleep(250),

    ?assertMatch([{gauge, _}], statman_gauge:get_all()),
    ?assertEqual([counter], statman_counter:counters()),
    ?assertEqual([histogram], statman_histogram:keys()),

    ok = statman_poller_sup:remove_gauge(PollGauge),
    ok = statman_poller_sup:remove_counter(PollCounter),
    ok = statman_poller_sup:remove_histogram(PollHistogram),

    ?assertEqual([], statman_poller_sup:get_workers()).
62 | 
%% Registers the arity-1 GC poll fun as a counter poller and checks that,
%% after a few polls, the `{vm, gcs}' counter exists.
test_stateful_pollers() ->
    ?assertEqual([], statman_counter:get_all()),

    GcPoller = fun statman_vm_metrics:gc/1,
    {ok, _WorkerPid} = statman_poller_sup:add_counter(GcPoller, 100),

    timer:sleep(250),
    ?assertEqual([{vm, gcs}], statman_counter:counters()).
69 | 
%% Calls the server's init/1 directly in the test process, waits for the
%% first scheduled `{gc, GcInterval}' message, then records and resets a
%% histogram value and checks that handling the gc message leaves the
%% histogram table empty.
periodic_gc_test() ->
    GcInterval = 100,
    InitArgs = [60000, [], GcInterval],
    {ok, ServerState} = statman_server:init(InitArgs),

    %% init/1 scheduled the gc message to self(), i.e. to this process.
    receive
        {gc, GcInterval} -> ok
    end,

    statman_histogram:record_value(test, os:timestamp()),
    Recorded = statman_histogram:get_data(test),
    statman_histogram:reset(test, Recorded),

    {noreply, ServerState} =
        statman_server:handle_info({gc, GcInterval}, ServerState),
    ?assertEqual(0, ets:info(statman_histograms, size)),
    ok.
81 | 


--------------------------------------------------------------------------------