├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── rebar ├── rebar.config ├── src ├── statman.app.src ├── statman.erl ├── statman_aggregator.erl ├── statman_app.erl ├── statman_benchmark.erl ├── statman_counter.erl ├── statman_decorators.erl ├── statman_gauge.erl ├── statman_histogram.erl ├── statman_merger.erl ├── statman_poller.erl ├── statman_poller_sup.erl ├── statman_poller_worker.erl ├── statman_server.erl ├── statman_sup.erl └── statman_vm_metrics.erl └── test └── statman_tests.erl /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | priv 4 | ebin 5 | *.o 6 | *.beam 7 | *.plt 8 | *~ 9 | *# -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | notifications: 3 | email: knutin@gmail.com 4 | otp_release: 5 | - 17.0 6 | - R16B03-1 7 | - R16B03 8 | - R16B02 9 | - R16B01 10 | - R16B 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Knut Nesheim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # statman - Statistics man to the rescue! 2 | 3 | Statman makes it possible to instrument and collect statistics from 4 | your high-traffic production Erlang systems with very low 5 | overhead. The collected data points are aggregated in the VM and can 6 | be sent to services like Graphite, Munin, New Relic, etc. 7 | 8 | Statman uses in-memory ETS tables for low overhead logging and to 9 | avoid single process bottlenecks. See "How does it work" below. 10 | 11 | Integration options: 12 | 13 | * [statman_elli][]: real-time (mobile friendly) web 14 | dashboard. Exposes a small web app and a HTTP API where external 15 | tools like Munin(plugin included), Librato, etc, can pull 16 | aggregated stats. 17 | 18 | * [newrelic-erlang][]: Track web transactions happening in any Erlang 19 | webserver in New Relic, a hosted application monitoring service. 20 | 21 | * [statman_graphite][]: Push data to a Graphite instance, also works 22 | with hostedgraphite.com. 23 | 24 | * [hatman][]: Push data to stathat 25 | 26 | 27 | ## Usage 28 | 29 | Add `statman_server` to one of your supervisors with the following 30 | child specification. 
You can adjust the poll interval to your liking; 31 | it determines how frequently metrics will be pushed to the 32 | subscribers: 33 | 34 | 35 | ```erlang 36 | {statman_server, {statman_server, start_link, [1000]}, 37 | permanent, 5000, worker, []}. 38 | ``` 39 | 40 | Statman offers three data types. Here's how to use them: 41 | 42 | ```erlang 43 | %% Counters measure the frequency of an event 44 | statman_counter:incr(my_queue_in). 45 | 46 | %% A gauge is a point in time snapshot of a value 47 | statman_gauge:set(queue_size, N). 48 | 49 | %% Histograms show you the distribution of values 50 | Result = statman:run({foo, bar}, fun () -> do_something() end) 51 | ``` 52 | 53 | Updates to counters, gauges and histograms involve one atomic write 54 | in ETS. 55 | 56 | 57 | ## Decorators 58 | 59 | You can instrument a function using one of the supplied decorators: 60 | 61 | ```erlang 62 | -decorate({statman_decorators, call_rate}). 63 | my_function(A, B) -> 64 | A + B. 65 | 66 | -decorate({statman_decorators, runtime, [{key, {statman, key}}]}). 67 | other_function(foo) -> 68 | bar. 69 | ``` 70 | 71 | ## `statman_poller` 72 | 73 | It's quite common to want to poll something at an interval, like 74 | memory usage, reduction counts, etc. To this end, Statman includes 75 | `statman_poller` which can run functions at intervals on your 76 | behalf. Add the supervisor to your supervision tree with the following 77 | child specification: 78 | 79 | ```erlang 80 | {statman_poller_sup, {statman_poller_sup, start_link, []}, 81 | permanent, 5000, worker, []}. 82 | ``` 83 | 84 | In your app startup, you can then create pollers, which will be 85 | restarted if they crash and shut down together with your application: 86 | 87 | ```erlang 88 | queue_sizes() -> 89 | [{my_queue_size, my_queue:get_size()}, 90 | {other_queue, foo:queue_size()}]. 91 | 92 | app_setup() -> 93 | ok = statman_poller:add_gauge(fun ?MODULE:queue_sizes/0, 1000). 
94 | ``` 95 | 96 | A polling function can also be "stateful", allowing you to measure the 97 | rate of change in an absolute number. If the function has arity 1, it 98 | will be passed the state and expected to return the new state together with the measured values: 99 | 100 | ```erlang 101 | widget_rate(undefined) -> 102 | TotalWidgets = count_total_widgets(), 103 | {TotalWidgets, []}; 104 | widget_rate(PrevTotalWidgets) -> 105 | TotalWidgets = count_total_widgets(), 106 | {TotalWidgets, [{created_widgets, TotalWidgets - PrevTotalWidgets}]}. 107 | 108 | app_setup() -> 109 | ok = statman_poller:add_counter(fun ?MODULE:widget_rate/1, 1000). 110 | ``` 111 | 112 | It's important to pass a function reference rather than the function 113 | itself, to make code upgrades smoother. 114 | 115 | ## How does it work 116 | 117 | Using `ets:update_counter/3` we get very efficient atomic increments / 118 | decrements of counters. With this primitive, counters, gauges and 119 | histograms become very efficient. 120 | 121 | A histogram is really a frequency table of values. By keeping a count 122 | (weight) of how many times we have seen the different values, we have 123 | enough information to calculate the mean, min, max, standard deviation 124 | and percentiles. 125 | 126 | Now, from this we can build something really cool: 127 | 128 | * The space required is proportionate to how many different values we 129 | have seen, not to the total number of observations. Binning values 130 | requires even less space. 131 | * Basic aggregation is done very early in the process. Binning also 132 | helps with this. 133 | * The frequency tables can easily be merged together, either to 134 | create an aggregate of multiple nodes to create a cluster view or 135 | aggregate over time to create for example 5 minute summaries. 136 | 137 | 138 | ## Clusters 139 | 140 | In a single node application, you can collect, aggregate and push out 141 | metrics from that single node. 
In bigger applications it might be 142 | helpful to collect metrics inside of each node, but aggregate together 143 | and view metrics for the whole cluster in one place. Having an "ops 144 | dashboard" showing message queues in key processes, node throughput, 145 | cluster throughput, request latency per node, request latency as a 146 | whole, etc, is extremely useful. 147 | 148 | ## Setup 149 | 150 | Statman has two parts, `statman_server` and `statman_aggregator`. The 151 | server owns the ETS-tables and periodically forwards the changes to 152 | any interested aggregator. The aggregator keeps a moving window of 153 | metrics coming from one or more servers. You can ask the aggregator 154 | for the stats collected in the last N seconds. 155 | 156 | You need to run one server under a supervisor in each node. If you 157 | have a cluster of nodes, you can run the aggregator on just one of 158 | them, collecting stats for the whole cluster. 159 | 160 | 161 | [statman_elli]: https://github.com/knutin/statman_elli 162 | [newrelic-erlang]: https://github.com/wooga/newrelic-erlang 163 | [statman_graphite]: https://github.com/chrisavl/statman_graphite 164 | [hatman]: https://github.com/chrisavl/hatman 165 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knutin/statman/e6ff10815eac1e613b619e69db06d635f1a4a488/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [debug_info]}. 2 | 3 | {deps, 4 | [ 5 | {decorators, "", {git, "git://github.com/chrisavl/erlang_decorators.git", {branch, "master"}}} 6 | ] 7 | }. 

--------------------------------------------------------------------------------
/src/statman.app.src:
--------------------------------------------------------------------------------
{application, statman,
 [
  {description, "Statman to the rescue!"},
  {vsn, "0.5"},
  {registered, []},
  {applications, [
                  kernel,
                  stdlib
                 ]},
  {mod, { statman_app, []}},
  {env, []}
 ]}.

--------------------------------------------------------------------------------
/src/statman.erl:
--------------------------------------------------------------------------------
%% @doc Thin facade over the statman metric modules.
%%
%% Every function here forwards to statman_counter, statman_gauge or
%% statman_histogram, so instrumented code only needs one module name.
-module(statman).

%% Counters
-export([incr/1, incr/2]).
%% Gauges
-export([set_gauge/2, incr_gauge/1, decr_gauge/1]).
%% Histograms
-export([run/2, run/3, run/4, time/2]).


%% @doc Forwards to statman_counter:incr/1.
incr(Key) ->
    statman_counter:incr(Key).

%% @doc Forwards to statman_counter:incr/2.
incr(Key, Increment) ->
    statman_counter:incr(Key, Increment).


%% @doc Forwards to statman_gauge:set/2.
set_gauge(Key, Value) ->
    statman_gauge:set(Key, Value).

%% @doc Forwards to statman_gauge:incr/1.
incr_gauge(Key) ->
    statman_gauge:incr(Key).

%% @doc Forwards to statman_gauge:decr/1.
decr_gauge(Key) ->
    statman_gauge:decr(Key).


%% @doc Forwards to statman_histogram:run/2.
run(Key, F) ->
    statman_histogram:run(Key, F).

%% @doc Forwards to statman_histogram:run/3.
run(Key, F, Args) ->
    statman_histogram:run(Key, F, Args).

%% @doc Forwards to statman_histogram:run/4.
run(Key, M, F, Args) ->
    statman_histogram:run(Key, M, F, Args).


%% @doc Passes Value through statman_histogram:bin/1 and records the
%% result under Key with statman_histogram:record_value/2.
time(Key, Value) ->
    Binned = statman_histogram:bin(Value),
    statman_histogram:record_value(Key, Binned).

--------------------------------------------------------------------------------
/src/statman_aggregator.erl:
--------------------------------------------------------------------------------
%% @doc Aggregate statman samples
%%
%% statman_aggregator receives metrics from statman_servers running in
%% your cluster, picks them apart and keeps a moving window of the raw
%% values. On demand, the samples are aggregated together. Metrics
%% with the same key, but from different nodes are also merged.
-module(statman_aggregator).
-behaviour(gen_server).

%% API
-export([start_link/0]).
-export([get_window/1, get_window/2]).
-export([get_merged_window/1, get_merged_window/2]).
-export([get_keys/0]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% Call timeout (ms) used by get_window/1 and get_merged_window/1.
-define(DEFAULT_CALL_TIMEOUT, 5000).

-record(state, {
          subscribers = [],
          last_sample = [],
          metrics = dict:new()
         }).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc Starts the aggregator, registered locally under the module name.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc Same as get_window/2 with a 5000 ms call timeout.
get_window(Size) ->
    get_window(Size, ?DEFAULT_CALL_TIMEOUT).

%% @doc Asks the aggregator for the per-node metrics of the last Size
%% seconds. Timeout is the gen_server:call/3 timeout.
get_window(Size, Timeout) ->
    request_window(Size, false, Timeout).

%% @doc Same as get_merged_window/2 with a 5000 ms call timeout.
get_merged_window(Size) ->
    get_merged_window(Size, ?DEFAULT_CALL_TIMEOUT).

%% @doc Like get_window/2, but the request also asks for metrics that
%% share a key across nodes to be merged.
get_merged_window(Size, Timeout) ->
    request_window(Size, true, Timeout).

%% @doc Asks the aggregator for the keys it currently tracks.
get_keys() ->
    gen_server:call(?MODULE, get_keys).

%% Shared call used by both window flavours; MergeNodes is the boolean
%% carried in the request tuple.
request_window(Size, MergeNodes, Timeout) ->
    gen_server:call(?MODULE, {get_window, Size, MergeNodes}, Timeout).

%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% Starts a 10 second interval timer that sends `push' to this process
%% and begins with the record defaults (an empty metrics dict).
init([]) ->
    timer:send_interval(10000, push),
    {ok, #state{}}.

%% Subscriber bookkeeping: newest subscriber goes first in the list.
handle_call({add_subscriber, Ref}, _From, State = #state{subscribers = Subs}) ->
    {reply, ok, State#state{subscribers = [Ref | Subs]}};
handle_call({remove_subscriber, Ref}, _From, State = #state{subscribers = Subs}) ->
    Remaining = lists:delete(Ref, Subs),
    {reply, ok, State#state{subscribers = Remaining}};

%% Window request: purge old samples, keep the purged dict as the new
%% state, and let a separate process build and send the reply so the
%% server is not blocked while aggregating.
handle_call({get_window, Size, MergeNodes}, From, State = #state{metrics = Metrics}) ->
    Fresh = dict:map(fun purge_entry/2, Metrics),
    spawn(fun() -> do_reply(From, Fresh, Size, MergeNodes) end),
    {noreply, State#state{metrics = Fresh}};

%% Lists every tracked {Node, Key} together with its metric type.
handle_call(get_keys, _From, State) ->
    Collect = fun (Key, {Type, _Samples}, Acc) -> [{Key, Type} | Acc] end,
    Keys = dict:fold(Collect, [], State#state.metrics),
    {reply, {ok, Keys}, State}.

%% Drops the samples purge/1 rejects, keeping the metric type.
purge_entry(_NodeKey, {Type, Samples}) ->
    {Type, purge(Samples)}.


%% Folds a batch of pushed samples into the metrics dict.
handle_cast({statman_update, NewSamples}, State = #state{metrics = Metrics}) ->
    Updated = lists:foldl(fun insert/2, Metrics, NewSamples),
    {noreply, State#state{metrics = Updated}}.

%% Any other message is ignored.
handle_info(_Msg, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Runs in the process spawned by the get_window call: aggregates every
%% metric over the requested window and replies {ok, Reply} to Client.
do_reply(Client, Metrics, Size, MergeNodes) ->
    Aggregated = [aggregate_entry(Size, Entry) || Entry <- dict:to_list(Metrics)],
    Reply = window_reply(MergeNodes, Size, Aggregated),
    gen_server:reply(Client, {ok, Reply}).

%% Collapses the samples of one metric that fall inside the window.
aggregate_entry(Size, {{Node, Key}, {Type, Samples}}) ->
    {Node, Key, Type, merge_samples(Type, window(Size, Samples))}.

%% Per-node entries only, or per-node entries followed by the entries
%% merged across nodes.
window_reply(false, Size, Aggregated) ->
    format(Size, Aggregated);
window_reply(true, Size, Aggregated) ->
    format(Size, Aggregated) ++ format(Size, merge(Aggregated)).


%% Prepends the metric's value, stamped with the current second, to the
%% sample list stored under its {Node, Key}.
insert(Metric, Metrics) ->
    Type = type(Metric),
    Sample = {now_to_seconds(), value(Metric)},
    %% FIXME: this breaks if you have the same key for different types of metrics
    Prepend = fun ({_OldType, Samples}) -> {Type, [Sample | Samples]} end,
    dict:update(nodekey(Metric), Prepend, {Type, [Sample]}, Metrics).

%% Values of the samples inside the last Size seconds. A Size of 1
%% returns the most recent value without looking at its timestamp.
window(_Size, []) ->
    [];
window(1, [{_Ts, Latest} | _]) ->
    [Latest];
window(Size, Samples) ->
    Threshold = now_to_seconds() - Size,
    [Value || {_Ts, Value} <- samples_after(Threshold, Samples)].

%% Keeps only the samples from the last 300 seconds.
purge(Samples) ->
    Oldest = now_to_seconds() - 300,
    samples_after(Oldest, Samples).


%% Leading run of samples stamped at or after Threshold. Samples are
%% stored newest first, so scanning stops at the first older one.
samples_after(Threshold, [{Ts, _} = Sample | Rest]) when Ts >= Threshold ->
    [Sample | samples_after(Threshold, Rest)];
samples_after(_Threshold, [{_Ts, _} | _]) ->
    [];
samples_after(_Threshold, []) ->
    [].



%% Merges aggregated per-node entries that share a key. Gauges are left
%% out, and only keys reported by more than one node are returned. The
%% node element of each result is the list of contributing nodes.
merge(Metrics) ->
    ByKey = lists:foldl(fun merge_metric/2, orddict:new(), Metrics),
    [Entry || {_Key, {[_, _ | _], _, _, _} = Entry} <- orddict:to_list(ByKey)].

%% Fold step for merge/1.
merge_metric({_Node, _Key, gauge, _Value}, Acc) ->
    Acc;
merge_metric({Node, Key, counter, Count}, Acc) ->
    Entry = case orddict:find(Key, Acc) of
                {ok, {Nodes, Key, counter, OtherCount}} ->
                    {[Node | Nodes], Key, counter, Count + OtherCount};
                error ->
                    {[Node], Key, counter, Count}
            end,
    orddict:store(Key, Entry, Acc);
merge_metric({Node, Key, Type, Samples}, Acc) ->
    Entry = case orddict:find(Key, Acc) of
                {ok, {Nodes, Key, Type, OtherSamples}} ->
                    Combined = merge_samples(Type, [Samples, OtherSamples]),
                    {[Node | Nodes], Key, Type, Combined};
                error ->
                    {[Node], Key, Type, Samples}
            end,
    orddict:store(Key, Entry, Acc).


%% Collapses a list of samples of the given type into one value:
%% histograms are frequency tables whose counts are added per value,
%% counters are summed, and a gauge yields its first sample (0 if none).
merge_samples(histogram, Samples) ->
    lists:foldl(fun (Sample, Agg) ->
                        orddict:merge(fun (_, A, B) ->
                                              A + B
                                      end,
                                      orddict:from_list(Sample),
                                      Agg)
                end, orddict:new(), Samples);


merge_samples(counter, Samples) ->
    lists:sum(Samples);

merge_samples(gauge, []) ->
    0;
merge_samples(gauge, Samples) ->
    hd(Samples).



%% Turns {Nodes, Key, Type, Value} tuples into the proplists returned
%% to callers. Size is in seconds; the window is reported in ms.
format(_, []) ->
    [];

format(Size, [{Nodes, Key, Type, Value} | Rest]) ->
    [
     [{key, Key},
      {node, Nodes},
      {type, Type},
      {value, Value},
      {window, Size * 1000}]

     | format(Size, Rest)].



%% Accessors for the proplist form of an incoming metric.
type(Metric)  -> proplists:get_value(type, Metric).
value(Metric) -> proplists:get_value(value, Metric).

nodekey(Metric) ->
    {proplists:get_value(node, Metric),
     proplists:get_value(key, Metric)}.


%% Current OS time in whole seconds.
now_to_seconds() ->
    {MegaSeconds, Seconds, _} = os:timestamp(),
    MegaSeconds * 1000000 + Seconds.


%%
%% TESTS
%%

-ifdef(TEST).
aggregator_test_() ->
    {foreach,
     fun setup/0, fun teardown/1,
     [?_test(expire()),
      ?_test(window()),
      ?_test(merged_window())
     ]
    }.

setup() ->
    {ok, Pid} = start_link(),
    true = unlink(Pid),
    Pid.

%% Kills the aggregator and waits for its 'DOWN' message, so each test
%% continues as soon as the process is gone instead of sleeping a fixed
%% second and then checking liveness.
teardown(Pid) ->
    Ref = erlang:monitor(process, Pid),
    exit(Pid, kill),
    receive
        {'DOWN', Ref, process, Pid, _Reason} ->
            ok
    after 5000 ->
            error({aggregator_still_alive, Pid})
    end.

%% Sends one sample to the aggregator under test, in the same message
%% shape handle_cast/2 accepts.
push(Sample) ->
    gen_server:cast(?MODULE, {statman_update, [Sample]}).

%% Samples are visible in a 2 second window right after being pushed
%% and are gone from it 3 seconds later.
expire() ->
    push(sample_histogram('a@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_gauge('a@knutin', 1)),
    push(sample_gauge('a@knutin', 3)),

    ?assert(lists:all(fun (M) ->
                              V = proplists:get_value(value, M, 0),
                              V =/= 0 andalso V =/= []
                      end, element(2, get_window(2)))),

    %% Deliberate wait: the test is about samples ageing out of the window.
    timer:sleep(3000),

    ?assert(lists:all(fun (M) ->
                              V = proplists:get_value(value, M),
                              V == 0 orelse V =:= []
                      end, element(2, get_window(2)))).

%% Per-node aggregation over a 60 second window.
window() ->
    push(sample_histogram('a@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_gauge('a@knutin', 1)),
    push(sample_gauge('a@knutin', 3)),
    push(sample_histogram('b@knutin')),

    timer:sleep(1000),

    push(sample_histogram('a@knutin')),
    push(sample_histogram('a@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_gauge('a@knutin', 2)),
    push(sample_histogram('b@knutin')),

    {ok, Aggregated} = get_window(60),

    [MergedCounter, MergedGauge, MergedHistogramA, MergedHistogramB] = lists:sort(Aggregated),


    ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
                  {node, 'a@knutin'},
                  {type, histogram},
                  {value, [{1, 3}, {2, 6}, {3, 9}]},
                  {window, 60000}], MergedHistogramA),

    ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
                  {node, 'b@knutin'},
                  {type, histogram},
                  {value, [{1, 2}, {2, 4}, {3, 6}]},
                  {window, 60000}], MergedHistogramB),

    ?assertEqual([{key, {foo, bar}},
                  {node, 'a@knutin'},
                  {type, counter},
                  {value, 90},
                  {window, 60000}], MergedCounter),

    ?assertEqual([{key, {foo, baz}},
                  {node, 'a@knutin'},
                  {type, gauge},
                  {value, 2},
                  {window, 60000}], MergedGauge).

%% Aggregation with cross-node merging over a 60 second window.
merged_window() ->
    push(sample_histogram('a@knutin')),
    push(sample_histogram('b@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_counter('b@knutin')),

    timer:sleep(1000),

    push(sample_histogram('a@knutin')),
    push(sample_histogram('b@knutin')),
    push(sample_counter('a@knutin')),
    push(sample_counter('b@knutin')),

    ?assertEqual([
                  {nodekey(sample_counter('a@knutin')), counter},
                  {nodekey(sample_histogram('a@knutin')), histogram},
                  {nodekey(sample_counter('b@knutin')), counter},
                  {nodekey(sample_histogram('b@knutin')), histogram}
                 ], lists:sort(element(2, get_keys()))),

    {ok, Aggregated} = get_merged_window(60),

    [_CounterA, _CounterB, MergedCounter,
     _HistogramA, _HistogramB, MergedHistogram] = lists:sort(Aggregated),


    ?assertEqual([{key, {<<"/highscores">>,db_a_latency}},
                  {node, ['a@knutin', 'b@knutin']},
                  {type, histogram},
                  {value, [{1, 4}, {2, 8}, {3, 12}]},
                  {window, 60000}], MergedHistogram),

    ?assertEqual([{key, {foo, bar}},
                  {node, ['a@knutin', 'b@knutin']},
                  {type, counter},
                  {value, 120},
                  {window, 60000}], MergedCounter).



%% Fixture metrics in the proplist shape the aggregator receives.
sample_histogram(Node) ->
    [{key,{<<"/highscores">>,db_a_latency}},
     {node,Node},
     {type,histogram},
     {value,[{1,1},
             {2,2},
             {3,3}]},
     {window,1000}].

sample_counter(Node) ->
    [{key,{foo, bar}},
     {node,Node},
     {type,counter},
     {value,30},
     {window,1000}].

sample_gauge(Node, Value) ->
    [{key,{foo, baz}},
     {node,Node},
     {type,gauge},
     {value,Value},
     {window,1000}].
-endif.

--------------------------------------------------------------------------------
/src/statman_app.erl:
--------------------------------------------------------------------------------
-module(statman_app).

-behaviour(application).

%% Application callbacks
-export([start/2, stop/1]).

%% ===================================================================
%% Application callbacks
%% ===================================================================

%% Starts the top-level supervisor, passing it the argument list [1000].
start(_StartType, _StartArgs) ->
    statman_sup:start_link([1000]).

stop(_State) ->
    ok.

--------------------------------------------------------------------------------
/src/statman_benchmark.erl:
--------------------------------------------------------------------------------
-module(statman_benchmark).
-compile([export_all]).

-define(PARETO_SHAPE, 1.5).
-define(MAX_VALUE, 1000).


%% @doc Records Writes values under the histogram key `foo' and times
%% both the inserts and the summary calculation (microseconds, as
%% returned by timer:tc/3).
%%
%% statman_histogram:summary/1 takes the raw data returned by
%% get_data/1, not the key, so the timed step fetches the data first.
%% The `start_time' key (sic) holds the summary time; it is kept under
%% that name so existing consumers of the result keep working.
histogram_run(Writes) ->
    {InsertTime, _} = timer:tc(?MODULE, do_histogram_run, [Writes]),
    {StatTime, Stats} = timer:tc(?MODULE, histogram_summary, [foo]),
    [{insert_time, InsertTime}, {start_time, StatTime}, {stats, Stats}].

%% Fetches the recorded data for Key and summarises it.
histogram_summary(Key) ->
    statman_histogram:summary(statman_histogram:get_data(Key)).

%% Records Writes pseudo-random values under the histogram key `foo'.
do_histogram_run(0) ->
    ok;
do_histogram_run(Writes) ->
    Value = pareto(trunc(?MAX_VALUE * 0.2), ?PARETO_SHAPE),
    statman_histogram:record_value(foo, Value),
    do_histogram_run(Writes - 1).

%% Draws one integer from a random:uniform/0 sample transformed with
%% the given mean and shape parameters.
pareto(Mean, Shape) ->
    S1 = (-1 / Shape),
    S2 = Mean * (Shape - 1),
    U = 1 - random:uniform(),
    trunc((math:pow(U, S1) - 1) * S2).

--------------------------------------------------------------------------------
/src/statman_counter.erl:
--------------------------------------------------------------------------------
-module(statman_counter).
-export([init/0, counters/0, get/1, get_all/0, reset/2]).
-export([incr/1, incr/2, decr/1, decr/2, set/2]).
-compile([{no_auto_import, [get/1]}]).
-include_lib("eunit/include/eunit.hrl").


-define(TABLE, statman_counters).


%%
%% API
%%

%% @doc Creates the public named ETS table holding all counters.
init() ->
    ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
    ok.


%% @doc Returns the current value of the counter, or raises badarg if
%% the counter does not exist.
%%
%% Uses a key lookup rather than a match pattern, so keys that contain
%% atoms such as '_' or '$1' only ever address their own row.
get(Key) ->
    case ets:lookup(?TABLE, Key) of
        [{_, N}] when is_integer(N) ->
            N;
        [] ->
            error(badarg)
    end.

%% @doc Returns every counter as a {Key, Value} pair.
get_all() ->
    ets:select(?TABLE, [{ {'$1', '$2'}, [], [{{'$1', '$2'}}]}]).


%% @doc Increments the counter by one.
incr(Key) -> incr(Key, 1).

%% @doc Decrements the counter by one, or by the given amount.
decr(Key) -> decr(Key, 1).
decr(Key, Incr) -> incr(Key, -Incr).


%% @doc Returns the keys of all counters.
counters() ->
    ets:select(?TABLE, [{ {'$1', '$2'}, [], ['$1'] }]).

%% @doc Subtracts a previously read value from the counter, so that
%% increments written since that read are kept.
reset(Key, Value) ->
    decr(Key, Value).

%%
%% INTERNAL HELPERS
%%

%% @doc Sets the counter to Value, creating it if needed. Always
%% returns ok, also when the table does not exist (best effort).
set(Key, Value) ->
    _ = overwrite(Key, Value),
    ok.


%% @doc Adds the integer Incr to the counter, creating the counter if
%% it does not exist. Floats raise badarg. Always returns ok for
%% integers, also when the table does not exist (best effort).
incr(Key, Incr) when is_integer(Incr) ->
    %% If lock contention on the single key becomes a problem, we can
    %% use multiple keys and try to snapshot a value across all
    %% subkeys. See
    %% https://github.com/boundary/high-scale-lib/blob/master/src/main/java/org/cliffc/high_scale_lib/ConcurrentAutoTable.java
    %%
    %% Steps, each tried only if the previous one failed:
    %%  1. atomic increment of an existing counter;
    %%  2. create the counter, unless another process created it first;
    %%  3. increment again if that creation race was lost, so neither
    %%     writer's update is dropped;
    %%  4. overwrite, for a row whose value cannot be incremented.
    _ = bump(Key, Incr)
        orelse create(Key, Incr)
        orelse bump(Key, Incr)
        orelse overwrite(Key, Incr),
    ok;

incr(_Key, Float) when is_float(Float) ->
    error(badarg).

%% true if the counter existed and was incremented.
bump(Key, Incr) ->
    try ets:update_counter(?TABLE, Key, Incr) of
        _ -> true
    catch
        error:badarg -> false
    end.

%% true if the counter did not exist and was created with Value.
create(Key, Value) ->
    try
        ets:insert_new(?TABLE, {Key, Value})
    catch
        error:badarg -> false
    end.

%% Unconditionally stores Value; a missing table is ignored.
overwrite(Key, Value) ->
    try
        ets:insert(?TABLE, {Key, Value})
    catch
        error:badarg -> true
    end.





%%
%% TESTS
%%

counter_test_() ->
    {foreach,
     fun setup/0, fun teardown/1,
     [
      ?_test(test_operations()),
      ?_test(find_counters()),
      {timeout, 100, ?_test(benchmark())},
      ?_test(test_reset()),
      ?_test(floats())
     ]
    }.

setup() ->
    init(),
    [?TABLE].

teardown(Tables) ->
    lists:map(fun ets:delete/1, Tables).


test_operations() ->
    ?assertError(badarg, get(key)),

    ?assertEqual(ok, incr(key)),
    ?assertEqual(1, get(key)),

    ?assertEqual(ok, decr(key)),
    ?assertEqual(0, get(key)),

    ?assertEqual(ok, decr(key)),
    ?assertEqual(-1, get(key)),

    ?assertEqual(ok, set(key, 5)),
    ?assertEqual(5, get(key)),

    ?assertEqual(ok, decr(key)),
    ?assertEqual(4, get(key)).


find_counters() ->
    ?assertEqual([], counters()),
    ?assertEqual([], get_all()),

    ?assertEqual(ok, incr(foo)),
    ?assertEqual(ok, incr(bar)),
    ?assertEqual(lists:sort([bar, foo]), lists:sort(counters())),
    ?assertEqual(lists:sort([{bar, 1}, {foo, 1}]), lists:sort(get_all())).



test_reset() ->
    ?assertEqual([], counters()),

    ok = incr(foo, 5),
    ?assertEqual(5, get(foo)),

    [{foo, Count}] = get_all(),
    incr(foo, 3),
    ?assertEqual(8, get(foo)),

    ok = reset(foo, Count),
    ?assertEqual(3, get(foo)).


floats() ->
    ?assertError(badarg, get(foo)),
    ?assertError(badarg, incr(foo, 2.5)).



benchmark() ->
    do_benchmark(4, 100000),
    do_benchmark(8, 100000),
    do_benchmark(32, 100000).

%% Spawns Processes writers that each increment the counter `foo'
%% Writes times, waits for all of them and logs the elapsed time.
%%
%% Timing uses os:timestamp/0, as the rest of the code base does,
%% instead of the deprecated erlang:now/0.
do_benchmark(Processes, Writes) ->
    Start = os:timestamp(),
    Parent = self(),
    Pids = [spawn(fun() ->
                          benchmark_incrementer(foo, Writes),
                          Parent ! {self(), done}
                  end) || _ <- lists:seq(1, Processes)],
    receive_all(Pids, done),
    error_logger:info_msg("~p processes, ~p writes in ~p ms~n",
                          [Processes, Writes,
                           timer:now_diff(os:timestamp(), Start) / 1000]),
    ok.

%% Blocks until every pid in the list has sent {Pid, Msg}.
receive_all([], _) ->
    ok;
receive_all(Pids, Msg) ->
    receive
        {Pid, Msg} ->
            receive_all(lists:delete(Pid, Pids), Msg)
    end.

%% Increments Key N times.
benchmark_incrementer(_, 0) ->
    ok;
benchmark_incrementer(Key, N) ->
    incr(Key),
    benchmark_incrementer(Key, N-1).

--------------------------------------------------------------------------------
/src/statman_decorators.erl:
--------------------------------------------------------------------------------
-module(statman_decorators).
-include_lib("eunit/include/eunit.hrl").
-compile([{parse_transform, decorators}]).


-export([runtime/3, reductions/3, memory/3, call_rate/3]).

%%
%% DECORATORS
%%
%% Each decorator takes the decorated fun, its argument list and an
%% option proplist. The metric key is the `key' option, defaulting to
%% the fun's name.

%% @doc Applies Fun to Args and hands the start timestamp, taken just
%% before the call, to statman_histogram:record_value/2 under Key.
runtime(Fun, Args, Options) ->
    Key = proplists:get_value(key, Options, name(Fun)),
    Start = os:timestamp(),
    Result = erlang:apply(Fun, Args),
    statman_histogram:record_value(Key, Start),
    Result.

%% @doc Records the change in the calling process' `reductions'
%% process_info value across the call.
reductions(Fun, Args, Options) ->
    process_info_decorator(Fun, Args, Options, reductions).

%% @doc Records the change in the calling process' `memory'
%% process_info value across the call.
memory(Fun, Args, Options) ->
    process_info_decorator(Fun, Args, Options, memory).

%% @doc Increments the counter Key, then applies Fun to Args.
call_rate(Fun, Args, Options) ->
    Key = proplists:get_value(key, Options, name(Fun)),
    statman_counter:incr(Key),
    apply(Fun, Args).


%%
%% INTERNAL
%%

%% Applies Fun to Args and records, under the metric key, how much the
%% process_info item InfoKey of the calling process changed.
process_info_decorator(Fun, Args, Options, InfoKey) ->
    MetricKey = proplists:get_value(key, Options, name(Fun)),
    {InfoKey, Before} = process_info(self(), InfoKey),
    Outcome = erlang:apply(Fun, Args),
    {InfoKey, After} = process_info(self(), InfoKey),
    Delta = After - Before,
    statman_histogram:record_value(MetricKey, Delta),
    Outcome.

%% Name of the fun, as reported by erlang:fun_info/2.
name(Fun) ->
    {name, FunName} = erlang:fun_info(Fun, name),
    FunName.



-ifdef(TEST).
-decorate({statman_decorators, runtime, [{key, runtime_key}]}).
-decorate({statman_decorators, memory, [{key, memory_key}]}).
-decorate({statman_decorators, reductions, [{key, reductions_key}]}).
-decorate({statman_decorators, call_rate, [{key, rate_key}]}).
decorated_function(A, B) ->
    A + B.

-decorate({statman_decorators, call_rate}).
no_key(A, B) ->
    A + B.

%% All four decorators with explicit keys: one counter increment and
%% one histogram per histogram-backed decorator.
decorators_test() ->
    ok = delete_tables(), %% remove leftovers from other tests
    ok = create_tables(),
    3 = decorated_function(1, 2),
    ?assertEqual(1, statman_counter:get(rate_key)),
    ?assertEqual([memory_key, reductions_key, runtime_key],
                 lists:sort(statman_histogram:keys())),
    ok = delete_tables().

%% Without a `key' option the generated fun name is used as the key.
no_key_test() ->
    ok = delete_tables(),
    ok = create_tables(),
    3 = no_key(1, 2),
    ?assertEqual([{'-no_key_decorator1___/2-fun-0-',1}],
                 statman_counter:get_all()),
    ok = delete_tables().


%% Creates the histogram and counter tables the decorators write to.
create_tables() ->
    ok = statman_histogram:init(),
    ok = statman_counter:init().

%% Deletes both tables; a table that does not exist is ignored.
delete_tables() ->
    lists:foreach(fun (Table) -> catch ets:delete(Table) end,
                  [statman_histograms, statman_counters]).
-endif.

--------------------------------------------------------------------------------
/src/statman_gauge.erl:
--------------------------------------------------------------------------------
-module(statman_gauge).
-export([init/0, expire/0, get_all/0]).
-export([set/2, incr/1, incr/2, decr/1, decr/2]).
-include_lib("eunit/include/eunit.hrl").

-define(TABLE, statman_gauges).

%% @doc Creates the public named ETS table holding all gauges. Rows are
%% {Key, TimestampSeconds, Value}.
init() ->
    ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
    ok.

%% @doc Sets the gauge to the given number, stamped with the current
%% time. Always returns ok, also when the table does not exist.
set(Key, Value) when is_integer(Value) orelse is_float(Value) ->
    set(Key, Value, now_to_seconds()).

%% Same as set/2 with an explicit timestamp (used by the tests).
set(Key, Value, Timestamp) ->
    (catch ets:insert(?TABLE, {Key, Timestamp, Value})),
    ok.

%% @doc Increments the gauge by one.
incr(Key) -> incr(Key, 1).

%% @doc Decrements the gauge by one, or by the given amount.
decr(Key) -> incr(Key, -1).
decr(Key, Decr) -> incr(Key, -Decr).


%% @doc Adds Incr to the gauge and refreshes its timestamp, creating
%% the gauge with value Incr if it does not exist.
%%
%% Integer updates of integer gauges go through the atomic
%% ets:update_counter/3. That call also fails when the stored value or
%% the increment is a float; in that case the current value is read
%% and the sum written back (not atomic), instead of replacing the
%% gauge with the increment.
incr(Key, Incr) ->
    try ets:update_counter(?TABLE, Key, {3, Incr}) of
        _ ->
            ets:update_element(?TABLE, Key, {2, now_to_seconds()}),
            ok
    catch
        error:badarg ->
            incr_fallback(Key, Incr)
    end.

%% Handles everything the atomic path rejected: a missing gauge, a
%% float value or increment, or a missing table.
incr_fallback(Key, Incr) ->
    case catch ets:lookup(?TABLE, Key) of
        [{_, _, Current}] when is_number(Current), is_number(Incr) ->
            set(Key, Current + Incr);
        _ ->
            set(Key, Incr)
    end.

%% @doc Deletes gauges that have not been updated in the last 60 seconds.
expire() ->
    expire(now_to_seconds() - 60).

%% @doc: Deletes any gauges that has not been updated since the given
%% threshold.
expire(Threshold) ->
    ets:select_delete(?TABLE, [{{'_', '$1', '_'}, [{'<', '$1', Threshold}], [true]}]).

%% @doc Returns every gauge as a {Key, Value} pair.
get_all() ->
    ets:select(?TABLE, [{ {'$1', '_', '$2'}, [], [{{'$1', '$2'}}]}]).

%% Current OS time in whole seconds.
now_to_seconds() ->
    {MegaSeconds, Seconds, _} = os:timestamp(),
    MegaSeconds * 1000000 + Seconds.


%%
%% TESTS
%%

gauge_test_() ->
    {foreach,
     fun setup/0, fun teardown/1,
     [
      ?_test(test_expire()),
      ?_test(test_expire_incr_decr()),
      ?_test(test_set_incr())
     ]
    }.

setup() ->
    init(),
    [?TABLE].

teardown(Tables) ->
    lists:map(fun ets:delete/1, Tables).


test_expire() ->
    ?assertEqual([], get_all()),
    set(foo, 30, now_to_seconds() - 3),
    ?assertEqual([{foo, 30}], get_all()),
    ?assertEqual(0, expire(now_to_seconds() - 5)),
    ?assertEqual(1, expire(now_to_seconds() - 0)),
    ?assertEqual([], get_all()).
%% A gauge that is decremented counts as updated and must survive an
%% expiry pass whose threshold lies before that update.
test_expire_incr_decr() ->
    ?assertEqual([], get_all()),

    ok = set(problems, 100, now_to_seconds() - 3),
    ok = decr(problems),
    ?assertEqual([{problems, 99}], get_all()),
    ?assertEqual(0, expire(now_to_seconds()-1)),
    ?assertEqual([{problems, 99}], get_all()).


test_set_incr() ->
    incr(foo, 2),
    ?assertEqual([{foo, 2}], get_all()),

    set(foo, 10),
    incr(foo),
    incr(foo),
    ?assertEqual([{foo, 12}], get_all()),

    decr(foo),
    ?assertEqual([{foo, 11}], get_all()).

--------------------------------------------------------------------------------
/src/statman_histogram.erl:
--------------------------------------------------------------------------------
%% @doc: Histogram backed by ETS and ets:update_counter/3.
%%
%% Calculation of statistics is borrowed from basho_stats_histogram
%% and basho_stats_sample.

-module(statman_histogram).
-export([init/0,
         record_value/2,
         run/2,
         run/3,
         run/4,
         clear/1,
         keys/0,
         get_data/1,
         summary/1,
         reset/2,
         gc/0]).

-export([bin/1]).

-compile([native]).

-define(TABLE, statman_histograms).


%%
%% API
%%

%% @doc: Creates the public, named ETS table holding one
%% {{UserKey, Value}, Count} row per distinct recorded value. The
%% calling process owns the table.
init() ->
    ets:new(?TABLE, [named_table, public, set, {write_concurrency, true}]),
    ok.

%% @doc: Records one observation under UserKey.
%%
%% Given a {MegaSecs, Secs, MicroSecs} start timestamp, the elapsed
%% time in microseconds up to now is binned with bin/1 and recorded.
%% Given an integer, that exact value is recorded.
record_value(UserKey, {MegaSecs, Secs, MicroSecs}) when
      is_integer(MegaSecs) andalso MegaSecs >= 0 andalso
      is_integer(Secs) andalso Secs >= 0 andalso
      is_integer(MicroSecs) andalso MicroSecs >= 0 ->
    %% run/2,3,4 take their start time from os:timestamp(), so the end
    %% time must come from the same clock. erlang:now() is deprecated
    %% and is a different (strictly increasing) time source.
    Elapsed = timer:now_diff(os:timestamp(), {MegaSecs, Secs, MicroSecs}),
    %% OS time may step backwards between the two readings; never
    %% record a negative duration.
    record_value(UserKey, bin(max(0, Elapsed)));

record_value(UserKey, Value) when is_integer(Value) ->
    histogram_incr({UserKey, Value}, 1),
    ok.


%% @doc: Runs F, records how long it took under Key and returns the
%% result of F. Nothing is recorded if F raises.
run(Key, F) ->
    Start = os:timestamp(),
    Result = F(),
    record_value(Key, Start),
    Result.
%% @doc: Like run/2, but applies F to the argument list Args.
run(Key, F, Args) ->
    StartedAt = os:timestamp(),
    Outcome = erlang:apply(F, Args),
    record_value(Key, StartedAt),
    Outcome.

%% @doc: Like run/2, but applies the function F of module M to Args.
run(Key, M, F, Args) ->
    StartedAt = os:timestamp(),
    Outcome = erlang:apply(M, F, Args),
    record_value(Key, StartedAt),
    Outcome.


%% @doc: Sorted list of every user key that has a row in the table.
keys() ->
    %% TODO: Maybe keep a special table of all used keys?
    AllUserKeys = [{{{'$1', '_'}, '_'}, [], ['$1']}],
    lists:usort(ets:select(?TABLE, AllUserKeys)).

%% @doc: Deletes the rows whose count is 0 and returns how many rows
%% were deleted.
gc() ->
    ZeroCounts = [{{{'_', '_'}, 0}, [], [true]}],
    ets:select_delete(?TABLE, ZeroCounts).

%% @doc: Deletes every row of UserKey and returns how many rows were
%% deleted.
clear(UserKey) ->
    RowsOfKey = [{{{UserKey, '_'}, '_'}, [], [true]}],
    ets:select_delete(?TABLE, RowsOfKey).


%% @doc: Returns the raw histogram recorded by record_value/2,
%% suitable for passing to summary/1 and reset/2. The result is a
%% sorted list of {Value, Count} pairs; rows with a count of zero or
%% less are left out.
get_data(UserKey) ->
    lists:sort(
      ets:select(?TABLE,
                 [{{{UserKey, '$1'}, '$2'},
                   [{'>', '$2', 0}],
                   [{{'$1', '$2'}}]}])).


%% @doc: Returns summary statistics from the raw data
summary([]) ->
    [];
summary(Data) ->
    {N, Sum, Sum2, Max} = scan(Data),
    %% Value found at the given fraction of all N observations
    Quantile = fun (Fraction) -> find_quantile(Data, Fraction * N) end,

    [{observations, N},
     {min, find_quantile(Data, 0)},
     {median, Quantile(0.50)},
     {mean, Sum / N},
     {max, Max},
     {sd, sd(N, Sum, Sum2)},
     {sum, Sum},
     {sum2, Sum2},
     {p25, Quantile(0.25)},
     {p75, Quantile(0.75)},
     {p95, Quantile(0.95)},
     {p99, Quantile(0.99)},
     {p999, Quantile(0.999)}
    ].



%% @doc: Decrements the frequency counters with the current values
%% given, effectively resetting while keeping updates written during
%% our stats calculations.
reset(UserKey, Data) ->
    lists:foreach(
      fun ({Value, Count}) ->
              ets:update_counter(?TABLE, {UserKey, Value}, -Count)
      end, Data).


%%
%% INTERNAL HELPERS
%%
-spec bin(integer()) -> integer().
%% @doc: Rounds N down to a coarser bucket so the histogram stays
%% small: values below 10000 are rounded down to a multiple of 1000,
%% larger values keep their two leading digits. A non-zero input that
%% would round to 0 is reported as 1.
bin(0) -> 0;
bin(N) ->
    Width = bin_width(N),
    case (N div Width) * Width of
        0 ->
            1;
        Bin ->
            Bin
    end.

bin_width(N) when N < 10000 ->
    1000;
bin_width(N) ->
    %% keep 2 digits
    round(math:pow(10, trunc(math:log10(N)) - 1)).

%% @doc: Folds {Value, Weight} pairs into
%% {Observations, Sum, SumOfSquares, Max}.
scan([]) ->
    {0, 0, 0, 0};
scan([{First, _Weight} | _] = Data) ->
    %% Seed the maximum with a real value; seeding with 0 reported a
    %% max of 0 for data consisting only of negative values.
    scan(0, 0, 0, First, Data).

scan(N, Sum, Sum2, Max, []) ->
    {N, Sum, Sum2, Max};
scan(N, Sum, Sum2, Max, [{Value, Weight} | Rest]) ->
    V = Value * Weight,
    scan(N + Weight,
         Sum + V,
         Sum2 + ((Value * Value) * Weight),
         max(Max, Value),
         Rest).


%% @doc: Sample standard deviation computed from the number of
%% observations, their sum and their sum of squares. Returns 'NaN' for
%% fewer than two observations.
sd(N, _Sum, _Sum2) when N < 2 ->
    'NaN';
sd(N, Sum, Sum2) ->
    SumSq = Sum * Sum,
    Variance = (Sum2 - (SumSq / N)) / (N - 1),
    %% Float rounding of SumSq / N can push the variance of (nearly)
    %% constant data slightly below zero, which would make math:sqrt/1
    %% raise badarith.
    math:sqrt(max(0.0, Variance)).


%% Adds Incr to the counter stored under Key, creating the row when it
%% does not exist yet. Always returns ok.
histogram_incr(Key, Incr) ->
    try ets:update_counter(?TABLE, Key, Incr) of
        _ -> ok
    catch
        error:badarg -> histogram_create(Key, Incr)
    end.

%% insert_new/2 is atomic, so of several processes racing to create
%% the same row exactly one wins; the others add their increment on
%% top. A plain insert here would overwrite concurrent increments.
histogram_create(Key, Incr) ->
    try ets:insert_new(?TABLE, {Key, Incr}) of
        true ->
            ok;
        false ->
            try ets:update_counter(?TABLE, Key, Incr) of
                _ -> ok
            catch
                error:badarg -> ok
            end
    catch
        %% Table missing: recording stays best effort, as before
        error:badarg -> ok
    end.

%% @doc: Walks the sorted {Value, Freq} list and returns the first
%% value at which the running total of frequencies reaches
%% NeededSamples; the last value if it is never reached.
find_quantile(Freqs, NeededSamples) ->
    find_quantile(Freqs, 0, NeededSamples).

find_quantile([{Value, _Freq} | []], _Samples, _NeededSamples) ->
    Value;
find_quantile([{Value, Freq} | Rest], Samples, NeededSamples) ->
    Samples2 = Samples + Freq,
    if
        Samples2 < NeededSamples ->
            find_quantile(Rest, Samples2, NeededSamples);
        true ->
            Value
    end.



%%
%% TESTS
%%

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

histogram_test_() ->
    {foreach,
     fun setup/0, fun teardown/1,
     [
      ?_test(test_stats()),
      ?_test(test_histogram()),
      ?_test(test_samples()),
      ?_test(test_reset()),
      ?_test(test_gc()),
      ?_test(test_keys()),
      ?_test(test_binning()),
      ?_test(test_run())
     ]
    }.

setup() ->
    init(),
    [?TABLE].

teardown(Tables) ->
    lists:map(fun ets:delete/1, Tables).
%% summary/1 on hand-built data: every value 1..100 with weight 3.
test_stats() ->
    Weighted = [{N, 3} || N <- lists:seq(1, 100)],
    ?assertEqual([{observations, 300},
                  {min, 1},
                  {median, 50},
                  {mean, 50.5},
                  {max, 100},
                  {sd, 28.914300774835606}, %% Checked with R
                  {sum, 15150},
                  {sum2, 1015050},
                  {p25, 25},
                  {p75, 75},
                  {p95, 95},
                  {p99, 99},
                  {p999, 100}],
                 summary(Weighted)).

%% The same data recorded through the table, before and after clear/1.
test_histogram() ->
    RecordThreeRounds =
        fun () ->
                [record_value(key, N) || _Round <- [1, 2, 3],
                                         N <- lists:seq(1, 100)]
        end,
    Expected = [{observations, 300},
                {min, 1},
                {median, 50},
                {mean, 50.5},
                {max, 100},
                {sd, 28.914300774835606}, %% Checked with R
                {sum, 15150},
                {sum2, 1015050},
                {p25, 25},
                {p75, 75},
                {p95, 95},
                {p99, 99},
                {p999, 100}],

    RecordThreeRounds(),
    ?assertEqual(Expected, summary(get_data(key))),

    ?assertEqual(100, clear(key)),
    ?assertEqual([], summary(get_data(key))),

    RecordThreeRounds(),
    ?assertEqual(Expected, summary(get_data(key))).

%% gc/0 removes only rows whose count dropped to zero and leaves other
%% keys alone.
test_gc() ->
    [record_value(key, N) || N <- lists:seq(1, 100)],
    Observations = proplists:get_value(observations, summary(get_data(key))),
    ?assertEqual(100, Observations),

    ?assertEqual([{{key, 5}, 1}], ets:lookup(?TABLE, {key, 5})),
    ?assertEqual(0, gc()),

    record_value(other_key, 42),

    reset(key, get_data(key)),
    ?assertEqual(100, gc()),
    ?assertEqual(0, gc()),

    ?assertEqual([], get_data(key)),
    ?assertEqual([{42, 1}], get_data(other_key)),
    ok.
%% reset/2 with the current data brings every counter back to zero.
test_reset() ->
    [record_value(key, N) || N <- lists:seq(1, 100)],
    TotalCount =
        fun () ->
                Counts = ets:select(?TABLE, [{{{key, '_'}, '$1'}, [], ['$1']}]),
                lists:sum(Counts)
        end,
    ?assertEqual(100, TotalCount()),
    reset(key, get_data(key)),
    ?assertEqual(0, TotalCount()).


test_samples() ->
    %% In R: sd(1:100) = 29.01149
    [record_value(key, N) || N <- lists:seq(1, 100)],
    Sd = proplists:get_value(sd, summary(get_data(key))),
    ?assertEqual(29.011491975882016, Sd),

    %% ?assertEqual(103, clear(key)),
    %% ?assertEqual('NaN', sd(key)).
    ok.


test_keys() ->
    ?assertEqual([], keys()),

    lists:foreach(fun (Key) -> record_value(Key, 1) end, [foo, bar, baz]),

    ?assertEqual([bar, baz, foo], keys()).


%% Smoke test: summaries of raw and of binned values can be computed.
test_binning() ->
    random:seed({1, 2, 3}),
    Values = [random:uniform(1000000) || _ <- lists:seq(1, 1000)],

    [record_value(foo, V) || V <- Values],
    _NormalSummary = summary(get_data(foo)),
    reset(foo, get_data(foo)),

    [record_value(foo, bin(V)) || V <- Values],
    _BinnedSummary = summary(get_data(foo)),

    ok.


%% Each pair is {Input, ExpectedBin}.
bin_test() ->
    Cases = [{0, 0},
             {1, 1},
             {999, 1},
             {1000, 1000},
             {1001, 1000},
             {2000, 2000},
             {1010, 1000},
             {1100, 1000},
             {10001, 10000},
             {10010, 10000},
             {10010, 10000},
             {10100, 10000},
             {11000, 11000},
             {12345678, 12000000},
             {123456789, 120000000}],
    [?assertEqual(Expected, bin(Input)) || {Input, Expected} <- Cases],
    ok.
%% run/2 and run/3 return the result of the fun and leave a histogram
%% behind under the given key.
test_run() ->
    ?assertEqual([], keys()),
    2 = run(foo, fun () -> 1 + 1 end),
    ?assertEqual([foo], keys()),

    Add = fun (A, B) -> A + B end,
    2 = run(bar, Add, [1, 1]),
    ?assertEqual([bar, foo], keys()).

-endif. %% TEST

--------------------------------------------------------------------------------
/src/statman_merger.erl:
--------------------------------------------------------------------------------
%% @doc: Merges multiple streams
%%
%% statman_merger merges the raw data pushed from statman_server into
%% an aggregated view per metric.
-module(statman_merger).
-behaviour(gen_server).
-include_lib("eunit/include/eunit.hrl").

-export([start_link/0, add_subscriber/1, remove_subscriber/1, merge/1]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-record(state, {subscribers = [],
                metrics = orddict:new()
               }).

%% Milliseconds until the first report
-define(FIRST_REPORT_AFTER, 1000).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts the merger, registered locally under the module name.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc: Adds Ref to the processes that receive a `statman_update'
%% cast on every report.
add_subscriber(Ref) ->
    gen_server:call(?MODULE, {add_subscriber, Ref}).

%% @doc: Removes one occurrence of Ref from the subscribers.
remove_subscriber(Ref) ->
    gen_server:call(?MODULE, {remove_subscriber, Ref}).


%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% Schedules the first report and starts with no subscribers and no
%% metrics (the record defaults).
init([]) ->
    erlang:send_after(?FIRST_REPORT_AFTER, self(), report),
    {ok, #state{}}.
handle_call({add_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
    {reply, ok, State#state{subscribers = [Ref | Sub]}};
handle_call({remove_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
    {reply, ok, State#state{subscribers = lists:delete(Ref, Sub)}}.


%% Stores every pushed metric under {Node, Key}, replacing the previous
%% update of the same metric from the same node.
handle_cast({statman_update, Updates}, #state{metrics = Metrics} = State) ->
    NewMetrics = lists:foldl(fun store_update/2, Metrics, Updates),
    {noreply, State#state{metrics = NewMetrics}};
handle_cast(_Msg, State) ->
    %% Ignore unknown casts, like the other statman servers do
    {noreply, State}.

%% Once per second: sends the merged histograms followed by all raw
%% metrics to every subscriber.
handle_info(report, State) ->
    erlang:send_after(1000, self(), report),
    Merged = merge(State#state.metrics),

    KeyedMetrics = Merged ++ orddict:to_list(State#state.metrics),
    {_, Metrics} = lists:unzip(KeyedMetrics),

    lists:foreach(fun (S) ->
                          gen_server:cast(S, {statman_update, Metrics})
                  end, State#state.subscribers),

    {noreply, State};
handle_info(_Msg, State) ->
    %% A stray message (for example the late reply to a timed out
    %% call) must not crash the merger: a restart would lose the
    %% subscriber list and all metrics collected so far.
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

store_update(Update, Metrics) ->
    Key = {proplists:get_value(node, Update),
           proplists:get_value(key, Update)},
    orddict:store(Key, Update, Metrics).

%% @doc: Builds one merged metric per histogram key out of the
%% per-node metrics: histograms with the same key are combined with
%% do_merge/2 (a key reported by a single node is kept as it is), and
%% metrics of any other type are left out. Returns an orddict keyed by
%% metric key.
merge(Metrics) ->
    orddict:fold(fun merge_metric/3, orddict:new(), Metrics).

merge_metric(_NodeAndKey, Metric, Acc) ->
    case type(Metric) of
        histogram ->
            Key = key(Metric),
            Merged = case orddict:find(Key, Acc) of
                         {ok, OtherMetric} -> do_merge(Metric, OtherMetric);
                         error             -> Metric
                     end,
            orddict:store(Key, Merged, Acc);
        _NotMergeable ->
            Acc
    end.


type(Metric) -> proplists:get_value(type, Metric).
key(Metric) -> proplists:get_value(key, Metric).

%% Merges two metrics of the same key into one. Both property lists
%% are handed to orddict:merge/3, which presumably requires them to be
%% sorted by property name (as the example data below is) - confirm
%% for other producers.
do_merge(Left, Right) ->
    orddict:merge(fun merge_property/3, Left, Right).

%% node:  collect the node names of both sides in a list
%% value: add up the frequencies of equal histogram values
%% other: keep the property of the left metric
merge_property(node, Node, Nodes) when is_list(Nodes) ->
    [Node | Nodes];
merge_property(node, NodeA, NodeB) ->
    [NodeA, NodeB];
merge_property(value, HistogramA, HistogramB) ->
    orddict:merge(fun add_frequencies/3, HistogramA, HistogramB);
merge_property(_Other, LeftValue, _RightValue) ->
    LeftValue.

add_frequencies(_Value, FreqLeft, FreqRight) ->
    FreqLeft + FreqRight.


%%
%% TESTS
%%

%% One update as a node would push it: a counter, two histograms and a
%% gauge, all reported by Node.
example_nodedata(Node) ->
    LatencyHistogram = [{2,3},
                        {3,4},
                        {4,1},
                        {5,1}],
    [[{key,{db,hits}},
      {node,Node},
      {type,counter},
      {value,6240},
      {window,1000}],
     [{key,{<<"/highscores">>,db_a_latency}},
      {node,Node},
      {type,histogram},
      {value,LatencyHistogram}],

     [{key,{<<"/highscores">>,db_b_latency}},
      {node,Node},
      {type,histogram},
      {value,LatencyHistogram}],

     [{key,{db,connections}},
      {node,Node},
      {type,gauge},
      {value,7},
      {window,1000}]
    ].


%% merge_test() ->
%%     ?assertEqual([{histograms, [{{foo, bar},
%%                                  [{1,2}, {2,2}, {3,2}]}]},
%%                   {nodes, [node2, node1]}],
%%                  merge(orddict:from_list(
%%                          [{node1, example_nodedata(node1)},
%%                           {node2, example_nodedata(node2)}]))).
%% Histograms pushed by three nodes are merged per key; the counter and
%% the gauge of the example data are not part of the merged view.
report_test() ->
    {ok, Init} = init([]),
    PushFrom = fun (Node, State) ->
                       Update = {statman_update, example_nodedata(Node)},
                       {noreply, NewState} = handle_cast(Update, State),
                       NewState
               end,
    Final = lists:foldl(PushFrom, Init, [foo, bar, quux]),


    ?assertEqual([{{<<"/highscores">>,db_a_latency},
                   [{key,{<<"/highscores">>,db_a_latency}},
                    {node,[quux,foo,bar]},
                    {type,histogram},
                    {value,[{2,9},{3,12},{4,3},{5,3}]}]},
                  {{<<"/highscores">>,db_b_latency},
                   [{key,{<<"/highscores">>,db_b_latency}},
                    {node,[quux,foo,bar]},
                    {type,histogram},
                    {value,[{2,9},{3,12},{4,3},{5,3}]}]}], merge(Final#state.metrics)).


--------------------------------------------------------------------------------
/src/statman_poller.erl:
--------------------------------------------------------------------------------
%% @doc: Poller backwards compatibility API helper
-module(statman_poller).

%% API
-export([add_gauge/1, add_gauge/2,
         add_counter/1, add_counter/2,
         add_histogram/1, add_histogram/2
        ]).
-export([remove_gauge/1, remove_counter/1, remove_histogram/1]).

%% Interval used by the one-argument add_* functions
-define(DEFAULT_INTERVAL, 10000).


%%%===================================================================
%%% API
%%%===================================================================

-spec add_gauge(fun()) -> ok.
add_gauge(F) -> add_gauge(F, ?DEFAULT_INTERVAL).

-spec add_gauge(fun(), pos_integer()) -> ok.
add_gauge(F, Interval) -> add_worker({gauge, F}, Interval).

-spec add_counter(fun()) -> ok.
add_counter(F) -> add_counter(F, ?DEFAULT_INTERVAL).

-spec add_counter(fun(), pos_integer()) -> ok.
add_counter(F, Interval) -> add_worker({counter, F}, Interval).

-spec add_histogram(fun()) -> ok.
add_histogram(F) -> add_histogram(F, ?DEFAULT_INTERVAL).

-spec add_histogram(fun(), pos_integer()) -> ok.
add_histogram(F, Interval) -> add_worker({histogram, F}, Interval).

-spec remove_gauge(fun()) -> ok.
remove_gauge(F) -> remove_worker({gauge, F}).

-spec remove_counter(fun()) -> ok.
remove_counter(F) -> remove_worker({counter, F}).

-spec remove_histogram(fun()) -> ok.
remove_histogram(F) -> remove_worker({histogram, F}).


%%%===================================================================
%%% Internal functionality
%%%===================================================================

%% Starts the worker through the poller supervisor, dropping the pid
%% it returns so that this API answers with a plain ok.
add_worker(TypedF, Interval) ->
    {ok, _WorkerPid} = statman_poller_sup:add_worker(TypedF, Interval),
    ok.

remove_worker(TypedF) ->
    statman_poller_sup:remove_worker(TypedF).

--------------------------------------------------------------------------------
/src/statman_poller_sup.erl:
--------------------------------------------------------------------------------
%% @doc: Poller supervisor provides API for starting poller
-module(statman_poller_sup).
-behaviour(supervisor).

%% API
-export([start_link/0]).
-export([init/1]).
-export([add_gauge/1, add_gauge/2,
         add_counter/1, add_counter/2,
         add_histogram/1, add_histogram/2
        ]).
-export([get_workers/0]).
-export([remove_gauge/1, remove_counter/1, remove_histogram/1]).
-export([add_worker/2, remove_worker/1]).

%% Types
-type types() :: gauge | counter | histogram.
-type typed_fun() :: {types(), fun()}.
-export_type([typed_fun/0, types/0]).


%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts a gauge poller for F with an interval of 10000.
-spec add_gauge(fun()) -> {ok, pid()}.
add_gauge(F) -> add_gauge(F, 10000).

-spec add_gauge(fun(), pos_integer()) -> {ok, pid()}.
add_gauge(F, Interval) -> add_worker({gauge, F}, Interval).
-spec add_counter(fun()) -> {ok, pid()}.
add_counter(F) -> add_worker({counter, F}, 10000).

-spec add_counter(fun(), pos_integer()) -> {ok, pid()}.
add_counter(F, Interval) -> add_worker({counter, F}, Interval).

-spec add_histogram(fun()) -> {ok, pid()}.
add_histogram(F) -> add_worker({histogram, F}, 10000).

-spec add_histogram(fun(), pos_integer()) -> {ok, pid()}.
add_histogram(F, Interval) -> add_worker({histogram, F}, Interval).

-spec remove_gauge(fun()) -> ok.
remove_gauge(F) -> remove_worker({gauge, F}).

-spec remove_counter(fun()) -> ok.
remove_counter(F) -> remove_worker({counter, F}).

-spec remove_histogram(fun()) -> ok.
remove_histogram(F) -> remove_worker({histogram, F}).

%% @doc: Returns the supervisor's children as reported by
%% supervisor:which_children/1.
-spec get_workers() -> list().
get_workers() ->
    supervisor:which_children(?MODULE).

-spec start_link() -> ignore | {error, any()} | {ok, pid()}.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @doc: Starts a poller worker for TypedF that polls every Interval
%% milliseconds. If a worker for the same TypedF is already running,
%% its pid is returned and Interval is ignored. Throws
%% {unable_to_start_worker, Id, Reason} if the worker cannot be
%% started.
-spec add_worker(typed_fun(), pos_integer()) -> {ok, pid()}.
add_worker(TypedF, Interval) ->
    Id = get_unique_id(TypedF),
    ChildSpec = get_worker_spec(Id, TypedF, Interval),
    case start_worker(ChildSpec) of
        {ok, Pid} ->
            {ok, Pid};
        {error, {already_started, Pid}} ->
            {ok, Pid};
        {error, Reason} ->
            throw({unable_to_start_worker, Id, Reason})
    end.

%% Workers are transient, so one that stopped normally is not
%% restarted but its child specification stays behind. start_child/2
%% then answers {error, already_present}. Replace the stale
%% specification with the new one (so the new interval applies) rather
%% than failing.
start_worker(ChildSpec) ->
    case supervisor:start_child(?MODULE, ChildSpec) of
        {error, already_present} ->
            ChildId = element(1, ChildSpec),
            case supervisor:delete_child(?MODULE, ChildId) of
                ok ->
                    supervisor:start_child(?MODULE, ChildSpec);
                {error, _} = Error ->
                    Error
            end;
        Result ->
            Result
    end.

%% @doc: Stops the worker polling TypedF and removes its child
%% specification. Crashes with badmatch if there is no such worker.
-spec remove_worker(typed_fun()) -> ok.
remove_worker(TypedF) ->
    Name = get_worker_name(get_unique_id(TypedF)),
    ok = supervisor:terminate_child(?MODULE, Name),
    ok = supervisor:delete_child(?MODULE, Name),
    ok.

%%%===================================================================
%%% Supervisor callbacks
%%%===================================================================

%% Starts without children; workers are added with add_worker/2.
init([]) ->
    {ok, {{one_for_one, 5, 10}, []}}.
%%%===================================================================
%%% Internal functionality
%%%===================================================================

%% Child specification of the worker polling TypedF: a transient
%% worker, registered under and identified by the name derived from Id.
get_worker_spec(Id, TypedF, Interval) ->
    WorkerName = get_worker_name(Id),
    StartFunc = {statman_poller_worker, start_link,
                 [WorkerName, TypedF, Interval]},
    {WorkerName, StartFunc, transient, 5000, worker, [statman_poller_worker]}.

%% The atom statman_poller_worker_<Id>
get_worker_name(Id) ->
    list_to_atom("statman_poller_worker_" ++ integer_to_list(Id)).

%% The same typed fun always hashes to the same id, which is how a
%% worker is found again when it is removed.
get_unique_id(TypedF) ->
    erlang:phash2(TypedF).

--------------------------------------------------------------------------------
/src/statman_poller_worker.erl:
--------------------------------------------------------------------------------
-module(statman_poller_worker).
-behaviour(gen_server).

-include_lib("eunit/include/eunit.hrl").

%% API
-export([start_link/3]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).


-record(state, {typed_fun :: tuple(),
                fun_state :: any(),
                timer_ref :: term(),
                interval :: integer()
               }).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts a worker, registered locally as Name, that polls
%% TypedF every Interval milliseconds.
-spec start_link(atom(), statman_poller_sup:typed_fun(), pos_integer())
                -> ignore | {error, any()} | {ok, pid()}.
start_link(Name, TypedF, Interval) ->
    InitArgs = [TypedF, Interval],
    gen_server:start_link({local, Name}, ?MODULE, InitArgs, []).
%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% The first poll happens one Interval after start; the poll fun
%% starts out with the state `undefined'.
init([TypedF, Interval]) ->
    TimerRef = start_timer(Interval),
    {ok, #state{typed_fun = TypedF,
                timer_ref = TimerRef,
                interval = Interval,
                fun_state = undefined}}.

handle_call(_Msg, _From, State) ->
    {reply, unknown_call, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.

%%TODO: do we need to spawn here?
%% Schedules the next poll, runs the poll fun and writes every
%% {Key, Value} pair it returns to the table matching the poller type.
%% A fun of arity 0 returns just the updates; a fun of arity 1 gets
%% the state kept from its previous call and returns
%% {NewState, Updates}.
handle_info(poll, #state{typed_fun = {Type, F}, fun_state = FunState} = State) ->
    NextTimer = start_timer(State#state.interval),

    {arity, Arity} = erlang:fun_info(F, arity),
    {NextFunState, Updates} = case Arity of
                                  0 -> {FunState, F()};
                                  1 -> F(FunState)
                              end,
    case Type of
        gauge ->
            [statman_gauge:set(Key, Value) || {Key, Value} <- Updates];
        counter ->
            [statman_counter:incr(Key, Value) || {Key, Value} <- Updates];
        histogram ->
            [statman_histogram:record_value(Key, statman_histogram:bin(Value))
             || {Key, Value} <- Updates]
    end,

    {noreply, State#state{fun_state = NextFunState, timer_ref = NextTimer}};

handle_info(_, State) ->
    %% Ignore unknown messages, might come from gen calls that timed
    %% out, but response got sent anyway..
    {noreply, State}.

%% Cancels the pending poll timer, if there is one.
terminate(_Reason, #state{timer_ref = TimerRef}) ->
    case TimerRef of
        undefined ->
            ok;
        _ ->
            erlang:cancel_timer(TimerRef),
            ok
    end.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%%%===================================================================
%%% Internal functionality
%%%===================================================================

%% Sends `poll' to this process after Interval milliseconds.
start_timer(Interval) ->
    erlang:send_after(Interval, self(), poll).

--------------------------------------------------------------------------------
/src/statman_server.erl:
--------------------------------------------------------------------------------
%% @doc: Statman server, sends reports and owns the ETS tables
%%
%% Every second this gen_server sends a summary and the raw data of
%% all available statistics to the installed subscribers which can
%% further aggregate, summarize or publish the statistics.
%%
-module(statman_server).
-behaviour(gen_server).

-export([start_link/1, start_link/2, start_link/3,
         add_subscriber/1, remove_subscriber/1, report/0]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-record(state, {counters, subscribers = [], report_interval}).
-define(COUNTERS_TABLE, statman_server_counters).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts the server without subscribers and without periodic
%% histogram garbage collection.
start_link(ReportInterval) ->
    start_link(ReportInterval, [], infinity).

%% @doc: Like start_link/1, with an initial list of subscribers.
start_link(ReportInterval, StartSubscribers) ->
    start_link(ReportInterval, StartSubscribers, infinity).

%% @doc: Starts the server, registered locally under the module name.
%% The intervals are in milliseconds; a GcInterval of `infinity'
%% turns periodic histogram garbage collection off.
start_link(ReportInterval, StartSubscribers, GcInterval) ->
    InitArgs = [ReportInterval, StartSubscribers, GcInterval],
    gen_server:start_link({local, ?MODULE}, ?MODULE, InitArgs, []).

%% @doc: Adds Ref to the processes that receive a `statman_update'
%% cast with every report.
add_subscriber(Ref) ->
    gen_server:call(?MODULE, {add_subscriber, Ref}).

%% @doc: Removes one occurrence of Ref from the subscribers.
remove_subscriber(Ref) ->
    gen_server:call(?MODULE, {remove_subscriber, Ref}).

%% @doc: Asks the server to send a report now, by sending it the
%% `report' message.
report() ->
    ?MODULE ! report.
%%%===================================================================
%%% gen_server callbacks
%%%===================================================================

%% Creates the counter, gauge and histogram tables (owned by this
%% process), schedules the first report and, unless GcInterval is
%% `infinity', the first histogram garbage collection.
init([ReportInterval, StartSubscribers, GcInterval]) ->
    ok = statman_counter:init(),
    ok = statman_gauge:init(),
    ok = statman_histogram:init(),

    schedule_report(ReportInterval),
    case GcInterval of
        infinity -> ok;
        N when is_integer(N) ->
            erlang:send_after(GcInterval, self(), {gc, GcInterval})
    end,

    {ok, #state{counters = dict:new(),
                subscribers = StartSubscribers,
                report_interval = ReportInterval}}.

handle_call({add_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
    {reply, ok, State#state{subscribers = [Ref | Sub]}};
handle_call({remove_subscriber, Ref}, _From, #state{subscribers = Sub} = State) ->
    {reply, ok, State#state{subscribers = lists:delete(Ref, Sub)}}.


handle_cast(_Msg, State) ->
    {noreply, State}.

%% Periodic report. Only this message re-arms the timer, so there is
%% always exactly one report timer running.
handle_info(scheduled_report, State) ->
    schedule_report(State#state.report_interval),
    publish_stats(State),
    {noreply, State};

%% Manual report requested through report/0. It must not re-arm the
%% timer: doing so started one more never-ending timer chain per call,
%% so every manual report permanently raised the report frequency.
handle_info(report, State) ->
    publish_stats(State),
    {noreply, State};

handle_info({gc, GcInterval} = GcMsg, State) ->
    erlang:send_after(GcInterval, self(), GcMsg),

    _NumGCed = statman_histogram:gc(),

    {noreply, State};

handle_info(_Unexpected, State) ->
    %% Ignore stray messages. Crashing here would also destroy the ETS
    %% tables this process owns and with them all collected metrics.
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Sends `scheduled_report' to this process after ReportInterval ms.
schedule_report(ReportInterval) ->
    erlang:send_after(ReportInterval, self(), scheduled_report).

%% Collects all counters, histograms and gauges and casts them to
%% every subscriber as one `statman_update'.
publish_stats(#state{report_interval = Window, subscribers = Subscribers}) ->
    Stats = counters(Window) ++ histograms(Window) ++ gauges(Window),
    lists:foreach(fun (S) ->
                          gen_server:cast(S, {statman_update, Stats})
                  end, Subscribers).
%%%===================================================================
%%% Internal functions
%%%===================================================================

%% One report entry per counter. Each counter is reset by the amount
%% that is being reported.
counters(Window) ->
    [begin
         statman_counter:reset(Key, Value),
         [{key, Key}, {node, node()}, {type, counter},
          {value, Value}, {window, Window}]
     end || {Key, Value} <- statman_counter:get_all()].

%% One report entry per histogram key, carrying the raw data. The
%% reported frequencies are subtracted from the table afterwards.
histograms(Window) ->
    [begin
         Data = statman_histogram:get_data(Key),
         statman_histogram:reset(Key, Data),
         [{key, Key}, {node, node()}, {type, histogram},
          {value, Data}, {window, Window}]
     end || Key <- statman_histogram:keys()].

%% One report entry per gauge that is left after expiring stale ones.
gauges(Window) ->
    statman_gauge:expire(),
    [[{key, Key}, {node, node()}, {type, gauge},
      {value, Value}, {window, Window}]
     || {Key, Value} <- statman_gauge:get_all()].


--------------------------------------------------------------------------------
/src/statman_sup.erl:
--------------------------------------------------------------------------------
-module(statman_sup).
-behaviour(supervisor).

%% API
-export([start_link/1]).
-export([init/1]).

-define(CHILD(I, Type, Args),
        {I, {I, start_link, Args}, permanent, 5000, Type, [I]}).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc: Starts the supervisor, registered locally under the module
%% name. An empty argument list selects a report interval of 1000.
start_link([]) ->
    start_link([1000]);
start_link([ReportInterval]) ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, [ReportInterval]).
%%%===================================================================
%%% Supervisor callbacks
%%%===================================================================

%% Supervises the statman server (started with ReportInterval) and the
%% poller supervisor.
init([ReportInterval]) ->
    Children = [
                ?CHILD(statman_server, worker, [ReportInterval]),
                ?CHILD(statman_poller_sup, supervisor, [])
               ],
    {ok, {{one_for_one, 5, 10}, Children}}.

--------------------------------------------------------------------------------
/src/statman_vm_metrics.erl:
--------------------------------------------------------------------------------
%% @doc: Collection of functions for sending statistics from the
%% Erlang VM. Every function returns a list of {Key, Value} pairs.

-module(statman_vm_metrics).
%% Explicit list of every function in this module. It replaces
%% export_all, so the exported interface stays exactly the same.
-export([get_counters/0, get_gauges/0, message_stats/0, ets_stats/0, gc/1]).


%% @doc: Bytes received and sent as reported by
%% erlang:statistics(io).
get_counters() ->
    {{input, InputBytes}, {output, OutputBytes}} = erlang:statistics(io),
    [{{vm, io_in_bytes}, InputBytes}, {{vm, io_out_bytes}, OutputBytes}].


%% @doc: Run queue length, process count, every entry of
%% erlang:memory/0, plus the results of message_stats/0 and
%% ets_stats/0.
get_gauges() ->
    Memory = [{{vm_memory, K}, V} || {K, V} <- erlang:memory()],

    RunQueue = {{vm, run_queue}, erlang:statistics(run_queue)},
    ProcessCount = {{vm, process_count}, erlang:system_info(process_count)},

    [RunQueue, ProcessCount] ++ Memory ++ message_stats() ++ ets_stats().


%% @doc: Number of processes with a non-empty message queue and the
%% total number of messages waiting in those queues.
message_stats() ->
    QueueLengths = lists:flatmap(
                     fun (Pid) ->
                             case process_info(Pid, message_queue_len) of
                                 undefined ->
                                     %% The process exited meanwhile
                                     [];
                                 {message_queue_len, 0} ->
                                     [];
                                 {message_queue_len, Count} ->
                                     [Count]
                             end
                     end, processes()),

    [{{vm, processes_with_queues}, length(QueueLengths)},
     {{vm, messages_in_queue}, lists:sum(QueueLengths)}].

%% @doc: Total number of objects stored in all ETS tables.
ets_stats() ->
    TotalSize = lists:sum(
                  lists:map(fun (T) ->
                                    case ets:info(T, size) of
                                        N when is_integer(N) ->
                                            N;
                                        undefined ->
                                            %% The table was deleted meanwhile
                                            0
                                    end
                            end, ets:all())),
    [{{vm_ets, objects}, TotalSize}].
%% @doc: Poll fun with state: reports how many garbage collections
%% ran since the previous call. The state is the total number of
%% collections seen so far; the first call (state `undefined') only
%% takes the baseline and reports nothing.
gc(undefined) ->
    {TotalGCs, _, _} = erlang:statistics(garbage_collection),
    {TotalGCs, []};
gc(PreviousTotal) ->
    {TotalGCs, _, 0} = erlang:statistics(garbage_collection),
    SinceLastPoll = TotalGCs - PreviousTotal,
    {TotalGCs, [{{vm, gcs}, SinceLastPoll}]}.

--------------------------------------------------------------------------------
/test/statman_tests.erl:
--------------------------------------------------------------------------------
-module(statman_tests).

-compile(export_all).

-include_lib("eunit/include/eunit.hrl").

%% =============================================================================
statman_test_() ->
    {foreach,
     fun setup/0, fun teardown/1,
     [
      {timeout, 200, {"Add/remove pollers", fun test_start_remove_pollers/0}},
      {timeout, 200, {"Stateful pollers", fun test_stateful_pollers/0}}
     ]
    }.

%% =============================================================================
%% Starts the poller supervisor and creates the three statman tables.
setup() ->
    {ok, SupPid} = statman_poller_sup:start_link(),

    statman_counter:init(),
    statman_gauge:init(),
    statman_histogram:init(),
    SupPid.

%% Deletes the tables, then kills the supervisor and waits until it is
%% gone.
teardown(SupPid) ->
    Tables = [statman_counters, statman_gauges, statman_histograms],
    lists:foreach(fun (Table) -> ets:delete(Table) end, Tables),

    process_flag(trap_exit, true),
    exit(SupPid, kill),
    receive {'EXIT', SupPid, killed} -> ok end,
    ok.


%% Adds one poller of each kind, checks that adding the same fun again
%% returns the same pid, waits for the pollers to record data and finally
%% removes them all.
test_start_remove_pollers() ->
    GaugeFun = fun() -> [{gauge, 5}] end,
    CounterFun = fun() -> [{counter, 5}] end,
    HistogramFun = fun() -> [{histogram, 5}, {histogram, 10}] end,

    %% Nothing has been recorded before any poller is started.
    ?assertEqual([], statman_gauge:get_all()),
    ?assertEqual([], statman_counter:get_all()),
    ?assertEqual([], statman_histogram:keys()),

    {ok, GaugeWorker} = statman_poller_sup:add_gauge(GaugeFun, 100),
    {ok, CounterWorker} = statman_poller_sup:add_counter(CounterFun, 100),
    {ok, HistogramWorker} = statman_poller_sup:add_histogram(HistogramFun, 100),

    %% Registering an identical fun a second time yields the same worker.
    ?assertEqual({ok, GaugeWorker},
                 statman_poller_sup:add_gauge(GaugeFun, 100)),
    ?assertEqual({ok, CounterWorker},
                 statman_poller_sup:add_counter(CounterFun, 100)),
    ?assertEqual({ok, HistogramWorker},
                 statman_poller_sup:add_histogram(HistogramFun, 100)),

    %% Pollers were added with an interval of 100; sleep past it.
    timer:sleep(250),

    ?assertMatch([{gauge, _}], statman_gauge:get_all()),
    ?assertEqual([counter], statman_counter:counters()),
    ?assertEqual([histogram], statman_histogram:keys()),

    ok = statman_poller_sup:remove_gauge(GaugeFun),
    ok = statman_poller_sup:remove_counter(CounterFun),
    ok = statman_poller_sup:remove_histogram(HistogramFun),

    ?assertEqual([], statman_poller_sup:get_workers()).

%% Runs statman_vm_metrics:gc/1 as a counter poller and checks that the
%% {vm, gcs} counter shows up after a couple of polling intervals.
test_stateful_pollers() ->
    ?assertEqual([], statman_counter:get_all()),

    GcPoller = fun statman_vm_metrics:gc/1,
    {ok, _Worker} = statman_poller_sup:add_counter(GcPoller, 100),
    timer:sleep(250),
    ?assertEqual([{vm, gcs}], statman_counter:counters()).

%% Calls the statman_server callbacks directly (no server process is
%% started) and checks that handling a {gc, Interval} message leaves the
%% histogram table empty once the recorded data has been reset.
periodic_gc_test() ->
    Interval = 100,
    {ok, ServerState} = statman_server:init([60000, [], Interval]),
    %% init/1 presumably schedules this message to the calling process;
    %% wait for it to arrive.
    receive
        {gc, Interval} -> ok
    end,
    statman_histogram:record_value(test, os:timestamp()),
    Recorded = statman_histogram:get_data(test),
    statman_histogram:reset(test, Recorded),
    {noreply, ServerState} =
        statman_server:handle_info({gc, Interval}, ServerState),
    ?assertEqual(0, ets:info(statman_histograms, size)),
    ok.
81 | --------------------------------------------------------------------------------