├── USE_REBAR_LOCKED ├── .gitignore ├── rel ├── vars.config ├── files │ ├── vm.args │ ├── sys.config │ ├── erl │ ├── nodetool │ └── estatsd └── reltool.config ├── src ├── estatsd.hrl ├── estatsd.app.src ├── estatsd_app.erl ├── estatsd_sup.erl ├── estatsd.erl ├── estatsd_shp.erl ├── estatsd_udp.erl └── estatsd_server.erl ├── priv └── bb │ ├── estatsd.config │ ├── opscode_estatsd_driver.erl │ ├── README.md │ └── opscode_stats_gen.erl ├── rebar.config ├── rebar.config.lock ├── LICENSE.txt ├── README.txt ├── Makefile └── test ├── estatsd_server_tests.erl ├── capture_tcp.erl └── estatsd_shp_tests.erl /USE_REBAR_LOCKED: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ebin/* 2 | *.beam 3 | deps/* 4 | rel/estatsd 5 | .eunit 6 | 7 | 8 | -------------------------------------------------------------------------------- /rel/vars.config: -------------------------------------------------------------------------------- 1 | {udp_port, 3344}. 2 | {udp_recbuf, 524288}. 3 | {udp_max_batch_size, 100}. 4 | {udp_max_batch_age, 2000}. 5 | {graphite_host, "127.0.0.1"}. 6 | {graphite_port, 2003}. 7 | {flush_interval, 10000}. 8 | -------------------------------------------------------------------------------- /src/estatsd.hrl: -------------------------------------------------------------------------------- 1 | -type shp_metric_type() :: 'm' | 'mr' | 'g' | 'h'. 2 | 3 | -record(shp_metric, {key :: binary(), 4 | value :: integer(), 5 | type :: shp_metric_type(), 6 | sample_rate :: float() | undefined}). 
7 | -------------------------------------------------------------------------------- /src/estatsd.app.src: -------------------------------------------------------------------------------- 1 | {application, estatsd, 2 | [ 3 | {description, "Stats aggregation service that writes to graphite"}, 4 | {vsn, git}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib, 9 | crypto, 10 | inets 11 | ]}, 12 | {mod, {estatsd_app, []}}, 13 | {env, []} 14 | ]}. 15 | -------------------------------------------------------------------------------- /priv/bb/estatsd.config: -------------------------------------------------------------------------------- 1 | {mode, {rate, 1000}}. 2 | 3 | {duration, 5}. 4 | 5 | {concurrent, 2}. 6 | 7 | {driver, opscode_estatsd_driver}. 8 | 9 | {code_paths, ["deps/stats"]}. 10 | 11 | {key_generator, {function, opscode_stats_gen, new_stat, []}}. 12 | 13 | {value_generator, {fixed_bin, 10248}}. 14 | 15 | {operations, [{put, 1}]}. 16 | 17 | {estatsd_host, "127.0.0.1"}. 18 | {estatsd_port, 3344}. 19 | -------------------------------------------------------------------------------- /src/estatsd_app.erl: -------------------------------------------------------------------------------- 1 | -module(estatsd_app). 2 | 3 | -behaviour(application). 4 | 5 | %% Application callbacks 6 | -export([start/2, stop/1]). 7 | 8 | %% =================================================================== 9 | %% Application callbacks 10 | %% =================================================================== 11 | 12 | start(_StartType, _StartArgs) -> 13 | estatsd_sup:start_link(). 14 | 15 | stop(_State) -> 16 | ok. 
17 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- mode: erlang -*- 2 | %% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*- 3 | %% ex: ts=4 sw=4 ft=erlang et 4 | 5 | {deps, 6 | [ 7 | {rebar_lock_deps_plugin, ".*", 8 | {git, "git://github.com/seth/rebar_lock_deps_plugin.git", "master"}}, 9 | {eper, ".*", 10 | {git, "git://github.com/massemanet/eper.git", {branch, "master"}}}, 11 | {bear, ".*", 12 | {git, "git://github.com/boundary/bear.git", {tag, "0.1.3"}}} 13 | ]}. 14 | 15 | %% Plugin usage 16 | {plugins, [rebar_lock_deps_plugin]}. 17 | 18 | {cover_enabled, true}. 19 | -------------------------------------------------------------------------------- /rel/files/vm.args: -------------------------------------------------------------------------------- 1 | ## Name of the node 2 | -name estatsd@127.0.0.1 3 | 4 | ## Cookie for distributed erlang 5 | -setcookie estatsd 6 | 7 | ## Heartbeat management; auto-restarts VM if it dies or becomes unresponsive 8 | ## (Disabled by default..use with caution!) 9 | ##-heart 10 | 11 | ## Enable kernel poll and a few async threads 12 | +K true 13 | +A 5 14 | 15 | ## Increase number of concurrent ports/sockets 16 | -env ERL_MAX_PORTS 4096 17 | 18 | ## Tweak GC to run more often 19 | -env ERL_FULLSWEEP_AFTER 10 20 | 21 | ## Increase logfile size to 10M 22 | -env RUN_ERL_LOG_MAXSIZE 10000000 23 | 24 | -------------------------------------------------------------------------------- /priv/bb/opscode_estatsd_driver.erl: -------------------------------------------------------------------------------- 1 | -module(opscode_estatsd_driver). 2 | 3 | -export([new/1, 4 | run/4]). 5 | 6 | -record(state, {host, 7 | port, 8 | sock}). 
9 | 10 | new(_Id) -> 11 | Host = basho_bench_config:get(estatsd_host), 12 | Port = basho_bench_config:get(estatsd_port), 13 | {ok, Sock} = gen_udp:open(0, [binary, {active, false}]), 14 | {ok, #state{host=Host, port=Port, sock=Sock}}. 15 | 16 | run(_Op, MetricGen, _ValueGen, #state{sock=Sock, host=Host, port=Port}=State) -> 17 | ok = gen_udp:send(Sock, Host, Port, MetricGen()), 18 | {ok, State}. 19 | -------------------------------------------------------------------------------- /rebar.config.lock: -------------------------------------------------------------------------------- 1 | %% THIS FILE IS GENERATED. DO NOT EDIT IT MANUALLY %% 2 | 3 | {deps,[{bear,".*", 4 | {git,"git://github.com/boundary/bear.git", 5 | "3fd09d1b7bbd9de5b2d29f46df04a93fca9ce85e"}}, 6 | {eper,".*", 7 | {git,"git://github.com/massemanet/eper.git", 8 | "9c55d1653de26dc7492aeae986fb4a81d6c84fd9"}}, 9 | {rebar_lock_deps_plugin,".*", 10 | {git,"git://github.com/seth/rebar_lock_deps_plugin.git", 11 | "758d1cf4cc79483e1fbaf2ea59ed9e9f76b4a012"}}]}. 12 | {plugins,[rebar_lock_deps_plugin]}. 13 | {cover_enabled,true}. 14 | 15 | -------------------------------------------------------------------------------- /rel/files/sys.config: -------------------------------------------------------------------------------- 1 | [ 2 | %% SASL config 3 | {sasl, [ 4 | {sasl_error_logger, {file, "log/sasl-error.log"}}, 5 | {errlog_type, error}, 6 | {error_logger_mf_dir, "log/sasl"}, % Log directory 7 | {error_logger_mf_maxbytes, 10485760}, % 10 MB max file size 8 | {error_logger_mf_maxfiles, 5} % 5 files max 9 | ]}, 10 | 11 | {estatsd, [ 12 | {udp_listen_port, {{udp_port}} }, 13 | {udp_recbuf, {{udp_recbuf}} }, 14 | {udp_max_batch_size, {{udp_max_batch_size}} }, 15 | {udp_max_batch_age, {{udp_max_batch_age}} }, 16 | {graphite_port, {{graphite_port}} }, 17 | {graphite_host, "{{graphite_host}}" }, 18 | {flush_interval, {{flush_interval}} } 19 | ]} 20 | ]. 
21 | 22 | -------------------------------------------------------------------------------- /rel/reltool.config: -------------------------------------------------------------------------------- 1 | {sys,[{lib_dirs,["apps","../deps"]}, 2 | {rel,"estatsd","1.1.0",[kernel,stdlib,sasl,crypto,inets,estatsd]}, 3 | {rel,"start_clean",[],[kernel,stdlib]}, 4 | {boot_rel,"estatsd"}, 5 | {profile,embedded}, 6 | {excl_sys_filters,["^bin/.*","^erts.*/bin/(dialyzer|typer)"]}, 7 | {excl_archive_filters,[".*"]}, 8 | {app,sasl,[{incl_cond,include}]}, 9 | {app,crypto,[{incl_cond,include}]}, 10 | {app,runtime_tools,[{incl_cond,include}]}, 11 | {app,eper,[{incl_cond,include}]}]}. 12 | {target_dir,"estatsd"}. 13 | {overlay_vars,"vars.config"}. 14 | {overlay,[{mkdir,"log/sasl"}, 15 | {mkdir,"etc/keys"}, 16 | {copy,"files/erl","{{erts_vsn}}/bin/erl"}, 17 | {copy,"files/nodetool","{{erts_vsn}}/bin/nodetool"}, 18 | {copy,"files/estatsd","bin/estatsd"}, 19 | {copy,"files/vm.args","etc/vm.args"}, 20 | {template,"files/sys.config","etc/sys.config"}]}. 21 | 22 | -------------------------------------------------------------------------------- /src/estatsd_sup.erl: -------------------------------------------------------------------------------- 1 | -module(estatsd_sup). 2 | 3 | -behaviour(supervisor). 4 | 5 | %% API 6 | -export([start_link/0]). 7 | 8 | %% Supervisor callbacks 9 | -export([init/1]). 10 | 11 | %% =================================================================== 12 | %% API functions 13 | %% =================================================================== 14 | 15 | 16 | start_link() -> 17 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 
18 | 19 | %% =================================================================== 20 | %% Supervisor callbacks 21 | %% =================================================================== 22 | 23 | init([]) -> 24 | Children = [ 25 | {estatsd_server, 26 | {estatsd_server, start_link, []}, 27 | permanent, 5000, worker, [estatsd_server]}, 28 | 29 | {estatsd_udp, 30 | {estatsd_udp, start_link, []}, 31 | permanent, 5000, worker, [estatsd_udp]} 32 | ], 33 | {ok, { {one_for_one, 10000, 10}, Children} }. 34 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Richard Jones 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /rel/files/erl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This script replaces the default "erl" in erts-VSN/bin. This is necessary 4 | ## as escript depends on erl and in turn, erl depends on having access to a 5 | ## bootscript (start.boot). Note that this script is ONLY invoked as a side-effect 6 | ## of running escript -- the embedded node bypasses erl and uses erlexec directly 7 | ## (as it should). 8 | ## 9 | ## Note that this script makes the assumption that there is a start_clean.boot 10 | ## file available in $ROOTDIR/release/VSN. 11 | 12 | # Determine the abspath of where this script is executing from. 13 | ERTS_BIN_DIR=$(cd ${0%/*} && pwd) 14 | 15 | # Now determine the root directory -- this script runs from erts-VSN/bin, 16 | # so we simply need to strip off two dirs from the end of the ERTS_BIN_DIR 17 | # path. 18 | ROOTDIR=${ERTS_BIN_DIR%/*/*} 19 | 20 | # Parse out release and erts info 21 | START_ERL=`cat $ROOTDIR/releases/start_erl.data` 22 | ERTS_VSN=${START_ERL% *} 23 | APP_VSN=${START_ERL#* } 24 | 25 | BINDIR=$ROOTDIR/erts-$ERTS_VSN/bin 26 | EMU=beam 27 | PROGNAME=`echo $0 | sed 's/.*\\///'` 28 | CMD="$BINDIR/erlexec" 29 | export EMU 30 | export ROOTDIR 31 | export BINDIR 32 | export PROGNAME 33 | 34 | exec $CMD -boot $ROOTDIR/releases/$APP_VSN/start_clean ${1+"$@"} -------------------------------------------------------------------------------- /priv/bb/README.md: -------------------------------------------------------------------------------- 1 | # Basho Bench for estatsd # 2 | 3 | 1. Install basho_bench: 4 | 5 | git clone https://github.com/basho/basho_bench.git 6 | cd basho_bench 7 | make all 8 | 9 | 2. 
Copy custom stuff into basho_bench and rebuild: 10 | 11 | cp estatsd/priv/bb/*.erl basho_bench/src/ 12 | cd basho_bench 13 | 14 | Next, edit basho_bench/ebin/basho_bench.app and add the opscode_* 15 | modules you copied over into the module list (basho_bench, y u no 16 | use rebar to manage this list?). With that you should be able to 17 | run `make` again to pick up the customization. 18 | 19 | 3. Run a test 20 | 21 | ./basho_bench ~/oc/estatsd/priv/bb/estatsd.config 22 | 23 | This will take ~5 minutes (depending on configuration settings in 24 | estatsd.config). 25 | 26 | 4. Analyze results, make plot. For this you need a recent version of 27 | R. On OS X, you want to go [here][1] and install the dmg. I 28 | *think* the basho script will install the R packages that it needs 29 | and so once R is installed you should be able to do: 30 | 31 | make results 32 | 33 | Then look in tests/current/ and you should have csv files and the 34 | png file created by R. 35 | 36 | 37 | [1]: http://cran.fhcrc.org/bin/macosx/ 38 | 39 | -------------------------------------------------------------------------------- /src/estatsd.erl: -------------------------------------------------------------------------------- 1 | -module(estatsd). 2 | 3 | -export([ 4 | increment/1, increment/2, increment/3, 5 | decrement/1, decrement/2, decrement/3, 6 | timing/2, 7 | start/0, stop/0 8 | ]). 9 | 10 | -define(SERVER, estatsd_server). 11 | 12 | %% @spec start() -> ok 13 | %% @doc Start the estatsd server. 14 | start() -> 15 | application:start(estatsd). 16 | 17 | %% @spec stop() -> ok 18 | %% @doc Stop the estatsd server. 19 | stop() -> 20 | application:stop(estatsd). 
21 | 22 | % Convenience: just give it the now() tuple when the work started 23 | timing(Key, StartTime = {_,_,_}) -> 24 | Dur = erlang:round(timer:now_diff(erlang:now(), StartTime)/1000), 25 | timing(Key,Dur); 26 | 27 | % Log timing information, ms 28 | timing(Key, Duration) when is_integer(Duration) -> 29 | gen_server:cast(?SERVER, {timing, Key, Duration}); 30 | 31 | timing(Key, Duration) -> 32 | gen_server:cast(?SERVER, {timing, Key, erlang:round(Duration)}). 33 | 34 | 35 | % Increments one or more stats counters 36 | increment(Key) -> increment(Key, 1, 1). 37 | increment(Key, Amount) -> increment(Key, Amount, 1). 38 | increment(Key, Amount, Sample) -> 39 | gen_server:cast(?SERVER, {increment, Key, Amount, Sample}). 40 | % Decrements a stats counter by Amount (default 1); decrement/3 negates Amount, so callers pass a positive number 41 | decrement(Key) -> decrement(Key, 1, 1). 42 | decrement(Key, Amount) -> decrement(Key, Amount, 1). 43 | decrement(Key, Amount, Sample) -> 44 | increment(Key, 0 - Amount, Sample). 45 | 46 | 47 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | estatsd is a simple stats aggregation service that periodically dumps data to 2 | Graphite: http://graphite.wikidot.com/ 3 | 4 | NB: Graphite is good, despite the website being a bit ghetto. 5 | 6 | Inspired heavily by etsy statsd: 7 | http://codeascraft.etsy.com/2011/02/15/measure-anything-measure-everything/ 8 | 9 | QUICK DEMO 10 | ========== 11 | 12 | 1) Install and configure graphite (quick-ish) 13 | 2) Install rebar, have it in your path 14 | 3) rebar get-deps 15 | 4) rebar compile 16 | 5) erl -pa ebin 17 | 6) > application:start(estatsd). 18 | > estatsd:increment(foo, 123). 19 | 7) Observe graphite now has 1 data point. 20 | 21 | USAGE 22 | ===== 23 | 24 | Add this app to your rebar deps, and make sure it's started somehow 25 | eg: application:start(estatsd).
26 | 27 | You can configure custom graphite host/port and flush interval using 28 | application environment vars. See estatsd_sup for details. 29 | 30 | The following calls to estatsd are all gen_server:cast, i.e. non-blocking. 31 | 32 | Counters 33 | -------- 34 | 35 | estatsd:increment(num_foos). %% increment num_foos by one 36 | 37 | estatsd:decrement(<<"num_bars">>, 3). %% decrement num_bars by 3 38 | 39 | estatsd:increment("tcp.bytes_in", 512). %% increment tcp.bytes_in by 512 40 | 41 | Timers 42 | ------ 43 | 44 | estatsd:timing(sometask, 1534). %% report that sometask took 1534ms 45 | 46 | Or for your convenience: 47 | 48 | Start = erlang:now(), 49 | do_sometask(), 50 | estatsd:timing(sometask, Start). %% uses now() and now_diff for you 51 | 52 | Sampling 53 | -------- 54 | 55 | Only report 10% of some_frequent_task measurements: 56 | 57 | estatsd:timing(some_frequent_task, 12, 0.1) 58 | 59 | 60 | 61 | NOTES 62 | ===== 63 | 64 | This could be extended to take a callback for reporting mechanisms. 65 | Right now it's hardcoded to stick data into graphite. 66 | 67 | 68 | 69 | Richard Jones 70 | @metabrew 71 | -------------------------------------------------------------------------------- /priv/bb/opscode_stats_gen.erl: -------------------------------------------------------------------------------- 1 | -module(opscode_stats_gen). 2 | 3 | -define(TYPES, [<<"c">>, <<"ms">>, <<"e">>]). 4 | -define(TYPE_COUNT, length(?TYPES)). 5 | 6 | -define(SHP_TYPES, [<<"h">>, <<"m">>, <<"mr">>, <<"g">>]). 7 | -define(SHP_TYPE_COUNT, length(?SHP_TYPES)). 8 | 9 | -export([new_stat/1]). 10 | 11 | new_stat(_) -> 12 | case crypto:rand_uniform(0, 2) > 0 of 13 | true -> 14 | fun() -> shp_stat() end; 15 | false -> 16 | fun() -> legacy_stat() end 17 | end.
18 | % Build one Stats Hero Protocol v1 packet: "1|Size\nBody\n", where Size counts Body plus its trailing newline 19 | shp_stat() -> 20 | Name = random_shp_name(), 21 | Type = get_shp_metric_type(Name), 22 | Value = new_value(Type), 23 | Body = iolist_to_binary([Name, ":", Value, "|", Type]), 24 | Size = integer_to_list(size(Body) + 1), 25 | iolist_to_binary(["1|", Size, "\n", Body, "\n"]). 26 | 27 | legacy_stat() -> 28 | Name = random_name(), 29 | Type = get_metric_type(Name), 30 | Value = new_value(Type), 31 | list_to_binary([Name, "|", Value, "|", Type]). 32 | % Random value (as a string) in a range chosen by metric type; every type in ?TYPES and ?SHP_TYPES must have a clause 33 | new_value(Type) when Type =:= <<"d">>; 34 | Type =:= <<"c">>; 35 | Type =:= <<"h">>; Type =:= <<"g">>; 36 | Type =:= <<"mr">> -> 37 | integer_to_list(crypto:rand_uniform(1, 25)); 38 | new_value(Type) when Type =:= <<"ms">>; 39 | Type =:= <<"m">> -> 40 | integer_to_list(crypto:rand_uniform(1, 5000)); 41 | new_value(<<"e">>) -> 42 | integer_to_list(crypto:rand_uniform(1, 500)). 43 | 44 | % random_name/0 and random_shp_name/0 determine how many unique 45 | % metrics will be tracked in the system during the benchmark run. 46 | 47 | random_name() -> 48 | io_lib:format("metric~4..0B", [crypto:rand_uniform(1, 1000)]). 49 | 50 | random_shp_name() -> 51 | io_lib:format("shp_metric~4..0B", [crypto:rand_uniform(1, 1000)]). 52 | % crypto:rand_uniform(Lo, Hi) excludes Hi, hence the + 1 so that the last type in each list can be selected 53 | select_type() -> 54 | lists:nth(crypto:rand_uniform(1, ?TYPE_COUNT + 1), ?TYPES). 55 | 56 | select_shp_type() -> 57 | lists:nth(crypto:rand_uniform(1, ?SHP_TYPE_COUNT + 1), ?SHP_TYPES). 58 | 59 | get_metric_type(Name) -> 60 | create_table(opscode_stats), 61 | case ets:lookup(opscode_stats, Name) of 62 | [] -> 63 | Type = select_type(), 64 | ets:insert_new(opscode_stats, {Name, Type}), 65 | Type; 66 | [{Name, Type}] -> 67 | Type 68 | end. 69 | 70 | get_shp_metric_type(Name) -> 71 | create_table(opscode_shp_stats), 72 | case ets:lookup(opscode_shp_stats, Name) of 73 | [] -> 74 | Type = select_shp_type(), 75 | ets:insert_new(opscode_shp_stats, {Name, Type}), 76 | Type; 77 | [{Name, Type}] -> 78 | Type 79 | end.
80 | 81 | create_table(Table) -> 82 | case catch ets:new(Table, [named_table, public]) of 83 | Table -> 84 | ok; 85 | %% Get this if the table is already created 86 | {'EXIT', {badarg, _Stacktrace}} -> 87 | ok 88 | end. 89 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DEPS=$(CURDIR)/deps 2 | 3 | # The release branch should have a file named USE_REBAR_LOCKED 4 | use_locked_config = $(wildcard USE_REBAR_LOCKED) 5 | ifeq ($(use_locked_config),USE_REBAR_LOCKED) 6 | rebar_config = rebar.config.lock 7 | else 8 | rebar_config = rebar.config 9 | endif 10 | REBAR = rebar -C $(rebar_config) 11 | 12 | all: compile test dialyzer 13 | 14 | compile: $(DEPS) 15 | @$(REBAR) compile 16 | 17 | dialyzer: 18 | dialyzer -Wunderspecs -r ebin 19 | 20 | compile_skip: 21 | @$(REBAR) compile skip_deps=true 22 | 23 | test: 24 | @$(REBAR) eunit skip_deps=true 25 | 26 | # For a release-only project, this won't make much sense, but could be 27 | # useful for release projects that have their own code 28 | clean: 29 | @$(REBAR) clean skip_deps=true 30 | 31 | allclean: 32 | @$(REBAR) clean 33 | 34 | update: compile 35 | @cd rel/estatsd;bin/estatsd restart 36 | 37 | distclean: relclean 38 | @rm -rf deps 39 | @$(REBAR) clean 40 | @rm -rf rel/apps rel/rebar.config 41 | 42 | tags: TAGS 43 | 44 | TAGS: 45 | find deps -name "*.[he]rl" -print | etags - 46 | 47 | # Only do munge_apps if we have files in src/ 48 | all_src_files = $(wildcard src/*) 49 | ifeq ($(strip $(all_src_files)),) 50 | munge_apps: 51 | @true 52 | else 53 | munge_apps: 54 | @mkdir -p rel/apps/estatsd 55 | @ln -sf `pwd`/ebin rel/apps/estatsd 56 | @ln -sf `pwd`/priv rel/apps/estatsd 57 | @cp rebar.config rel 58 | @echo '{deps_dir, ["../deps"]}.' 
>> rel/rebar.config 59 | endif 60 | 61 | generate: munge_apps 62 | @/bin/echo 'building OTP release package for estatsd' 63 | @/bin/echo "using rebar as: $(REBAR)" 64 | @cd rel;$(REBAR) generate 65 | @rm -rf rel/apps rel/rebar.config 66 | 67 | rel: rel/estatsd 68 | 69 | devrel: rel 70 | @/bin/echo -n Symlinking deps and apps into release 71 | @$(foreach dep,$(wildcard deps/*), /bin/echo -n .;rm -rf rel/estatsd/lib/$(shell basename $(dep))-* \ 72 | && ln -sf $(abspath $(dep)) rel/estatsd/lib;) 73 | @rm -rf rel/estatsd/lib/estatsd-*;mkdir -p rel/estatsd/lib/estatsd 74 | @ln -sf `pwd`/ebin rel/estatsd/lib/estatsd 75 | @ln -sf `pwd`/priv rel/estatsd/lib/estatsd 76 | @/bin/echo done. 77 | @/bin/echo Run \'make update\' to pick up changes in a running VM. 78 | 79 | rel/estatsd: compile generate 80 | 81 | relclean: 82 | @rm -rf rel/estatsd 83 | 84 | $(DEPS): 85 | @echo "Fetching deps as: $(REBAR)" 86 | @$(REBAR) get-deps 87 | 88 | prepare_release: distclean unlocked_deps rel lock_deps 89 | @echo 'release prepared, bumping version' 90 | @touch USE_REBAR_LOCKED 91 | @$(REBAR) bump-rel-version 92 | 93 | unlocked_deps: 94 | @echo 'Fetching deps as: rebar -C rebar.config' 95 | @rebar -C rebar.config get-deps 96 | 97 | lock_deps: 98 | @rebar lock-deps skip_deps=true 99 | 100 | .PHONY: distclean prepare_release lock_deps unlocked_deps update clean compile compile_skip allclean tags relclean devrel rel relclean generate munge_apps test 101 | -------------------------------------------------------------------------------- /src/estatsd_shp.erl: -------------------------------------------------------------------------------- 1 | -module(estatsd_shp). 2 | 3 | -export([parse_packet/1]). 4 | 5 | -ifdef(TEST). 6 | -compile([export_all]). 7 | -endif. 8 | 9 | -define(SHP_VERSION, 1). 10 | 11 | -include("estatsd.hrl"). 
12 | 13 | 14 | % @doc Parse a binary in Stats Hero Protocol Version 1 15 | % 16 | -spec parse_packet(binary()) -> 17 | {bad_version, binary()} 18 | | {bad_length, {[any()] | integer(), binary()}} 19 | | [#shp_metric{} | {bad_metric, term()}]. 20 | parse_packet(<<"1|", Rest/binary>>) -> 21 | parse_packet(Rest, []); 22 | parse_packet(Packet) when is_binary(Packet) -> 23 | {bad_version, Packet}. 24 | % Collect the length header byte by byte up to the first newline, then compare it with the size of the remaining body 25 | parse_packet(<<"\n", Rest/binary>>, Acc) -> 26 | Acc1 = lists:reverse(Acc), 27 | Length = try 28 | list_to_integer(Acc1) 29 | catch 30 | error:badarg -> 31 | Acc1 32 | end, 33 | Actual = size(Rest), 34 | case Length =:= Actual of 35 | true -> 36 | parse_body({Length, Rest}); 37 | false -> 38 | {bad_length, {Length, Rest}} 39 | end; 40 | parse_packet(<<C:8, Rest/binary>>, Acc) -> 41 | parse_packet(Rest, [C|Acc]); 42 | parse_packet(<<>>, Acc) -> 43 | {bad_length, {lists:reverse(Acc), <<>>}}. 44 | 45 | -spec parse_body({non_neg_integer(), binary()}) -> 46 | [(#shp_metric{} | {bad_metric, term()})]. 47 | % A body starting with the gzip magic bytes (31, 139) is gunzipped first; the rest is split on newlines, one metric per line 48 | parse_body({Length, GZBody = <<31, 139, _Rest/binary>>}) -> 49 | Body = zlib:gunzip(GZBody), 50 | parse_body({Length, Body}); 51 | parse_body({_Length, Body}) -> 52 | try 53 | Lines = binary:split(Body, <<"\n">>, [global]), 54 | [ parse_metric(L) || L <- Lines, L =/= <<>> ] 55 | catch 56 | error:Why -> 57 | error_logger:error_report({bad_metric, 58 | {Body, Why, erlang:get_stacktrace()}}), 59 | throw({bad_metric_body, Body}) 60 | 61 | end. 62 | % A malformed line is returned as {bad_metric, Reason} instead of raising, so one bad metric does not discard the packet 63 | -spec parse_metric(binary()) -> #shp_metric{} | {bad_metric, term()}. 64 | 65 | parse_metric(Bin) -> 66 | try 67 | [Key, Value, Type | Rate] = binary:split(Bin, [<<":">>, <<"|">>], 68 | [global]), 69 | #shp_metric{key = Key, value = to_int(Value), type = parse_type(Type), 70 | sample_rate = parse_sample_rate(Rate)} 71 | catch 72 | throw:{bad_metric, Why} -> 73 | {bad_metric, Why}; 74 | error:{badmatch, _} -> 75 | {bad_metric, {parse_error, Bin}} 76 | end. 77 | 78 | -spec parse_type(<<_:8, _:_*8>>) -> 'g' | 'h' | 'm' | 'mr'.
79 | parse_type(<<"m">>) -> 80 | m; 81 | parse_type(<<"h">>) -> 82 | h; 83 | parse_type(<<"mr">>) -> 84 | mr; 85 | parse_type(<<"g">>) -> 86 | g; 87 | parse_type(Unknown) -> 88 | throw({bad_metric, {unknown_type, Unknown}}). 89 | % Takes the fields left after key, value and type: [] means no sample rate (undefined), otherwise a single "@Float" field 90 | -spec parse_sample_rate([binary()]) -> float() | undefined. 91 | 92 | parse_sample_rate([]) -> 93 | undefined; 94 | parse_sample_rate([<<"@", FloatBin/binary>>]) -> 95 | try 96 | list_to_float(binary_to_list(FloatBin)) 97 | catch 98 | error:badarg -> 99 | throw({bad_metric, {bad_sample_rate, FloatBin}}) 100 | end; 101 | parse_sample_rate(L) -> 102 | throw({bad_metric, {bad_sample_rate, L}}). 103 | 104 | -spec to_int(binary()) -> integer(). 105 | 106 | to_int(Value) when is_binary(Value) -> 107 | try 108 | list_to_integer(binary_to_list(Value)) 109 | catch 110 | error:badarg -> 111 | throw({bad_metric, {bad_value, Value}}) 112 | end. 113 | -------------------------------------------------------------------------------- /test/estatsd_server_tests.erl: -------------------------------------------------------------------------------- 1 | -module(estatsd_server_tests). 2 | 3 | -compile([export_all]). 4 | -include_lib("eunit/include/eunit.hrl"). 5 | 6 | setup_server() -> 7 | UdpMaxBatchSize = 1, 8 | UdpMaxBatchAge = 1000, 9 | UdpBuf = 524288, 10 | {ok, _} = capture_tcp:start_link(0), 11 | {ok, CapturePort} = capture_tcp:what_port(), 12 | application:set_env(estatsd, flush_interval, 2000), 13 | application:set_env(estatsd, graphite_host, "127.0.0.1"), 14 | application:set_env(estatsd, graphite_port, CapturePort), 15 | application:set_env(estatsd, udp_listen_port, 0), 16 | application:set_env(estatsd, udp_recbuf, UdpBuf), 17 | application:set_env(estatsd, udp_max_batch_size, UdpMaxBatchSize), 18 | application:set_env(estatsd, udp_max_batch_age, UdpMaxBatchAge), 19 | application:start(crypto), 20 | application:start(inets), 21 | application:start(estatsd), 22 | {ok, EstatsdPort} = estatsd_udp:what_port(), 23 | ?debugVal(EstatsdPort), 24 | EstatsdPort.
25 | 26 | cleanup_server() -> 27 | capture_tcp:stop(), 28 | application:stop(estatsd). 29 | 30 | estatsd_sanity_test_() -> 31 | {setup, 32 | fun() -> 33 | setup_server() end, 34 | fun(_) -> 35 | cleanup_server() end, 36 | fun(Port) -> 37 | [{"UDP metrics sent to estatsd are buffered and then sent to graphite", 38 | fun() -> 39 | {ok, S} = gen_udp:open(0), 40 | ok = gen_udp:send(S, "127.0.0.1", Port, <<"mycounter:10|c">>), 41 | ok = gen_udp:send(S, "127.0.0.1", Port, <<"mycounter:10|c">>), 42 | ok = gen_udp:send(S, "127.0.0.1", Port, <<"mycounter:5|d">>), 43 | estatsd_server:force_flush(), 44 | %% sadly, need sleep here because flush is async. The 45 | %% work is done in an unsupervised spawned process. 46 | timer:sleep(200), 47 | {MsgCount, Msgs} = capture_tcp:read(), 48 | %% three UDP messages are sent, but these will be aggregated into 49 | %% a single message sent off to "graphite". 50 | ?assertEqual(1, MsgCount), 51 | Keys = [ K || {K, _, _} <- process_graphite(hd(Msgs)) ], 52 | ?assertEqual([<<"stats.mycounter">>, <<"stats_counts.mycounter">>, <<"statsd.numStats">>], 53 | Keys) 54 | end}] 55 | end}. 56 | 57 | process_graphite(Msg) -> 58 | [ parse_graphite_line(X) || X <- re:split(Msg, "\n"), X =/= <<>> ]. 59 | 60 | parse_graphite_line(Line) -> 61 | [Key, Value, Time] = re:split(Line, " "), 62 | {Key, Value, Time}. 63 | 64 | multi() -> 65 | Port = 3344, 66 | {ok, S} = gen_udp:open(0), 67 | Self = self(), 68 | SendUdp = fun() -> 69 | Msg = <<"multicounter:10|c">>, 70 | ok = gen_udp:send(S, "127.0.0.1", Port, Msg), 71 | Self ! self(), 72 | ok 73 | end, 74 | Pids = [ spawn(SendUdp) || _I <- lists:seq(1, 10) ], 75 | gather_pids(Pids), 76 | %% Metrics = folsom_metrics:get_metrics(), 77 | %% ?assertEqual(lists:usort(Metrics), Metrics), 78 | %% ?debugVal(Metrics), 79 | %% ?assert(lists:member(<<"multicounter">>, Metrics)), 80 | %% MultiCounter = folsom_metrics:get_metric_value(<<"multicounter">>), 81 | %% ?debugVal(MultiCounter), 82 | ok. 
83 | 84 | mass(N) -> 85 | Port = 3344, 86 | {ok, S} = gen_udp:open(0), 87 | Self = self(), 88 | SendUdp = fun() -> 89 | Id = integer_to_list(crypto:rand_uniform(1, 300)), 90 | Msg = iolist_to_binary([<<"metric_">>, Id, <<":10|c">>]), 91 | ok = gen_udp:send(S, "127.0.0.1", Port, Msg), 92 | Self ! self(), 93 | ok 94 | end, 95 | Pids = [ spawn(SendUdp) || _I <- lists:seq(1, N) ], 96 | gather_pids(Pids), 97 | %% Metrics = folsom_metrics:get_metrics(), 98 | %% ?assertEqual(lists:usort(Metrics), lists:sort(Metrics)), 99 | %% ?debugVal(Metrics), 100 | ok. 101 | 102 | 103 | gather_pids([Pid|Rest]) -> 104 | receive 105 | Pid -> 106 | gather_pids(Rest) 107 | after 2000 -> 108 | gather_pids(Rest) 109 | end; 110 | gather_pids([]) -> 111 | done. 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /test/capture_tcp.erl: -------------------------------------------------------------------------------- 1 | %% Copyright 2012 Opscode, Inc. All Rights Reserved. 2 | %% 3 | %% This file is provided to you under the Apache License, 4 | %% Version 2.0 (the "License"); you may not use this file 5 | %% except in compliance with the License. You may obtain 6 | %% a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, 11 | %% software distributed under the License is distributed on an 12 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | %% KIND, either express or implied. See the License for the 14 | %% specific language governing permissions and limitations 15 | %% under the License. 16 | %% 17 | 18 | -module(capture_tcp). 19 | 20 | -behaviour(gen_server). 21 | -define(SERVER, ?MODULE). 22 | 23 | -define(to_int(Value), list_to_integer(binary_to_list(Value))). 24 | 25 | -export([peek/0, 26 | read/0, 27 | start_link/1, 28 | stop/0, 29 | what_port/0]). 
30 | 31 | %% ------------------------------------------------------------------ 32 | %% gen_server Function Exports 33 | %% ------------------------------------------------------------------ 34 | 35 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, 36 | code_change/3]). 37 | 38 | -record(state, {port :: non_neg_integer(), 39 | socket :: inet:socket(), 40 | msg_count = 0 :: non_neg_integer(), 41 | buffer = [] :: iolist() 42 | }). 43 | 44 | %% ------------------------------------------------------------------ 45 | %% API Function Definitions 46 | %% ------------------------------------------------------------------ 47 | 48 | -spec start_link(non_neg_integer()) -> {ok, pid()} | {error, any()}. 49 | %% @doc Start a TCP capture server listening on `Port'. If `Port' is 50 | %% `0', the system will assign a usable port which you can later 51 | %% discover using {@link capture_tcp:what_port/0}. 52 | start_link(Port) -> 53 | gen_server:start_link({local, ?SERVER}, ?MODULE, Port, []). 54 | 55 | stop() -> 56 | gen_server:call(?SERVER, stop). 57 | 58 | -spec what_port() -> {ok, non_neg_integer()}. 59 | %% @doc Return the port this server is listening on. 60 | what_port() -> 61 | gen_server:call(?SERVER, what_port). 62 | 63 | -spec peek() -> {non_neg_integer(), iolist()}. 64 | %% @doc Return the count and collected message iolist for the server. 65 | %% The server state is not modified. 66 | %% @see capture_tcp:read/0 67 | peek() -> 68 | gen_server:call(?SERVER, peek). 69 | 70 | -spec read() -> {non_neg_integer(), iolist()}. 71 | %% @doc Return the message count and collected message iolist for the server. 72 | %% Calling this function resets the message buffer and message counter. 73 | %% @see capture_tcp:peek/0 74 | read() -> 75 | gen_server:call(?SERVER, read).

%% ------------------------------------------------------------------
%% gen_server Function Definitions
%% ------------------------------------------------------------------

%% Accept loop, run in its own process. Connections are handled one at a
%% time: each accepted socket is drained by recv_loop/2 before the next
%% accept. The loop ends when accept fails, for example because the
%% listen socket was closed.
recv_server(LS) ->
    case gen_tcp:accept(LS) of
        {ok, S} ->
            recv_loop(S, []),
            recv_server(LS);
        Other ->
            io:format("accept returned ~w - goodbye!~n", [Other]),
            ok
    end.

%% Collect every chunk sent on `S' until the peer closes, then hand the
%% chunks (in arrival order) to the gen_server as one captured message.
recv_loop(S, Acc) ->
    %% Ask for exactly one more packet to be delivered as a message.
    inet:setopts(S, [{active,once}]),
    receive
        {tcp, S, Data} ->
            recv_loop(S, [Data| Acc]);
        {tcp_closed, S} ->
            %% io:format("Socket ~w closed [~w]~n", [S, self()]),
            %% io:format("Data: ~p~n", [lists:reverse(Acc)]),
            gen_server:call(?SERVER, {do_recv, lists:reverse(Acc)}),
            ok
    end.

%% Open the listen socket, log the port actually bound (relevant when
%% `Port' is 0) and start the accept loop in a separate process.
init(Port) ->
    {ok, LSocket} = gen_tcp:listen(Port, [binary, {active, true}, {packet, 0}]),
    {ok, RealPort} = inet:port(LSocket),
    error_logger:info_msg("capture_tcp listening on ~p~n", [RealPort]),
    spawn(fun() -> recv_server(LSocket) end),
    {ok, #state{port = RealPort, socket = LSocket}}.

%% peek      - reply with {Count, Buffer} in arrival order, state untouched
%% read      - same reply, then reset counter and buffer
%% what_port - reply with inet:port/1 of the listen socket
%% stop      - reply ok and terminate normally
%% do_recv   - internal: store one captured connection's data
handle_call(peek, _From, #state{msg_count = Count, buffer = Buffer}=State) ->
    {reply, {Count, lists:reverse(Buffer)}, State};
handle_call(read, _From, #state{msg_count = Count, buffer = Buffer}=State) ->
    {reply, {Count, lists:reverse(Buffer)}, State#state{msg_count = 0, buffer = []}};
handle_call(what_port, _From, #state{socket = Sock}=State) ->
    {reply, inet:port(Sock), State};
handle_call(stop, _From, State) ->
    {stop, normal, ok, State};
handle_call({do_recv, Data}, _From, #state{msg_count = Count, buffer = Buffer}=State) ->
    {reply, ok, State#state{msg_count = Count + 1, buffer = [Data | Buffer]}};
handle_call(_Request, _From, State) ->
    %% Must be a `reply' tuple: `{noreply, ok, State}' would install `ok'
    %% as the server state and use the old state as the timeout value.
    {reply, ok, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(_Msg, State) ->
    {noreply, State}.

%% Nothing to release explicitly on shutdown.
terminate(_Reason, _State) ->
    ok.

%% No state migration between code versions.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

--------------------------------------------------------------------------------
/rel/files/nodetool:
--------------------------------------------------------------------------------
%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
%% ex: ft=erlang ts=4 sw=4 et
%% -------------------------------------------------------------------
%%
%% nodetool: Helper Script for interacting with live nodes
%%
%% -------------------------------------------------------------------

%% Entry point. Consumes the -name/-sname/-setcookie arguments to start
%% a local maintenance node, verifies the target node is reachable, then
%% runs the requested command (ping, stop, restart, reboot, rpc,
%% rpcterms) against it.
main(Args) ->
    ok = start_epmd(),
    %% Extract the args
    {RestArgs, TargetNode} = process_args(Args, [], undefined),

    %% See if the node is currently running  -- if it's not, we'll bail
    case {net_kernel:hidden_connect_node(TargetNode), net_adm:ping(TargetNode)} of
        {true, pong} ->
            ok;
        {_, pong} ->
            %% The ping succeeded but the hidden connection was not
            %% established; report it instead of dying with case_clause.
            io:format("Failed to connect to node ~p.\n", [TargetNode]),
            halt(1);
        {_, pang} ->
            io:format("Node ~p not responding to pings.\n", [TargetNode]),
            halt(1)
    end,

    case RestArgs of
        ["ping"] ->
            %% If we got this far, the node already responded to a ping, so just dump
            %% a "pong"
            io:format("pong\n");
        ["stop"] ->
            io:format("~p\n", [rpc:call(TargetNode, init, stop, [], 60000)]);
        ["restart"] ->
            io:format("~p\n", [rpc:call(TargetNode, init, restart, [], 60000)]);
        ["reboot"] ->
            io:format("~p\n", [rpc:call(TargetNode, init, reboot, [], 60000)]);
        ["rpc", Module, Function | RpcArgs] ->
            %% The remaining arguments are passed as ONE argument: a list
            %% of strings.
            case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
                          [RpcArgs], 60000) of
                ok ->
                    ok;
                {badrpc, Reason} ->
                    io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]),
                    halt(1);
                _ ->
                    halt(1)
            end;
        ["rpcterms", Module, Function, ArgsAsString] ->
            %% The argument string is parsed into a list of Erlang terms
            %% which becomes the argument list of the remote call.
            case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
                          consult(ArgsAsString), 60000) of
                {badrpc, Reason} ->
                    io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]),
                    halt(1);
                Other ->
                    io:format("~p\n", [Other])
            end;
        Other ->
            io:format("Other: ~p\n", [Other]),
            io:format("Usage: nodetool {ping|stop|restart|reboot}\n")
    end,
    net_kernel:stop().

%% Walk the command line. -setcookie sets the local cookie; -name/-sname
%% start distribution under a derived maintenance node name and record
%% the target node; every other argument is collected, in order, for the
%% command dispatch in main/1.
process_args([], Acc, TargetNode) ->
    {lists:reverse(Acc), TargetNode};
process_args(["-setcookie", Cookie | Rest], Acc, TargetNode) ->
    erlang:set_cookie(node(), list_to_atom(Cookie)),
    process_args(Rest, Acc, TargetNode);
process_args(["-name", TargetName | Rest], Acc, _) ->
    ThisNode = append_node_suffix(TargetName, "_maint_"),
    {ok, _} = net_kernel:start([ThisNode, longnames]),
    process_args(Rest, Acc, nodename(TargetName));
process_args(["-sname", TargetName | Rest], Acc, _) ->
    ThisNode = append_node_suffix(TargetName, "_maint_"),
    {ok, _} = net_kernel:start([ThisNode, shortnames]),
    process_args(Rest, Acc, nodename(TargetName));
process_args([Arg | Rest], Acc, Opts) ->
    process_args(Rest, [Arg | Acc], Opts).


%% Start epmd in daemon mode; the command is expected to print nothing.
start_epmd() ->
    [] = os:cmd(epmd_path() ++ " -daemon"),
    ok.

%% Locate epmd: first next to this script, then on the PATH. Halts with
%% exit status 1 if it cannot be found.
epmd_path() ->
    ErtsBinDir = filename:dirname(escript:script_name()),
    Name = "epmd",
    case os:find_executable(Name, ErtsBinDir) of
        false ->
            case os:find_executable(Name) of
                false ->
                    io:format("Could not find epmd.~n"),
                    halt(1);
                GlobalEpmd ->
                    GlobalEpmd
            end;
        Epmd ->
            Epmd
    end.


%% Turn a node name string into an atom, borrowing the host part of the
%% local node name when `Name' has none.
nodename(Name) ->
    case string:tokens(Name, "@") of
        [_Node, _Host] ->
            list_to_atom(Name);
        [Node] ->
            [_, Host] = string:tokens(atom_to_list(node()), "@"),
            list_to_atom(lists:concat([Node, "@", Host]))
    end.

%% Build the local maintenance node name: the node part of `Name'
%% followed by `Suffix' and this OS process id, keeping the host part
%% when `Name' has one.
append_node_suffix(Name, Suffix) ->
    case string:tokens(Name, "@") of
        [Node, Host] ->
            list_to_atom(lists:concat([Node, Suffix, os:getpid(), "@", Host]));
        [Node] ->
            list_to_atom(lists:concat([Node, Suffix, os:getpid()]))
    end.


%%
%% Given a string or binary, parse it into a list of terms, ala file:consult/1
%%
consult(Str) when is_list(Str) ->
    consult([], Str, []);
consult(Bin) when is_binary(Bin)->
    consult([], binary_to_list(Bin), []).

%% Scan and parse one term at a time, accumulating terms in reverse.
%% Returns the list of terms, or {error, Info} on a scan error.
consult(Cont, Str, Acc) ->
    case erl_scan:tokens(Cont, Str, 0) of
        {done, Result, Remaining} ->
            case Result of
                {ok, Tokens, _} ->
                    {ok, Term} = erl_parse:parse_term(Tokens),
                    consult([], Remaining, [Term | Acc]);
                {eof, _Other} ->
                    lists:reverse(Acc);
                {error, Info, _} ->
                    {error, Info}
            end;
        {more, Cont1} ->
            %% The scanner wants more input; feed it eof to finish.
            consult(Cont1, eof, Acc)
    end.
--------------------------------------------------------------------------------
/rel/files/estatsd:
--------------------------------------------------------------------------------
#!/bin/bash
# -*- tab-width:4;indent-tabs-mode:nil -*-
# ex: ts=4 sw=4 et

# Directory containing this script, and the release root one level up.
RUNNER_SCRIPT_DIR=$(cd ${0%/*} && pwd)

RUNNER_BASE_DIR=${RUNNER_SCRIPT_DIR%/*}
RUNNER_ETC_DIR=$RUNNER_BASE_DIR/etc
RUNNER_LOG_DIR=$RUNNER_BASE_DIR/log
# Note the trailing slash on $PIPE_DIR/
PIPE_DIR=$RUNNER_BASE_DIR/pipe_dir/
# When non-empty, the script re-executes itself as this user via sudo.
RUNNER_USER=

## Set the erlang log size
export RUN_ERL_LOG_MAXSIZE=10000000
export RUN_ERL_LOG_ALIVE_FORMAT=%d-%b-%Y::%H:%M:%S

# Make sure this script is running as the appropriate user
if [ ! -z "$RUNNER_USER" ] && [ `whoami` != "$RUNNER_USER" ]; then
    exec sudo -u $RUNNER_USER -i $0 $@
fi

# Make sure CWD is set to runner base dir
cd $RUNNER_BASE_DIR

# Make sure log directory exists
mkdir -p $RUNNER_LOG_DIR

# Extract the target node name from vm.args
NAME_ARG=`egrep -e '^-s?name' $RUNNER_ETC_DIR/vm.args`
if [ -z "$NAME_ARG" ]; then
    echo "vm.args needs to have either -name or -sname parameter."
    exit 1
fi

# Extract the target cookie
COOKIE_ARG=`grep -e '^-setcookie' $RUNNER_ETC_DIR/vm.args`
if [ -z "$COOKIE_ARG" ]; then
    echo "vm.args needs to have a -setcookie parameter."
    exit 1
fi

# Identify the script name
SCRIPT=`basename $0`

# Parse out release and erts info
# (start_erl.data is read as "<erts version> <release version>")
START_ERL=`cat $RUNNER_BASE_DIR/releases/start_erl.data`
ERTS_VSN=${START_ERL% *}
APP_VSN=${START_ERL#* }

# Add ERTS bin dir to our path
ERTS_PATH=$RUNNER_BASE_DIR/erts-$ERTS_VSN/bin

# Setup command to control the node
NODETOOL="$ERTS_PATH/escript $ERTS_PATH/nodetool $NAME_ARG $COOKIE_ARG"

# Check the first argument for instructions
case "$1" in
    runit)
        # Run the VM in the foreground without a shell (-noshell)
        BOOTFILE=$SCRIPT
        # Setup beam-required vars
        ROOTDIR=$RUNNER_BASE_DIR
        BINDIR=$ROOTDIR/erts-$ERTS_VSN/bin
        EMU=beam
        PROGNAME=`echo $0 | sed 's/.*\\///'`
        CMD="$BINDIR/erlexec -noshell -boot $RUNNER_BASE_DIR/releases/$APP_VSN/$BOOTFILE -embedded -config $RUNNER_ETC_DIR/sys.config -args_file $RUNNER_ETC_DIR/vm.args -- ${1+"$@"}"
        export EMU
        export ROOTDIR
        export BINDIR
        export PROGNAME

        # Start the VM
        exec $CMD
        ;;
    start)
        # Make sure there is not already a node running
        RES=`$NODETOOL ping`
        if [ "$RES" = "pong" ]; then
            echo "Node is already running!"
            exit 1
        fi
        HEART_COMMAND="$RUNNER_BASE_DIR/bin/$SCRIPT start"
        export HEART_COMMAND
        mkdir -p $PIPE_DIR
        shift # remove $1
        # Daemonize through run_erl, which re-invokes this script with
        # the "console" command
        $ERTS_PATH/run_erl -daemon $PIPE_DIR $RUNNER_LOG_DIR "exec $RUNNER_BASE_DIR/bin/$SCRIPT console $@" 2>&1
        ;;

    stop)
        # Find the beam process of this release first, so that after
        # asking the node to stop we can wait for it to completely stop...
        case `uname -s` in
            Linux|Darwin|FreeBSD|DragonFly|NetBSD|OpenBSD)
                # PID COMMAND
                PID=`ps ax -o pid= -o command=|\
                    grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'`
                ;;
            SunOS)
                # PID COMMAND
                PID=`ps -ef -o pid= -o args=|\
                    grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'`
                ;;
            CYGWIN*)
                # UID PID PPID TTY STIME COMMAND
                PID=`ps -efW|grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $2}'`
                ;;
        esac
        $NODETOOL stop
        # Poll once a second until the process no longer exists
        while `kill -0 $PID 2>/dev/null`;
        do
            sleep 1
        done
        ;;

    restart)
        ## Restart the VM without exiting the process
        $NODETOOL restart
        ;;

    reboot)
        ## Restart the VM completely (uses heart to restart it)
        $NODETOOL reboot
        ;;

    ping)
        ## See if the VM is alive
        $NODETOOL ping
        ;;

    attach)
        # Make sure a node IS running
        RES=`$NODETOOL ping`
        if [ "$RES" != "pong" ]; then
            echo "Node is not running!"
            exit 1
        fi

        shift
        $ERTS_PATH/to_erl $PIPE_DIR
        ;;

    console|console_clean)
        # .boot file typically just $SCRIPT (ie, the app name)
        # however, for debugging, sometimes start_clean.boot is useful:
        case "$1" in
            console)        BOOTFILE=$SCRIPT ;;
            console_clean)  BOOTFILE=start_clean ;;
        esac
        # Setup beam-required vars
        ROOTDIR=$RUNNER_BASE_DIR
        BINDIR=$ROOTDIR/erts-$ERTS_VSN/bin
        EMU=beam
        PROGNAME=`echo $0 | sed 's/.*\\///'`
        CMD="$BINDIR/erlexec -boot $RUNNER_BASE_DIR/releases/$APP_VSN/$BOOTFILE -embedded -config $RUNNER_ETC_DIR/sys.config -args_file $RUNNER_ETC_DIR/vm.args -- ${1+"$@"}"
        export EMU
        export ROOTDIR
        export BINDIR
        export PROGNAME

        # Dump environment info for logging purposes
        echo "Exec: $CMD"
        echo "Root: $ROOTDIR"

        # Log the startup
        logger -t "$SCRIPT[$$]" "Starting up"

        # Start the VM
        exec $CMD
        ;;

    *)
        echo "Usage: $SCRIPT {start|stop|restart|reboot|ping|console|console_clean|attach}"
        exit 1
        ;;
esac

exit 0
--------------------------------------------------------------------------------
/src/estatsd_udp.erl:
--------------------------------------------------------------------------------
-module(estatsd_udp).
-behaviour(gen_server).
-define(SERVER, ?MODULE).

-define(to_int(Value), list_to_integer(binary_to_list(Value))).

-include("estatsd.hrl").

%% ------------------------------------------------------------------
%% API Function Exports
%% ------------------------------------------------------------------

-export([
         start_link/0,
         what_port/0
        ]).

%% ------------------------------------------------------------------
%% gen_server Function Exports
%% ------------------------------------------------------------------

-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
         code_change/3]).

%% ------------------------------------------------------------------
%% API Function Definitions
%% ------------------------------------------------------------------

%% @doc Start the UDP listener, registered locally under the module name.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc Return `{ok, Port}' with the UDP port the listener is bound to.
what_port() ->
    gen_server:call(?MODULE, what_port).

%% ------------------------------------------------------------------
%% gen_server Function Definitions
%% ------------------------------------------------------------------

%% `batch' holds the packets received since the last hand-off, newest
%% first. `batch_max' and `batch_max_age' are the configured size and
%% age (ms) limits that trigger a hand-off.
-record(state, {port          :: non_neg_integer(),
                socket        :: inet:socket(),
                batch = []    :: [binary()],
                batch_max     :: non_neg_integer(),
                batch_max_age :: non_neg_integer()
               }).

%% Read the listener settings from the `estatsd' application
%% environment (all four keys are required) and open the UDP socket in
%% `{active, once}' mode, so the server receives one packet per
%% re-arming of the socket.
init([]) ->
    {ok, Port} = application:get_env(estatsd, udp_listen_port),
    {ok, RecBuf} = application:get_env(estatsd, udp_recbuf),
    {ok, BatchMax} = application:get_env(estatsd, udp_max_batch_size),
    {ok, BatchAge} = application:get_env(estatsd, udp_max_batch_age),
    {ok, Socket} = gen_udp:open(Port, [binary, {active, once},
                                       {recbuf, RecBuf}]),
    %% Ask the socket for the port actually bound.
    {ok, RealPort} = inet:port(Socket),
    error_logger:info_msg("estatsd will listen on UDP ~p with recbuf ~p~n",
                          [RealPort, RecBuf]),
    error_logger:info_msg("batch size ~p with max age of ~pms~n",
                          [BatchMax, BatchAge]),
    {ok, #state{port = RealPort, socket = Socket,
                batch = [],
                batch_max = BatchMax,
                batch_max_age = BatchAge}}.

handle_call(what_port, _From, #state{port = Port} = State) ->
    {reply, {ok, Port}, State};
handle_call(_Request, _From, State) ->
    %% Must be a `reply' tuple: `{noreply, ok, State}' would install `ok'
    %% as the server state and use the old state as the timeout value.
    {reply, ok, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.


%% Incoming UDP packets are accumulated into a batch (newest first).
%%
%% * When the batch already holds `batch_max' packets, it is handed to a
%%   worker and the new packet starts the next batch.
%% * Otherwise the packet is added to the current batch.
%%
%% Both cases re-arm the socket and return the `batch_max_age' timeout,
%% so a batch that stops growing is flushed by the `timeout' clause. The
%% packet that starts a new batch needs that timeout too, or it would
%% sit unprocessed until more traffic arrived.
handle_info({udp, Socket, _Host, _Port, Bin},
            #state{batch=Batch, batch_max=Max,
                   batch_max_age=MaxAge}=State) when length(Batch) == Max ->
    error_logger:info_msg("spawn batch ~p FULL~n", [Max]),
    start_batch_worker(Batch),
    inet:setopts(Socket, [{active, once}]),
    {noreply, State#state{batch=[Bin]}, MaxAge};
handle_info({udp, Socket, _Host, _Port, Bin}, #state{batch=Batch,
                                                     batch_max_age=MaxAge}=State) ->
    inet:setopts(Socket, [{active, once}]),
    {noreply, State#state{batch=[Bin|Batch]}, MaxAge};
handle_info(timeout, #state{batch=Batch}=State) ->
    error_logger:info_msg("spawn batch ~p TIMEOUT~n", [length(Batch)]),
    start_batch_worker(Batch),
    {noreply, State#state{batch=[]}};
handle_info(_Msg, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% ------------------------------------------------------------------
%% Internal Function Definitions
%% ------------------------------------------------------------------

%% Process a batch in a separate process so the listener can go straight
%% back to receiving.
start_batch_worker(Batch) ->
    %% Make sure we process messages in the order received
    proc_lib:spawn(fun() -> handle_messages(lists:reverse(Batch)) end).

handle_messages(Batch) ->
    [ handle_message(M) || M <- Batch ],
    ok.

%% Packets starting with "1|" are treated as SHP; everything else is
%% handled as the legacy line protocol.
is_legacy_message(<<"1|", _Rest/binary>>) ->
    false;
is_legacy_message(_Bin) ->
    true.

handle_message(Bin) ->
    case is_legacy_message(Bin) of
        true -> handle_legacy_message(Bin);
        false -> handle_shp_message(Bin)
    end.

%% Parse an SHP packet and forward each well-formed metric. If
%% estatsd_shp:parse_packet/1 returns an error tuple instead of a list,
%% the packet is logged and dropped, so the batch worker carries on with
%% the remaining packets instead of crashing on a bad generator.
handle_shp_message(Bin) ->
    case estatsd_shp:parse_packet(Bin) of
        Metrics when is_list(Metrics) ->
            %% Entries that are not #shp_metric{} records are skipped by
            %% the generator pattern.
            [ send_metric(erlang:atom_to_binary(Type, utf8), Key, Value)
              || #shp_metric{key = Key, value = Value,
                             type = Type} <- Metrics ];
        Error ->
            error_logger:error_report({error, "handle_shp_message failed",
                                       Bin, Error}),
            []
    end.

%% Handle a packet in the legacy line protocol: newline-separated
%% `Key:Value|Type' lines. Each line is parsed on its own, so a
%% malformed line is reported without discarding the lines after it.
handle_legacy_message(Bin) ->
    Lines = binary:split(Bin, <<"\n">>, [global]),
    [ handle_legacy_line(L, Bin) || L <- Lines ].

%% Parse one legacy line; on failure report the whole packet with the
%% reason and stack trace.
handle_legacy_line(Line, Bin) ->
    try
        parse_line(Line)
    catch
        error:Why ->
            error_logger:error_report({error, "handle_message failed",
                                       Bin, Why, erlang:get_stacktrace()})
    end.

%% Empty lines (e.g. after a trailing newline) are skipped. Any other
%% line must split into exactly key, value and type.
parse_line(<<>>) ->
    skip;
parse_line(Bin) ->
    [Key, Value, Type] = binary:split(Bin, [<<":">>, <<"|">>], [global]),
    send_metric(Type, Key, Value).

send_metric(Type, Key, Value) ->
    send_estatsd_metric(Type, Key, Value).

%% Route by metric type: "ms" and "h" are recorded as timings, "c" and
%% "m" as counter increments.
send_estatsd_metric(Type, Key, Value)
  when Type =:= <<"ms">> orelse Type =:= <<"h">> ->
    estatsd:timing(Key, convert_value(Type, Value));
send_estatsd_metric(Type, Key, Value)
  when Type =:= <<"c">> orelse Type =:= <<"m">> ->
    estatsd:increment(Key, convert_value(Type, Value));
send_estatsd_metric(_Type, _Key, _Value) ->
    % if it isn't one of the above types, we ignore the request.
    ignored.

%% Values arrive as binaries from the legacy protocol and as integers
%% from SHP; both end up as integers. Type "e" values pass through
%% untouched.
convert_value(<<"e">>, Value) ->
    Value;
convert_value(_Type, Value) when is_binary(Value) ->
    ?to_int(Value);
convert_value(_Type, Value) when is_integer(Value) ->
    Value.
--------------------------------------------------------------------------------
/test/estatsd_shp_tests.erl:
--------------------------------------------------------------------------------
-module(estatsd_shp_tests).

-include_lib("eunit/include/eunit.hrl").
-include("../src/estatsd.hrl").

-define(SHP_VERSION, 1).

%% EUnit fixture for estatsd_shp:parse_packet/1 and
%% estatsd_shp:parse_metric/1. Setup and cleanup are placeholders that
%% only return an atom. Entries tagged `generator' return lists of
%% ?_assert* test objects; the others run plain assertions directly.
estatsd_shp_test_() ->
    {foreach,
     fun() ->
             setup
     end,
     fun(_X) ->
             cleanup
     end,
     [
      {"parse_packet valid packet",
       fun() ->

               %% Header is "<version>|<body length>\n"; 42 is the byte
               %% length of the body including its trailing newline.
               Packet = <<"1|42\ndeploys.OpscodeAccount.application:1000|h\n">>,
               ?assertEqual([#shp_metric{key = <<"deploys.OpscodeAccount.application">>,
                                         value = 1000,
                                         type = h,
                                         sample_rate = undefined}],
                            estatsd_shp:parse_packet(Packet))
       end
      },

      {"parse_packet packet no trailing LF",
       fun() ->

               Packet = <<"1|41\ndeploys.OpscodeAccount.application:1000|h">>,
               ?assertEqual([#shp_metric{key = <<"deploys.OpscodeAccount.application">>,
                                         value = 1000,
                                         type = h,
                                         sample_rate = undefined}],
                            estatsd_shp:parse_packet(Packet))
       end
      },

      {"parse_packet multiple metrics",
       fun() ->
               %% Build a body of 20 metric lines and expect them back in
               %% the same order.
               NumMetrics = 20,
               IO = lists:map(fun(I) ->
                                      C = integer_to_list(I),
                                      ["metric-", C, ":", C, "|h\n"]
                              end, lists:seq(1, NumMetrics)),
               Body = iolist_to_binary(IO),
               Size = integer_to_list(size(Body)),
               Packet = iolist_to_binary(["1|", Size, "\n", Body]),
               Metrics = estatsd_shp:parse_packet(Packet),
               Expect = lists:map(fun(I) ->
                                          C = integer_to_list(I),
                                          #shp_metric{key = iolist_to_binary(["metric-", C]),
                                                      value = I,
                                                      type = h,
                                                      sample_rate = undefined}
                                  end, lists:seq(1, NumMetrics)),
               ?assertEqual(Expect, Metrics)
       end
      },

      {"parse_packet bad version",
       fun() ->
               %% Wrong version, missing separator, non-numeric version
               %% and the empty packet are all reported as bad_version.
               BadVersions = [<<"2|12\na_label:1|m">>,
                              <<"212\na_label:1|m">>,
                              <<"x|12\na_label:1|m">>,
                              <<>>],
               [ ?assertEqual({bad_version, P}, estatsd_shp:parse_packet(P))
                 || P <- BadVersions ]
       end
      },

      {"parse_packet content length mismatch",
       generator,
       fun() ->
               %% Declared length differs from the actual body size; the
               %% error carries the parsed length and the body.
               BadLength = [{<<"1|11\na_label:1|m\n">>,
                             {11, <<"a_label:1|m\n">>}},

                            {<<"1|12\nx:1|m\n">>,
                             {12, <<"x:1|m\n">>}}
                           ],
               [ ?_assertEqual({bad_length, {L, R}}, estatsd_shp:parse_packet(P))
                 || {P, {L, R}} <- BadLength ]
       end
      },

      {"parse_packet invalid content length",
       generator,
       fun() ->
               %% Length field is not an integer; the error carries the
               %% raw length text (as a string) and the remaining body.
               Packets = [{<<"1|1.0\nlabel:1|m">>,
                           {"1.0", <<"label:1|m">>}},

                          {<<"1|abc\nlabel:1|m">>,
                           {"abc", <<"label:1|m">>}},

                          {<<"1|label:1|m">>,
                           {"label:1|m", <<>>}}

                         ],

               [ ?_assertEqual({bad_length, {L, R}}, estatsd_shp:parse_packet(P))
                 || {P, {L, R}} <- Packets ]
       end
      },

      {"parse_metric valid metric tests",
       generator,
       fun() ->
               Tests = [{<<"label:1|m">>,
                         #shp_metric{key = <<"label">>, value = 1, type = 'm'}},

                        {<<"label:123|h">>,
                         #shp_metric{key = <<"label">>, value = 123,
                                     type = 'h'}},

                        {<<"x:-123|g">>,
                         #shp_metric{key = <<"x">>, value = -123, type = 'g'}},


                        {<<"x:123|h">>,
                         #shp_metric{key = <<"x">>, value = 123, type = 'h'}},

                        % sample rate
                        {<<"x:123|h|@0.43">>,
                         #shp_metric{key = <<"x">>, value = 123, type = 'h',
                                     sample_rate = 0.43}}
                       ],
               [ ?_assertEqual(Expect, estatsd_shp:parse_metric(In)) || {In, Expect} <- Tests ]
       end
      },

      {"gzip compressed body",
       fun() ->
               Body = <<"a_label:1|m\n">>,
               GZBody = zlib:gzip(Body),
               % The declared length is the size of the gzipped body
               % only; nothing is added for the '\n' that ends the
               % header line.
               BodySize = integer_to_list(size(GZBody)),
               Packet = iolist_to_binary(["1|", BodySize, "\n",
                                          GZBody]),
               ?assertEqual([#shp_metric{key = <<"a_label">>,
                                         value = 1,
                                         type = m,
                                         sample_rate = undefined}],
                            estatsd_shp:parse_packet(Packet))
       end

      },

      {"parse_metric bad metrics",
       generator,
       fun() ->
               Tests = [
                        % bad type
                        {<<"x:1|q">>, {bad_metric, {unknown_type, <<"q">>}}},
                        % bad value
                        {<<"x:1.0|m">>, {bad_metric, {bad_value, <<"1.0">>}}},
                        % bad parse
                        {<<"x:10m">>, {bad_metric, {parse_error, <<"x:10m">>}}},
                        % too many trailing fields
                        {<<"x:1|m|a|b">>, {bad_metric,
                                           {bad_sample_rate, [<<"a">>, <<"b">>]}}}
                       ],
               [ ?_assertEqual(Expect, estatsd_shp:parse_metric(Line)) ||
                   {Line, Expect} <- Tests ]
       end
      },

      {"parse_metric bad sample rate",
       generator,
       fun() ->
               %% The expected error carries the sample rate text with
               %% the leading '@' removed.
               EatAt = fun([$@|S]) ->
                               list_to_binary(S)
                       end,
               SampleRates = ["@0.a", "@01", "@5", "@0.1x"],
               Expects = [ {bad_metric, {bad_sample_rate, EatAt(S)}}
                           || S <- SampleRates ],
               Tests = [ {iolist_to_binary([<<"x:1|m|">>, S]), E}
                         || {S, E} <- lists:zip(SampleRates, Expects) ],
               [ ?_assertEqual(Expect, estatsd_shp:parse_metric(Line)) ||
                   {Line, Expect} <- Tests ]
       end
      }

     ]}.
--------------------------------------------------------------------------------
/src/estatsd_server.erl:
--------------------------------------------------------------------------------
%% Stats aggregation process that periodically dumps data to graphite
%% Will calculate 90th percentile etc.
%% Inspired by etsy statsd:
%% http://codeascraft.etsy.com/2011/02/15/measure-anything-measure-everything/
%%
%% This could be extended to take a callback for reporting mechanisms.
%% Right now it's hardcoded to stick data into graphite.
%%
%% Richard Jones
%%
-module(estatsd_server).
-behaviour(gen_server).

-export([start_link/0]).

%-export([key2str/1,flush/0]). %% export for debugging

-export([
         code_change/3,
         force_flush/0,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         init/1,
         terminate/2
        ]).

-record(state, {timers,          % gb_tree of timer data
                flush_interval,  % ms interval between stats flushing
                flush_timer,     % TRef of interval timer
                graphite_host,   % graphite server host
                graphite_port    % graphite server port
               }).

%% @doc Start the aggregation server, registered locally under the
%% module name.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc Flush synchronously, outside the periodic schedule. Only
%% intended for testing and debugging use.
force_flush() ->
    gen_server:call(?MODULE, flush).

%%

%% Read the flush and graphite settings (all required), create the named
%% ETS table that holds counters, and schedule the periodic flush.
init([]) ->
    Setting = fun(Name) ->
                      {ok, Value} = application:get_env(estatsd, Name),
                      Value
              end,
    FlushIntervalMs = Setting(flush_interval),
    GraphiteHost = Setting(graphite_host),
    GraphitePort = Setting(graphite_port),
    error_logger:info_msg("estatsd will flush stats to ~p:~w every ~wms\n",
                          [ GraphiteHost, GraphitePort, FlushIntervalMs ]),
    ets:new(statsd, [named_table, set]),
    %% Every interval, the timer issues a `flush' call to this server.
    {ok, TimerRef} = timer:apply_interval(FlushIntervalMs, gen_server, call,
                                          [?MODULE, flush]),
    {ok, #state{ timers         = gb_trees:empty(),
                 flush_interval = FlushIntervalMs,
                 flush_timer    = TimerRef,
                 graphite_host  = GraphiteHost,
                 graphite_port  = GraphitePort
               }}.

%% Counter increment. `Sample' is the sample rate in (0, 1]; the delta
%% is scaled by 1/Sample so sampled counters estimate the true total.
%% Counters live in the `statsd' ETS table as {Key, {Total, Times}}.
%%
%% A rate of 0 must not reach the division (it would raise badarith and
%% take the server down with everything aggregated so far), so any rate
%% outside (0, 1] is logged and the update dropped.
handle_cast({increment, Key, Delta0, Sample}, State) when Sample > 0, Sample =< 1 ->
    Delta = Delta0 * ( 1 / Sample ), %% account for sample rates < 1.0
    case ets:lookup(statsd, Key) of
        [] ->
            ets:insert(statsd, {Key, {Delta,1}});
        [{Key,{Tot,Times}}] ->
            ets:insert(statsd, {Key,{Tot+Delta, Times+1}}),
            ok
    end,
    {noreply, State};
handle_cast({increment, Key, _Delta, Sample}, State) ->
    error_logger:warning_msg("estatsd ignoring increment of ~p: "
                             "invalid sample rate ~p~n", [Key, Sample]),
    {noreply, State};

%% Timing sample. Durations are accumulated per key (newest first) in
%% the `timers' tree until the next flush.
handle_cast({timing, Key, Duration}, State) ->
    case gb_trees:lookup(Key, State#state.timers) of
        none ->
            {noreply, State#state{timers = gb_trees:insert(Key, [Duration], State#state.timers)}};
        {value, Val} ->
            {noreply, State#state{timers = gb_trees:update(Key, [Duration|Val], State#state.timers)}}
    end.

%% flush: snapshot the counters, report them together with the timers
%% held in the current state from a separate process, then reset both.
handle_call(flush, _From, State) ->
    All = ets:tab2list(statsd),
    %% The closure captures the pre-reset state, including its timers.
    spawn( fun() -> do_report(All, State) end ),
    %% WIPE ALL
    ets:delete_all_objects(statsd),
    NewState = State#state{timers = gb_trees:empty()},
    {reply, ok, NewState};
handle_call(_,_,State) ->
    {reply, ok, State}.

handle_info(_Msg, State) -> {noreply, State}.

code_change(_, _, State) -> {ok, State}.

terminate(_, _)          -> ok.

%% INTERNAL STUFF

%% Open a fresh TCP connection to graphite, send `Msg' and close.
%% Returns `ok', or the error from connecting or sending.
send_to_graphite(Msg, State) ->
    error_logger:info_msg("sending data to graphite~n"),
    % io:format("SENDING: ~s\n", [Msg]),
    case gen_tcp:connect(State#state.graphite_host,
                         State#state.graphite_port,
                         [list, {packet, 0}]) of
        {ok, Sock} ->
            %% Close the socket whether or not the send worked, but
            %% report the send result rather than an unconditional ok.
            SendResult = gen_tcp:send(Sock, Msg),
            gen_tcp:close(Sock),
            SendResult;
        E ->
            %error_logger:error_msg("Failed to connect to graphite: ~p", [E]),
            E
    end.

% this string munging is damn ugly compared to javascript :(
%% Convert a stat key (atom, binary or string) into a graphite-safe
%% string: runs of whitespace become "_", "/" becomes "-", and any other
%% character outside [a-zA-Z0-9_.-] is removed. Atoms are sanitised the
%% same way as the other key types so they cannot inject spaces or
%% newlines into the graphite line protocol.
key2str(K) when is_atom(K) ->
    key2str(atom_to_list(K));
key2str(K) when is_binary(K) ->
    key2str(binary_to_list(K));
key2str(K) when is_list(K) ->
    {ok, R1} = re:compile("\\s+"),
    {ok, R2} = re:compile("/"),
    {ok, R3} = re:compile("[^a-zA-Z_\\-0-9\\.]"),
    Opts = [global, {return, list}],
    S1 = re:replace(K,  R1, "_", Opts),
    S2 = re:replace(S1, R2, "-", Opts),
    S3 = re:replace(S2, R3, "", Opts),
    S3.

%% Format any term with ~w as a flat string.
num2str(NN) -> lists:flatten(io_lib:format("~w",[NN])).

%% Current time in whole seconds. os:timestamp/0 returns the same
%% {MegaSecs, Secs, MicroSecs} shape as the deprecated erlang:now/0.
unixtime()  -> {Meg,S,_Mic} = os:timestamp(), Meg*1000000 + S.

%% Aggregate the stats and generate a report to send to graphite
%% `All' is the counter snapshot; timers are read from `State'. Nothing
%% is sent when there are neither counters nor timers.
do_report(All, State) ->
    % One time stamp string used in all stats lines:
    TsStr = num2str(unixtime()),
    {MsgCounters, NumCounters} = do_report_counters(All, TsStr, State),
    {MsgTimers,   NumTimers}   = do_report_timers(TsStr, State),
    %% REPORT TO GRAPHITE
    case NumTimers + NumCounters of
        0 -> nothing_to_report;
        NumStats ->
            FinalMsg = [ MsgCounters,
                         MsgTimers,
                         %% Also graph the number of graphs we're graphing:
                         "statsd.numStats ", num2str(NumStats), " ", TsStr, "\n"
                       ],
            send_to_graphite(FinalMsg, State)
    end.

%% Build the graphite lines for all counters. Each counter yields two
%% lines: `stats.<key>' with the total divided by the flush interval in
%% seconds, and `stats_counts.<key>' with the number of updates seen.
%% Returns {Iolist, NumberOfCounters}.
do_report_counters(All, TsStr, State) ->
    Msg = lists:foldl(
                fun({Key, {Val0,NumVals}}, Acc) ->
                        KeyS = key2str(Key),
                        Val = Val0 / (State#state.flush_interval/1000),
                        %% Build stats string for graphite
                        Fragment = [ "stats.", KeyS, " ",
                                     io_lib:format("~w", [Val]), " ",
                                     TsStr, "\n",

                                     "stats_counts.", KeyS, " ",
                                     io_lib:format("~w",[NumVals]), " ",
                                     TsStr, "\n"
                                   ],
                        [ Fragment | Acc ]
                end, [], All),
    {Msg, length(All)}.

%% Build the graphite lines for every timer key held in `State'.
%% Returns {Iolist, NumberOfTimerKeys}; fragments are emitted in
%% descending key order.
do_report_timers(TsStr, State) ->
    LineEnd = [" ", TsStr, "\n"],
    Render =
        fun({Key, Values}) ->
                %% Note that if there are fewer than 5 values, all stats will be zero
                %% https://github.com/boundary/bear/blob/master/src/bear.erl#L37
                Stats = bear:get_statistics(Values),
                LineStart = [ "stats.timers.", key2str(Key), "." ],
                %% One graphite line per reported metric of this key
                [ [LineStart, Name, " ", num2str(Val), LineEnd]
                  || {Name, Val} <- reported_metrics(Stats) ]
        end,
    Timings = gb_trees:to_list(State#state.timers),
    Msg = lists:reverse(lists:map(Render, Timings)),
    {Msg, length(Msg)}.


%% @doc Pick the statistics to report out of a bear-computed proplist
%% and pair each with its graphite label.
%%
%% See bear:get_statistics/1.
reported_metrics(Stats) ->
    %% Label / bear statistic name for the standard measurements
    Standard = [{"mean",   arithmetic_mean},
                {"median", median},
                {"upper",  max},
                {"lower",  min},
                {"count",  n}],
    BaseMetrics = [ {Label, proplists:get_value(StatName, Stats)}
                    || {Label, StatName} <- Standard ],

    %% These need to be percentiles that bear computes already.
    %% https://github.com/boundary/bear/blob/master/src/bear.erl
    %%
    %% Currently, this is 50, 75, 90, 95, 99, and 999
    %%
    %% Labels are "upper_90", "upper_95", "upper_99".
    Percentiles = [ {"upper_" ++ num2str(P), percentile(P, Stats)}
                    || P <- [90, 95, 99] ],

    BaseMetrics ++ Percentiles.

%% @doc Helper function to extract a percentile measurement from a
%% bear-generated proplist of statistics.
percentile(Percentile, Stats) ->
    %% The percentiles are a nested proplist stored under the
    %% `percentile' key, itself keyed by the percentile number.
    proplists:get_value(Percentile, proplists:get_value(percentile, Stats)).
--------------------------------------------------------------------------------